Skip to content

Commit f3261a5

Browse files
davemgreentru
authored and committed
[AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105)
The code-generator is currently not able to handle scalable vectors of <vscale x 1 x eltty>. The usual "fix" for this until it is supported is to mark the costs of loads/stores with an invalid cost, preventing the vectorizer from vectorizing at those factors. But on rare occasions loops do not contain loads/stores, only reductions. So, whilst this is still unsupported, return an invalid cost to avoid selecting vscale x 1 VFs. The cost of a reduction is not currently used by the vectorizer, so this adds the cost to the add/mul/and/or/xor or min/max that should feed the reduction. It includes reduction costs too, for completeness. This change will be removed when code-generation for these types is sufficiently reliable. Fixes #99760 (cherry picked from commit 0b745a1)
1 parent d893708 commit f3261a5

File tree

6 files changed

+107
-0
lines changed

6 files changed

+107
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+32
Original file line number | Diff line number | Diff line change
@@ -541,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
541541
InstructionCost
542542
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
543543
TTI::TargetCostKind CostKind) {
544+
// The code-generator is currently not able to handle scalable vectors
545+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
546+
// it. This change will be removed when code-generation for these types is
547+
// sufficiently reliable.
544548
auto *RetTy = ICA.getReturnType();
549+
if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))
550+
if (VTy->getElementCount() == ElementCount::getScalable(1))
551+
return InstructionCost::getInvalid();
552+
545553
switch (ICA.getID()) {
546554
case Intrinsic::experimental_vector_histogram_add:
547555
if (!ST->hasSVE2())
@@ -3024,6 +3032,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
30243032
ArrayRef<const Value *> Args,
30253033
const Instruction *CxtI) {
30263034

3035+
// The code-generator is currently not able to handle scalable vectors
3036+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3037+
// it. This change will be removed when code-generation for these types is
3038+
// sufficiently reliable.
3039+
if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3040+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3041+
return InstructionCost::getInvalid();
3042+
30273043
// TODO: Handle more cost kinds.
30283044
if (CostKind != TTI::TCK_RecipThroughput)
30293045
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -3798,6 +3814,14 @@ InstructionCost
37983814
AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
37993815
FastMathFlags FMF,
38003816
TTI::TargetCostKind CostKind) {
3817+
// The code-generator is currently not able to handle scalable vectors
3818+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3819+
// it. This change will be removed when code-generation for these types is
3820+
// sufficiently reliable.
3821+
if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
3822+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3823+
return InstructionCost::getInvalid();
3824+
38013825
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
38023826

38033827
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
@@ -3842,6 +3866,14 @@ InstructionCost
38423866
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
38433867
std::optional<FastMathFlags> FMF,
38443868
TTI::TargetCostKind CostKind) {
3869+
// The code-generator is currently not able to handle scalable vectors
3870+
// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
3871+
// it. This change will be removed when code-generation for these types is
3872+
// sufficiently reliable.
3873+
if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
3874+
if (VTy->getElementCount() == ElementCount::getScalable(1))
3875+
return InstructionCost::getInvalid();
3876+
38453877
if (TTI::requiresOrderedReduction(FMF)) {
38463878
if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
38473879
InstructionCost BaseCost =

llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll

+4
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ define void @fadd() {
88
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
11+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd <vscale x 1 x float> undef, undef
1112
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
1213
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
1314
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -19,6 +20,7 @@ define void @fadd() {
1920
%V8F16 = fadd <vscale x 8 x half> undef, undef
2021
%V16F16 = fadd <vscale x 16 x half> undef, undef
2122

23+
%V1F32 = fadd <vscale x 1 x float> undef, undef
2224
%V2F32 = fadd <vscale x 2 x float> undef, undef
2325
%V4F32 = fadd <vscale x 4 x float> undef, undef
2426
%V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -34,6 +36,7 @@ define void @fsub() {
3436
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
3537
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
3638
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
39+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fsub <vscale x 1 x float> undef, undef
3740
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
3841
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
3942
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
@@ -45,6 +48,7 @@ define void @fsub() {
4548
%V8F16 = fsub <vscale x 8 x half> undef, undef
4649
%V16F16 = fsub <vscale x 16 x half> undef, undef
4750

51+
%V1F32 = fsub <vscale x 1 x float> undef, undef
4852
%V2F32 = fsub <vscale x 2 x float> undef, undef
4953
%V4F32 = fsub <vscale x 4 x float> undef, undef
5054
%V8F32 = fsub <vscale x 8 x float> undef, undef

llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll

+2
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
define void @foo_no_vscale_range() {
55
; CHECK-LABEL: 'foo_no_vscale_range'
6+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true)
67
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
78
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
89
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
@@ -45,6 +46,7 @@ define void @foo_no_vscale_range() {
4546
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false)
4647
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4748
;
49+
%res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true)
4850
%res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
4951
%res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
5052
%res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)

llvm/test/Analysis/CostModel/AArch64/sve-arith.ll

+21
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,34 @@ define void @scalable_mul() #0 {
4343
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
4444
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
4545
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
46+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
4647
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4748
;
4849
entry:
4950
%mul_nxv16i8 = mul <vscale x 16 x i8> undef, undef
5051
%mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
5152
%mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
5253
%mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
54+
%mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
55+
56+
ret void
57+
}
58+
59+
define void @scalable_add() #0 {
60+
; CHECK-LABEL: 'scalable_add'
61+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv16i8 = add <vscale x 16 x i8> undef, undef
62+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv8i16 = add <vscale x 8 x i16> undef, undef
63+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv4i32 = add <vscale x 4 x i32> undef, undef
64+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv2i64 = add <vscale x 2 x i64> undef, undef
65+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i64 = add <vscale x 1 x i64> undef, undef
66+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
67+
;
68+
entry:
69+
%add_nxv16i8 = add <vscale x 16 x i8> undef, undef
70+
%add_nxv8i16 = add <vscale x 8 x i16> undef, undef
71+
%add_nxv4i32 = add <vscale x 4 x i32> undef, undef
72+
%add_nxv2i64 = add <vscale x 2 x i64> undef, undef
73+
%add_nxv1i64 = add <vscale x 1 x i64> undef, undef
5374

5475
ret void
5576
}

0 commit comments

Comments
 (0)