Skip to content

Commit d60c474

Browse files
committed
[Clang] Propagate target-features if compatible when using mlink-builtin-bitcode
Buitlins from AMD's device-libs are compiled without specifying a target-cpu, which results in builtins without the target-features attribute set. Before this patch, when linking this builtins with -mlink-builtin-bitcode the target-features were not propagated in the incoming builtins. With this patch, the default target features are propagated if they are compatible with the target-features in the incoming builtin. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D159206
1 parent 3398744 commit d60c474

File tree

3 files changed

+87
-29
lines changed

3 files changed

+87
-29
lines changed

clang/lib/CodeGen/CGCall.cpp

+51
Original file line numberDiff line numberDiff line change
@@ -2001,6 +2001,54 @@ static void getTrivialDefaultFunctionAttributes(
20012001
}
20022002
}
20032003

2004+
static void
2005+
overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr,
2006+
const llvm::Function &F,
2007+
const TargetOptions &TargetOpts) {
2008+
auto FFeatures = F.getFnAttribute("target-features");
2009+
2010+
llvm::StringSet<> IncompatibleFeatureNames;
2011+
SmallVector<StringRef> MergedFeatures;
2012+
MergedFeatures.reserve(TargetOpts.Features.size());
2013+
2014+
if (FFeatures.isValid()) {
2015+
const auto &TFeatures = TargetOpts.FeatureMap;
2016+
for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) {
2017+
if (Feature.empty())
2018+
continue;
2019+
2020+
bool EnabledForFunc = Feature.starts_with("+");
2021+
assert(EnabledForFunc || Feature.starts_with("-"));
2022+
2023+
StringRef Name = Feature.drop_front(1);
2024+
auto TEntry = TFeatures.find(Name);
2025+
2026+
// Preserves features that are incompatible (either set to something
2027+
// different or missing) from the target features
2028+
bool MissingFromTarget = TEntry == TFeatures.end();
2029+
bool EnabledForTarget = !MissingFromTarget && TEntry->second;
2030+
bool Incompatible = EnabledForTarget != EnabledForFunc;
2031+
if (MissingFromTarget || Incompatible) {
2032+
MergedFeatures.push_back(Feature);
2033+
if (Incompatible)
2034+
IncompatibleFeatureNames.insert(Name);
2035+
}
2036+
}
2037+
}
2038+
2039+
for (StringRef Feature : TargetOpts.Features) {
2040+
if (Feature.empty())
2041+
continue;
2042+
StringRef Name = Feature.drop_front(1);
2043+
if (IncompatibleFeatureNames.contains(Name))
2044+
continue;
2045+
MergedFeatures.push_back(Feature);
2046+
}
2047+
2048+
if (!MergedFeatures.empty())
2049+
FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ","));
2050+
}
2051+
20042052
void CodeGen::mergeDefaultFunctionDefinitionAttributes(
20052053
llvm::Function &F, const CodeGenOptions &CodeGenOpts,
20062054
const LangOptions &LangOpts, const TargetOptions &TargetOpts,
@@ -2058,6 +2106,9 @@ void CodeGen::mergeDefaultFunctionDefinitionAttributes(
20582106

20592107
F.removeFnAttrs(AttrsToRemove);
20602108
addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
2109+
2110+
overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts);
2111+
20612112
F.addFnAttrs(FuncAttrs);
20622113
}
20632114

+35-28
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,49 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
1+
// Build two version of the bitcode library, one with a target-cpu set and one without
22
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
3+
// RUN: %clang_cc1 -triple amdgcn-- -DBITCODE -emit-llvm-bc -o %t-lib.no-cpu.bc %s
4+
35
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
46
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
57
// RUN: -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
68

9+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
10+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
11+
// RUN: -mlink-builtin-bitcode %t-lib.no-cpu.bc -o - %t.bc | FileCheck %s
12+
713
#ifdef BITCODE
8-
int foo(void) { return 42; }
14+
int no_attr(void) { return 42; }
15+
int __attribute__((target("gfx8-insts"))) attr_in_target(void) { return 42; }
16+
int __attribute__((target("extended-image-insts"))) attr_not_in_target(void) { return 42; }
17+
int __attribute__((target("no-gfx9-insts"))) attr_incompatible(void) { return 42; }
918
int x = 12;
1019
#endif
1120

12-
extern int foo(void);
21+
extern int no_attr(void);
22+
extern int attr_in_target(void);
23+
extern int attr_not_in_target(void);
24+
extern int attr_incompatible(void);
1325
extern int x;
1426

15-
int bar() { return foo() + x; }
16-
//.
27+
int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_incompatible() + x; }
28+
1729
// CHECK: @x = internal addrspace(1) global i32 12, align 4
18-
//.
19-
// CHECK: Function Attrs: noinline nounwind optnone
30+
2031
// CHECK-LABEL: define dso_local i32 @bar
21-
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
22-
// CHECK-NEXT: entry:
23-
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
24-
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
25-
// CHECK-NEXT: [[CALL:%.*]] = call i32 @foo()
26-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
27-
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
28-
// CHECK-NEXT: ret i32 [[ADD]]
32+
// CHECK-SAME: () #[[ATTR_BAR:[0-9]+]] {
2933
//
30-
//
31-
// CHECK: Function Attrs: convergent noinline nounwind optnone
32-
// CHECK-LABEL: define internal i32 @foo
33-
// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
34-
// CHECK-NEXT: entry:
35-
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
36-
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
37-
// CHECK-NEXT: ret i32 42
38-
//
39-
//.
40-
// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
41-
// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
42-
//.
34+
// CHECK-LABEL: define internal i32 @no_attr
35+
// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
36+
37+
// CHECK-LABEL: define internal i32 @attr_in_target
38+
// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
39+
40+
// CHECK-LABEL: define internal i32 @attr_not_in_target
41+
// CHECK-SAME: () #[[ATTR_EXTEND:[0-9]+]] {
42+
43+
// CHECK-LABEL: @attr_incompatible
44+
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
45+
46+
// CHECK: attributes #[[ATTR_BAR]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
47+
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
48+
// CHECK: attributes #[[ATTR_EXTEND]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+extended-image-insts,+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
49+
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="-gfx9-insts,+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }

clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232

3333
// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
34-
// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts"
34+
// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}"
3535
// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"
3636

3737
#define __device__ __attribute__((device))

0 commit comments

Comments
 (0)