@@ -93,3 +93,34 @@ define internal void @caller_not_avx4() {
93
93
}
94
94
95
95
declare i64 @caller_unknown_simple (i64 )
96
+
97
+ ; FIXME: This call should get inlined, because the callee only contains
98
+ ; inline ASM, not real calls.
+ ;
+ ; The caller is tagged with attribute group #0 and the callee with #1. It
+ ; forwards its four arguments unchanged to the internal @callee_inline_asm
+ ; and returns that result. The CHECK lines below pin the current output, in
+ ; which the call to @callee_inline_asm is still present.
99
+ define <8 x i64 > @caller_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 ) #0 {
100
+ ; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
101
+ ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
102
+ ; CHECK-NEXT: [[CALL:%.*]] = call <8 x i64> @callee_inline_asm(ptr [[P0]], i64 [[K]], ptr [[P1]], ptr [[P2]])
103
+ ; CHECK-NEXT: ret <8 x i64> [[CALL]]
104
+ ;
105
+ %call = call <8 x i64 > @callee_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 )
106
+ ret <8 x i64 > %call
107
+ }
108
+
109
+ ; Internal callee, tagged with attribute group #1. Its body is three 64-byte
+ ; aligned <8 x i64> loads followed by a single inline-asm "vpaddb"; it makes
+ ; no call to any other function.
+ ;
+ ; Inline-asm operands, in constraint order "=v,^Yk,v,v,0":
+ ;   $0 - the <8 x i64> result (output)
+ ;   $1 - %k, the i64 passed under the "Yk" constraint and used in "{$1}"
+ ;   $2 - %a, loaded from %p1
+ ;   $3 - %b, loaded from %p2
+ ;   "0" - %src, loaded from %p0 and tied to output operand $0
+ define internal <8 x i64 > @callee_inline_asm (ptr %p0 , i64 %k , ptr %p1 , ptr %p2 ) #1 {
110
+ ; CHECK-LABEL: define {{[^@]+}}@callee_inline_asm
111
+ ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR3:[0-9]+]] {
112
+ ; CHECK-NEXT: [[SRC:%.*]] = load <8 x i64>, ptr [[P0]], align 64
113
+ ; CHECK-NEXT: [[A:%.*]] = load <8 x i64>, ptr [[P1]], align 64
114
+ ; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[P2]], align 64
115
+ ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A]], <8 x i64> [[B]], <8 x i64> [[SRC]])
116
+ ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
117
+ ;
118
+ %src = load <8 x i64 >, ptr %p0 , align 64
119
+ %a = load <8 x i64 >, ptr %p1 , align 64
120
+ %b = load <8 x i64 >, ptr %p2 , align 64
121
+ %3 = tail call <8 x i64 > asm "vpaddb\09 $($3, $2, $0 {$1}" , "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}" (i64 %k , <8 x i64 > %a , <8 x i64 > %b , <8 x i64 > %src ) #2
122
+ ret <8 x i64 > %3
123
+ }
124
+
125
+ ; Attribute groups for the inline-asm test: #0 is used by @caller_inline_asm
+ ; and #1 by @callee_inline_asm. The two groups are identical except that #0
+ ; additionally lists +avx512dq in "target-features".
+ attributes #0 = { "min-legal-vector-width" ="512" "target-features" ="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu" ="generic" }
126
+ attributes #1 = { "min-legal-vector-width" ="512" "target-features" ="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu" ="generic" }
0 commit comments