Skip to content

Commit 2bd3ce7

Browse files
committed
Make sure all the bitcasts are handled
1 parent e712f1e commit 2bd3ce7

File tree

2 files changed

+28
-12
lines changed

2 files changed

+28
-12
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

+11-9
Original file line numberDiff line numberDiff line change
@@ -2943,15 +2943,13 @@ def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
29432943
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
29442944
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
29452945

2946-
foreach vt = [v2f16, v2bf16] in {
2946+
foreach vt = [v2f16, v2bf16, v2i16] in {
29472947
def: Pat<(vt (bitconvert (i32 UInt32Const:$a))),
29482948
(IMOVB32ri UInt32Const:$a)>;
2949-
def: Pat<(vt (bitconvert (i32 Int32Regs:$a))),
2950-
(ProxyRegI32 Int32Regs:$a)>;
2951-
def: Pat<(i32 (bitconvert (vt Int32Regs:$a))),
2952-
(ProxyRegI32 Int32Regs:$a)>;
29532949
def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
29542950
(BITCONVERT_32_F2I Float32Regs:$a)>;
2951+
def: Pat<(f32 (bitconvert (vt Int32Regs:$a))),
2952+
(BITCONVERT_32_I2F Int32Regs:$a)>;
29552953
}
29562954
foreach vt = [f16, bf16] in {
29572955
def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
@@ -2962,10 +2960,14 @@ def: Pat<(i16 (bitconvert (vt Int16Regs:$a))),
29622960
(ProxyRegI16 Int16Regs:$a)>;
29632961
}
29642962

2965-
def: Pat<(v2i16 (bitconvert (i32 Int32Regs:$a))),
2966-
(ProxyRegI32 Int32Regs:$a)>;
2967-
def: Pat<(i32 (bitconvert (v2i16 Int32Regs:$a))),
2968-
(ProxyRegI32 Int32Regs:$a)>;
2963+
// For every ordered pair of distinct types drawn from
// [v2f16, v2bf16, v2i16, i32], emit a pattern that selects a bitconvert
// from tb to ta as ProxyRegI32. The source operand is taken from Int32Regs
// for every type in the list; !ne(ta, tb) skips the same-type pairs.
foreach ta = [v2f16, v2bf16, v2i16, i32] in {
2964+
foreach tb = [v2f16, v2bf16, v2i16, i32] in {
2965+
if !ne(ta, tb) then {
2966+
def: Pat<(ta (bitconvert (tb Int32Regs:$a))),
2967+
(ProxyRegI32 Int32Regs:$a)>;
2968+
}
2969+
}
2970+
}
29692971

29702972
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
29712973
// we cannot specify floating-point literals in isel patterns. Therefore, we

llvm/test/CodeGen/NVPTX/i16x2-instructions.ll

+17-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
2121

2222
; COMMON-LABEL: test_ret_const(
23-
; COMMON: mov.u32 [[R:%r[0-9+]]], 131073;
23+
; COMMON: mov.b32 [[R:%r[0-9]+]], 131073;
2424
; COMMON: st.param.b32 [func_retval0+0], [[R]];
2525
; COMMON-NEXT: ret;
2626
define <2 x i16> @test_ret_const() #0 {
@@ -86,7 +86,7 @@ define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 {
8686
; COMMON-LABEL: test_add_imm_0(
8787
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_0_param_0];
8888
;
89-
; I16x2: mov.u32 [[I:%r[0-9+]]], 131073;
89+
; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
9090
; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
9191
;
9292
; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
@@ -104,7 +104,7 @@ define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
104104
; COMMON-LABEL: test_add_imm_1(
105105
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_imm_1_param_0];
106106
;
107-
; I16x2: mov.u32 [[I:%r[0-9+]]], 131073;
107+
; I16x2: mov.b32 [[I:%r[0-9]+]], 131073;
108108
; I16x2: add.s16x2 [[R:%r[0-9]+]], [[A]], [[I]];
109109
;
110110
; NO-I16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
@@ -497,6 +497,20 @@ define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
497497
ret i32 %r
498498
}
499499

500+
; COMMON-LABEL: test_bitcast_2xi16_to_2xhalf(
501+
; COMMON: ld.param.u16 [[RS1:%rs[0-9]+]], [test_bitcast_2xi16_to_2xhalf_param_0];
502+
; COMMON: mov.u16 [[RS2:%rs[0-9]+]], 5;
503+
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[RS1]], [[RS2]]};
504+
; COMMON: st.param.b32 [func_retval0+0], [[R]];
505+
; COMMON: ret;
506+
; Packs %a (lane 0) and the constant 5 (lane 1) into a <2 x i16>, then
; bitcasts that vector to <2 x half> and returns it.
define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
507+
%ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0
508+
%ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
509+
%r = bitcast <2 x i16> %ins.1 to <2 x half>
510+
ret <2 x half> %r
511+
}
512+
513+
500514
; COMMON-LABEL: test_shufflevector(
501515
; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_shufflevector_param_0];
502516
; COMMON: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[R]];

0 commit comments

Comments
 (0)