Skip to content

Commit b9de11a

Browse files
paoloteti authored and
alexcrichton committed
Add few ARM DSP Intrinsics (#529)
* Add a few ARM DSP intrinsics: - Signed saturating add/sub - Saturating four 8-bit integer add/sub - Saturating two 16-bit integer add/sub. The intent is mainly to set up the module; the rest of the intrinsics will be added in the future. The listed intrinsics are available on Cortex-M too (+dsp is required on some models, except for M4). * Arm DSP: rebase and remove portable vector types. Rebase everything on top of master, since the portable vector types have been removed.
1 parent 5a20376 commit b9de11a

File tree

3 files changed

+184
-0
lines changed

3 files changed

+184
-0
lines changed

coresimd/arm/dsp.rs

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//! ARM DSP Intrinsics.
2+
3+
#[cfg(test)]
4+
use stdsimd_test::assert_instr;
5+
6+
types! {
7+
/// ARM-specific 32-bit wide vector of four packed `i8`.
8+
pub struct int8x4_t(i8, i8, i8, i8);
9+
/// ARM-specific 32-bit wide vector of four packed `u8`.
10+
pub struct uint8x4_t(u8, u8, u8, u8);
11+
/// ARM-specific 32-bit wide vector of two packed `i16`.
12+
pub struct int16x2_t(i16, i16);
13+
/// ARM-specific 32-bit wide vector of two packed `u16`.
14+
pub struct uint16x2_t(u16, u16);
15+
}
16+
17+
extern "C" {
18+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qadd")]
19+
fn arm_qadd(a: i32, b: i32) -> i32;
20+
21+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qsub")]
22+
fn arm_qsub(a: i32, b: i32) -> i32;
23+
24+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qadd8")]
25+
fn arm_qadd8(a: i32, b: i32) -> i32;
26+
27+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qsub8")]
28+
fn arm_qsub8(a: i32, b: i32) -> i32;
29+
30+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qadd16")]
31+
fn arm_qadd16(a: i32, b: i32) -> i32;
32+
33+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.qsub16")]
34+
fn arm_qsub16(a: i32, b: i32) -> i32;
35+
}
36+
37+
/// Signed saturating addition.
38+
///
39+
/// Returns the 32-bit saturating signed equivalent of `a + b`: a result that would overflow `i32` is clamped to `i32::MAX` or `i32::MIN`.
40+
#[inline]
41+
#[cfg_attr(test, assert_instr(qadd))]
42+
pub unsafe fn qadd(a: i32, b: i32) -> i32 {
43+
arm_qadd(a, b)
44+
}
45+
46+
/// Signed saturating subtraction.
47+
///
48+
/// Returns the 32-bit saturating signed equivalent of `a - b`: a result that would overflow `i32` is clamped to `i32::MAX` or `i32::MIN`.
49+
#[inline]
50+
#[cfg_attr(test, assert_instr(qsub))]
51+
pub unsafe fn qsub(a: i32, b: i32) -> i32 {
52+
arm_qsub(a, b)
53+
}
54+
55+
/// Saturating four 8-bit integer additions.
56+
///
57+
/// Returns the 8-bit signed saturated equivalent of the following, computed independently for each `i8` lane:
58+
///
59+
/// res[0] = a[0] + b[0]
60+
/// res[1] = a[1] + b[1]
61+
/// res[2] = a[2] + b[2]
62+
/// res[3] = a[3] + b[3]
63+
#[inline]
64+
#[cfg_attr(test, assert_instr(qadd8))]
65+
pub unsafe fn qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
66+
::mem::transmute(arm_qadd8(::mem::transmute(a), ::mem::transmute(b)))
67+
}
68+
69+
/// Saturating four 8-bit integer subtractions.
70+
///
71+
/// Returns the 8-bit signed saturated equivalent of the following, computed independently for each `i8` lane:
72+
///
73+
/// res[0] = a[0] - b[0]
74+
/// res[1] = a[1] - b[1]
75+
/// res[2] = a[2] - b[2]
76+
/// res[3] = a[3] - b[3]
77+
#[inline]
78+
#[cfg_attr(test, assert_instr(qsub8))]
79+
pub unsafe fn qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
80+
::mem::transmute(arm_qsub8(::mem::transmute(a), ::mem::transmute(b)))
81+
}
82+
83+
/// Saturating two 16-bit integer subtractions.
84+
///
85+
/// Returns the 16-bit signed saturated equivalent of the following, computed independently for each `i16` lane:
86+
///
87+
/// res[0] = a[0] - b[0]
88+
/// res[1] = a[1] - b[1]
89+
#[inline]
90+
#[cfg_attr(test, assert_instr(qsub16))]
91+
pub unsafe fn qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
92+
::mem::transmute(arm_qsub16(::mem::transmute(a), ::mem::transmute(b)))
93+
}
94+
95+
/// Saturating two 16-bit integer additions.
96+
///
97+
/// Returns the 16-bit signed saturated equivalent of the following, computed independently for each `i16` lane:
98+
///
99+
/// res[0] = a[0] + b[0]
100+
/// res[1] = a[1] + b[1]
101+
#[inline]
102+
#[cfg_attr(test, assert_instr(qadd16))]
103+
pub unsafe fn qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
104+
::mem::transmute(arm_qadd16(::mem::transmute(a), ::mem::transmute(b)))
105+
}
106+
107+
#[cfg(test)]
108+
mod tests {
109+
use coresimd::arm::*;
110+
use coresimd::simd::*;
111+
use std::mem;
112+
use stdsimd_test::simd_test;
113+
114+
#[test]
115+
fn qadd() {
116+
unsafe {
117+
assert_eq!(dsp::qadd(-10, 60), 50);
118+
assert_eq!(dsp::qadd(::std::i32::MAX, 10), ::std::i32::MAX);
119+
assert_eq!(dsp::qadd(::std::i32::MIN, -10), ::std::i32::MIN);
120+
}
121+
}
122+
123+
#[test]
124+
fn qsub() {
125+
unsafe {
126+
assert_eq!(dsp::qsub(10, 60), -50);
127+
assert_eq!(dsp::qsub(::std::i32::MAX, -10), ::std::i32::MAX);
128+
assert_eq!(dsp::qsub(::std::i32::MIN, 10), ::std::i32::MIN);
129+
}
130+
}
131+
132+
#[test]
133+
fn qadd8() {
134+
unsafe {
135+
let a = i8x4::new(1, 2, 3, ::std::i8::MAX);
136+
let b = i8x4::new(2, -1, 0, 1);
137+
let c = i8x4::new(3, 1, 3, ::std::i8::MAX);
138+
let r: i8x4 = ::mem::transmute(dsp::qadd8(::mem::transmute(a), ::mem::transmute(b)));
139+
assert_eq!(r, c);
140+
}
141+
}
142+
143+
#[test]
144+
fn qsub8() {
145+
unsafe {
146+
let a = i8x4::new(1, 2, 3, ::std::i8::MIN);
147+
let b = i8x4::new(2, -1, 0, 1);
148+
let c = i8x4::new(-1, 3, 3, ::std::i8::MIN);
149+
let r: i8x4 = ::mem::transmute(dsp::qsub8(::mem::transmute(a),::mem::transmute(b)));
150+
assert_eq!(r, c);
151+
}
152+
}
153+
154+
#[test]
155+
fn qadd16() {
156+
unsafe {
157+
let a = i16x2::new(1, 2);
158+
let b = i16x2::new(2, -1);
159+
let c = i16x2::new(3, 1);
160+
let r: i16x2 = ::mem::transmute(dsp::qadd16(::mem::transmute(a),::mem::transmute(b)));
161+
assert_eq!(r, c);
162+
}
163+
}
164+
165+
#[test]
166+
fn qsub16() {
167+
unsafe {
168+
let a = i16x2::new(10, 20);
169+
let b = i16x2::new(20, -10);
170+
let c = i16x2::new(-10, 30);
171+
let r: i16x2 = ::mem::transmute(dsp::qsub16(::mem::transmute(a), ::mem::transmute(b)));
172+
assert_eq!(r, c);
173+
}
174+
}
175+
}

coresimd/arm/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ mod v7;
2020
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
2121
pub use self::v7::*;
2222

23+
#[cfg(all(target_arch = "arm", target_feature = "v7"))]
24+
mod dsp;
25+
#[cfg(all(target_arch = "arm", target_feature = "v7"))]
26+
pub use self::dsp::*;
27+
2328
// NEON is supported on AArch64, and on ARM when built with the v7 and neon
2429
// features. Building ARM without neon produces incorrect codegen.
2530
#[cfg(

crates/stdsimd-test/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,10 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
351351
// cases exceed the limit.
352352
"cvtpi2ps" => 25,
353353

354+
// For these intrinsics the overall length, including the 'mergefunc' workaround
355+
// overhead, is exactly 20 instructions, which fails the default strict `< 20` check, so raise the limit.
356+
"qsub8" | "qadd8" | "qsub16" | "qadd16" => 22,
357+
354358
_ => 20,
355359
};
356360
let probably_only_one_instruction = instrs.len() < instruction_limit;

0 commit comments

Comments
 (0)