From 297691bd68e44fd95500a8d31265adc20689122d Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 7 Sep 2021 10:11:42 +0200 Subject: [PATCH 1/4] Reimplement aarch64 vld1* instructions to not cause individual loads under certain circumstances. --- crates/core_arch/src/aarch64/neon/mod.rs | 183 +++-------------------- 1 file changed, 24 insertions(+), 159 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 686eb9446a..45ba3c7c03 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -464,16 +464,7 @@ pub unsafe fn vcopy_laneq_f64( #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - transmute(i8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const int8x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -481,24 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - transmute(i8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + core::ptr::read_unaligned(ptr as *const int8x16_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -506,12 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - transmute(i16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const int16x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -519,16 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - transmute(i16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const int16x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -536,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - transmute(i32x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const int32x2_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -544,12 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - transmute(i32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const int32x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -557,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - transmute(i64x1::new(*ptr)) + core::ptr::read_unaligned(ptr as *const int64x1_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -565,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - transmute(i64x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const int64x2_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -573,16 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(u8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const uint8x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -590,24 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(u8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + core::ptr::read_unaligned(ptr as *const uint8x16_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -615,12 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(u16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const uint16x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -628,16 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(u16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const uint16x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -645,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(u32x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const uint32x2_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -653,12 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(u32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const uint32x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -666,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(u64x1::new(*ptr)) + core::ptr::read_unaligned(ptr as *const uint64x1_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -674,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(u64x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const uint64x2_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -682,16 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(u8x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const poly8x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -699,24 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(u8x16::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - *ptr.offset(8), - *ptr.offset(9), - *ptr.offset(10), - *ptr.offset(11), - *ptr.offset(12), - *ptr.offset(13), - *ptr.offset(14), - *ptr.offset(15), - )) + core::ptr::read_unaligned(ptr as *const poly8x16_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -724,12 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(u16x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const poly16x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -737,16 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(u16x8::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - *ptr.offset(4), - *ptr.offset(5), - *ptr.offset(6), - *ptr.offset(7), - )) + core::ptr::read_unaligned(ptr as *const poly16x8_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -754,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - transmute(f32x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const float32x2_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -762,12 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - transmute(f32x4::new( - *ptr, - *ptr.offset(1), - *ptr.offset(2), - *ptr.offset(3), - )) + core::ptr::read_unaligned(ptr as *const float32x4_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -775,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { - transmute(f64x1::new(*ptr)) + core::ptr::read_unaligned(ptr as *const float64x1_t) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -783,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { - transmute(f64x2::new(*ptr, *ptr.offset(1))) + core::ptr::read_unaligned(ptr as *const float64x2_t) } /// Store multiple single-element structures from one, two, three, or four registers. From dcacddc43dc1e5a0663f8a6e8782c33fd9cbc795 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 8 Sep 2021 21:47:09 +0200 Subject: [PATCH 2/4] use ptr.cast() and type inference instead of explicit casts --- crates/core_arch/src/aarch64/neon/mod.rs | 48 ++++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 45ba3c7c03..d505eb134f 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -464,7 +464,7 @@ pub unsafe fn vcopy_laneq_f64( #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - core::ptr::read_unaligned(ptr as *const int8x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -472,7 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - core::ptr::read_unaligned(ptr as *const int8x16_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -480,7 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - core::ptr::read_unaligned(ptr as *const int16x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -488,7 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - core::ptr::read_unaligned(ptr as *const int16x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -496,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - core::ptr::read_unaligned(ptr as *const int32x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -504,7 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - core::ptr::read_unaligned(ptr as *const int32x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -512,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - core::ptr::read_unaligned(ptr as *const int64x1_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -520,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - core::ptr::read_unaligned(ptr as *const int64x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -528,7 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - core::ptr::read_unaligned(ptr as *const uint8x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -536,7 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - core::ptr::read_unaligned(ptr as *const uint8x16_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -544,7 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - core::ptr::read_unaligned(ptr as *const uint16x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -552,7 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - core::ptr::read_unaligned(ptr as *const uint16x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -560,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - core::ptr::read_unaligned(ptr as *const uint32x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -568,7 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - core::ptr::read_unaligned(ptr as *const uint32x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -576,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - core::ptr::read_unaligned(ptr as *const uint64x1_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -584,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - core::ptr::read_unaligned(ptr as *const uint64x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -592,7 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - core::ptr::read_unaligned(ptr as *const poly8x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -600,7 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - core::ptr::read_unaligned(ptr as *const poly8x16_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -608,7 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - core::ptr::read_unaligned(ptr as *const poly16x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -616,7 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - core::ptr::read_unaligned(ptr as *const poly16x8_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -624,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - core::ptr::read_unaligned(ptr as *const float32x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -632,7 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - core::ptr::read_unaligned(ptr as *const float32x4_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -640,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { - core::ptr::read_unaligned(ptr as *const float64x1_t) + core::ptr::read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -648,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { - core::ptr::read_unaligned(ptr as *const float64x2_t) + core::ptr::read_unaligned(ptr.cast()) } /// Store multiple single-element structures from one, two, three, or four registers. From 5ec3699d9ac63e1b3680c8bd6ad4b7c17f78fa3a Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 8 Sep 2021 21:52:21 +0200 Subject: [PATCH 3/4] Replace copy_nonoverlapping() in vst1_* with write_unaligned() --- crates/core_arch/src/aarch64/neon/mod.rs | 159 ++++------------------- 1 file changed, 27 insertions(+), 132 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index d505eb134f..c09a885872 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -12,8 +12,7 @@ pub use self::generated::*; use crate::{ core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, - mem::{size_of, transmute, zeroed}, - ptr::copy_nonoverlapping, + mem::{transmute, zeroed}, }; #[cfg(test)] use stdarch_test::assert_instr; @@ -657,11 +656,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - copy_nonoverlapping( - &a as *const int8x8_t as *const i8, - ptr as *mut i8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -670,11 +665,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - copy_nonoverlapping( - &a as *const int8x16_t as *const i8, - ptr as *mut i8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -683,11 +674,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - copy_nonoverlapping( - &a as *const int16x4_t as *const i16, - ptr as *mut i16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -696,11 +683,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - copy_nonoverlapping( - &a as *const int16x8_t as *const i16, - ptr as *mut i16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -709,11 +692,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - copy_nonoverlapping( - &a as *const int32x2_t as *const i32, - ptr as *mut i32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -722,11 +701,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - copy_nonoverlapping( - &a as *const int32x4_t as *const i32, - ptr as *mut i32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -735,11 +710,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - copy_nonoverlapping( - &a as *const int64x1_t as *const i64, - ptr as *mut i64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -748,11 +719,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - copy_nonoverlapping( - &a as *const int64x2_t as *const i64, - ptr as *mut i64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -761,11 +728,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - copy_nonoverlapping( - &a as *const uint8x8_t as *const u8, - ptr as *mut u8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -774,11 +737,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - copy_nonoverlapping( - &a as *const uint8x16_t as *const u8, - ptr as *mut u8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -787,11 +746,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - copy_nonoverlapping( - &a as *const uint16x4_t as *const u16, - ptr as *mut u16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -800,11 +755,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - copy_nonoverlapping( - &a as *const uint16x8_t as *const u16, - ptr as *mut u16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -813,11 +764,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - copy_nonoverlapping( - &a as *const uint32x2_t as *const u32, - ptr as *mut u32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -826,11 +773,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - copy_nonoverlapping( - &a as *const uint32x4_t as *const u32, - ptr as *mut u32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -839,11 +782,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - copy_nonoverlapping( - &a as *const uint64x1_t as *const u64, - ptr as *mut u64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -852,11 +791,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - copy_nonoverlapping( - &a as *const uint64x2_t as *const u64, - ptr as *mut u64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -865,11 +800,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - copy_nonoverlapping( - &a as *const poly8x8_t as *const p8, - ptr as *mut p8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -878,11 +809,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - copy_nonoverlapping( - &a as *const poly8x16_t as *const p8, - ptr as *mut p8, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -891,11 +818,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - copy_nonoverlapping( - &a as *const poly16x4_t as *const p16, - ptr as *mut p16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -904,11 +827,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - copy_nonoverlapping( - &a as *const poly16x8_t as *const p16, - ptr as *mut p16, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -917,11 +836,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - copy_nonoverlapping( - &a as *const poly64x1_t as *const p64, - ptr as *mut p64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -930,11 +845,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - copy_nonoverlapping( - &a as *const poly64x2_t as *const p64, - ptr as *mut p64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -943,11 +854,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - copy_nonoverlapping( - &a as *const float32x2_t as *const f32, - ptr as *mut f32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -956,11 +863,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - copy_nonoverlapping( - &a as *const float32x4_t as *const f32, - ptr as *mut f32, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -969,11 +872,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { - copy_nonoverlapping( - &a as *const float64x1_t as *const f64, - ptr as *mut f64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -982,11 +881,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { - copy_nonoverlapping( - &a as *const float64x2_t as *const f64, - ptr as *mut f64, - size_of::(), - ) + core::ptr::write_unaligned(ptr.cast(), a); } /// Absolute Value (wrapping). From a6d925e46e0d3e8a2c81380862286cda9cfeb805 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 8 Sep 2021 21:54:08 +0200 Subject: [PATCH 4/4] use ptr::{read_unaligned, write_unaligned} --- crates/core_arch/src/aarch64/neon/mod.rs | 101 ++++++++++++----------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index c09a885872..a85d979a59 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -13,6 +13,7 @@ use crate::{ core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, mem::{transmute, zeroed}, + ptr::{read_unaligned, write_unaligned}, }; #[cfg(test)] use stdarch_test::assert_instr; @@ -463,7 +464,7 @@ pub unsafe fn vcopy_laneq_f64( #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -471,7 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -479,7 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -487,7 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -495,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -503,7 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -511,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -519,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -527,7 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -535,7 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -543,7 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -551,7 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -559,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -567,7 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -575,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -583,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -591,7 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -599,7 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -607,7 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -615,7 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -623,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -631,7 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -639,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Load multiple single-element structures to one, two, three, or four registers. @@ -647,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ldr))] pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { - core::ptr::read_unaligned(ptr.cast()) + read_unaligned(ptr.cast()) } /// Store multiple single-element structures from one, two, three, or four registers. @@ -656,7 +657,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -665,7 +666,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -674,7 +675,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -683,7 +684,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -692,7 +693,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -701,7 +702,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -710,7 +711,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -719,7 +720,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -728,7 +729,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -737,7 +738,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -746,7 +747,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -755,7 +756,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -764,7 +765,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -773,7 +774,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -782,7 +783,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -791,7 +792,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -800,7 +801,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -809,7 +810,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -818,7 +819,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Store multiple single-element structures from one, two, three, or four registers. @@ -827,7 +828,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -836,7 +837,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -845,7 +846,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -854,7 +855,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -863,7 +864,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -872,7 +873,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } // Store multiple single-element structures from one, two, three, or four registers. @@ -881,7 +882,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { #[cfg_attr(test, assert_instr(str))] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { - core::ptr::write_unaligned(ptr.cast(), a); + write_unaligned(ptr.cast(), a); } /// Absolute Value (wrapping).