diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c337c26a2..003102d59 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -160,6 +160,21 @@ jobs:
           rm -rf /tmp/.buildx-cache
           mv /tmp/.buildx-cache-new /tmp/.buildx-cache
 
+  miri:
+    name: Miri
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Install Rust (rustup)
+        run: rustup update nightly --no-self-update && rustup default nightly
+        shell: bash
+      - run: rustup component add miri
+      - run: cargo miri setup
+      - uses: Swatinem/rust-cache@v2
+      - run: ./ci/miri.sh
+
   rustfmt:
     name: Rustfmt
     runs-on: ubuntu-latest
@@ -190,6 +205,7 @@ jobs:
       - test
       - rustfmt
       - clippy
+      - miri
     runs-on: ubuntu-latest
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
diff --git a/ci/miri.sh b/ci/miri.sh
new file mode 100755
index 000000000..f9a1240a4
--- /dev/null
+++ b/ci/miri.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+set -ex
+
+# We need Tree Borrows as some of our raw pointer patterns are not
+# compatible with Stacked Borrows.
+export MIRIFLAGS="-Zmiri-tree-borrows"
+
+# One target that sets `mem-unaligned` and one that does not,
+# and a big-endian target.
+TARGETS=(x86_64-unknown-linux-gnu
+    armv7-unknown-linux-gnueabihf
+    s390x-unknown-linux-gnu)
+for TARGET in "${TARGETS[@]}"; do
+    # Only run the `mem` tests to avoid this taking too long.
+    cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target $TARGET -- mem
+done
diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs
index c602a67db..dc12d6996 100644
--- a/compiler-builtins/src/mem/impls.rs
+++ b/compiler-builtins/src/mem/impls.rs
@@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
+/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
+/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+
+    let mut i = 0;
+    let mut out = 0usize;
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
+    debug_assert!(i == load_sz);
+    out
+}
+
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
     #[inline(always)]
@@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
         let dest_end = dest.wrapping_add(n) as *mut usize;
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
         // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
 
-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
             src_aligned = src_aligned.wrapping_add(1);
             let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+            } else {
+                prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+            };
             prev_word = cur_word;
 
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
             dest_usize = dest_usize.wrapping_add(1);
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
+        } else {
+            prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
+        };
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);
 
-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
             src_aligned = src_aligned.wrapping_sub(1);
             let cur_word = *src_aligned;
-            #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
-            #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = if cfg!(target_endian = "little") {
+                prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+            } else {
+                prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+            };
            prev_word = cur_word;
 
             dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        let reassembled = if cfg!(target_endian = "little") {
+            prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
+        } else {
+            prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
+        };
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
diff --git a/testcrate/tests/mem.rs b/testcrate/tests/mem.rs
index 48ac95adc..d838ef159 100644
--- a/testcrate/tests/mem.rs
+++ b/testcrate/tests/mem.rs
@@ -128,11 +128,13 @@ fn memcmp_eq() {
 #[test]
 fn memcmp_ne() {
     let arr1 @ arr2 = gen_arr::<256>();
-    for i in 0..256 {
+    // Reduce iteration count in Miri as it is too slow otherwise.
+    let limit = if cfg!(miri) { 64 } else { 256 };
+    for i in 0..limit {
         let mut diff_arr = arr1;
         diff_arr.0[i] = 127;
         let expect = diff_arr.0[i].cmp(&arr2.0[i]);
-        for k in i + 1..256 {
+        for k in i + 1..limit {
             let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
             assert_eq!(expect, result.cmp(&0));
         }
@@ -230,6 +232,23 @@ fn memmove_backward_aligned() {
     }
 }
 
+#[test]
+fn memmove_misaligned_bounds() {
+    // The above tests have the downside that the addresses surrounding the range-to-copy are all
+    // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
+    // an array that has just the right size. We test a few times to avoid it being accidentally
+    // aligned.
+    for _ in 0..8 {
+        let mut arr1 = [0u8; 17];
+        let mut arr2 = [0u8; 17];
+        unsafe {
+            // Copy both ways so we hit both the forward and backward cases.
+            memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
+            memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
+        }
+    }
+}
+
 #[test]
 fn memset_backward_misaligned_nonaligned_start() {
     let mut arr = gen_arr::<32>();
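
Illustrative sketch (not part of the patch): the misaligned copy loops above reassemble each destination word from two adjacent aligned loads via shifts. The standalone program below models the little-endian branch, `prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)`, using safe `from_le_bytes` reads in place of the raw aligned pointer loads, and checks it against a direct read of the same bytes. The buffer and variable names other than `prev_word`, `cur_word`, `shift`, and `WORD_SIZE` are invented for the demonstration; the big-endian branch mirrors this with the shift directions swapped.

// Standalone demo of the word-reassembly trick (little-endian formula).
// No unsafe is needed here because the "aligned loads" are modeled with
// `from_le_bytes` on in-bounds slices.
fn main() {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    // Two aligned words worth of recognizable bytes: 0, 1, 2, ...
    let buf: [u8; 2 * WORD_SIZE] = core::array::from_fn(|i| i as u8);

    // `offset` plays the role of `src as usize & WORD_MASK`; it is never 0 on the
    // misaligned paths, so the shift amounts below stay strictly below the word width.
    for offset in 1..WORD_SIZE {
        let shift = offset * 8;
        // The two aligned loads that bracket the misaligned word.
        let prev_word = usize::from_le_bytes(buf[..WORD_SIZE].try_into().unwrap());
        let cur_word = usize::from_le_bytes(buf[WORD_SIZE..].try_into().unwrap());
        // Little-endian reassembly, as in `copy_forward_misaligned_words`.
        let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);

        // Reference value: what a (conceptually misaligned) read at `offset` would return.
        let expected = usize::from_le_bytes(buf[offset..offset + WORD_SIZE].try_into().unwrap());
        assert_eq!(reassembled, expected);
    }
    println!("reassembly matches direct reads for offsets 1..{}", WORD_SIZE);
}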