Skip to content

Commit 4192cb2

Browse files
committed
Revert "Allow optimizing out panic_bounds_check in Unicode checks."
This reverts commit 222adac.
1 parent b2ce4e4 commit 4192cb2

File tree

3 files changed

+53
-65
lines changed

3 files changed

+53
-65
lines changed

library/core/src/unicode/unicode_data.rs

+39 -34
Original file line number | Diff line number | Diff line change
@@ -55,31 +55,24 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5555
(short_offset_run_header >> 21) as usize
5656
}
5757

58-
/// # Safety
59-
///
60-
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
6158
#[inline(always)]
62-
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
63-
needle: char,
59+
fn skip_search<const SOR: usize, const OFFSETS: usize>(
60+
needle: u32,
6461
short_offset_runs: &[u32; SOR],
6562
offsets: &[u8; OFFSETS],
6663
) -> bool {
67-
let needle = needle as u32;
68-
64+
// Note that this *cannot* be past the end of the array, as the last
65+
// element is greater than std::char::MAX (the largest possible needle).
66+
//
67+
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68+
// location cannot be past it, so Err(idx) != length either.
69+
//
70+
// This means that we can avoid bounds checking for the accesses below, too.
6971
let last_idx =
7072
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
7173
Ok(idx) => idx + 1,
7274
Err(idx) => idx,
7375
};
74-
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75-
// element is greater than `std::char::MAX` (the largest possible needle)
76-
// as guaranteed by the caller.
77-
//
78-
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79-
// correct location cannot be past it, so `Err(idx) => idx != length` either.
80-
//
81-
// This means that we can avoid bounds checking for the accesses below, too.
82-
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
8376

8477
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
8578
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
@@ -176,9 +169,11 @@ pub mod alphabetic {
176169
0, 0, 0, 0, 5, 0, 0,
177170
];
178171
pub fn lookup(c: char) -> bool {
179-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
180-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
172+
super::skip_search(
173+
c as u32,
174+
&SHORT_OFFSET_RUNS,
175+
&OFFSETS,
176+
)
182177
}
183178
}
184179

@@ -227,9 +222,11 @@ pub mod case_ignorable {
227222
1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
228223
];
229224
pub fn lookup(c: char) -> bool {
230-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
231-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
225+
super::skip_search(
226+
c as u32,
227+
&SHORT_OFFSET_RUNS,
228+
&OFFSETS,
229+
)
233230
}
234231
}
235232

@@ -255,9 +252,11 @@ pub mod cased {
255252
8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
256253
];
257254
pub fn lookup(c: char) -> bool {
258-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
259-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
255+
super::skip_search(
256+
c as u32,
257+
&SHORT_OFFSET_RUNS,
258+
&OFFSETS,
259+
)
261260
}
262261
}
263262

@@ -270,9 +269,11 @@ pub mod cc {
270269
0, 32, 95, 33, 0,
271270
];
272271
pub fn lookup(c: char) -> bool {
273-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
274-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
272+
super::skip_search(
273+
c as u32,
274+
&SHORT_OFFSET_RUNS,
275+
&OFFSETS,
276+
)
276277
}
277278
}
278279

@@ -321,9 +322,11 @@ pub mod grapheme_extend {
321322

322323
#[inline(never)]
323324
fn lookup_slow(c: char) -> bool {
324-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
325-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
326-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
325+
super::skip_search(
326+
c as u32,
327+
&SHORT_OFFSET_RUNS,
328+
&OFFSETS,
329+
)
327330
}
328331
}
329332

@@ -458,9 +461,11 @@ pub mod n {
458461
10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
459462
];
460463
pub fn lookup(c: char) -> bool {
461-
const { assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }
462-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
463-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
464+
super::skip_search(
465+
c as u32,
466+
&SHORT_OFFSET_RUNS,
467+
&OFFSETS,
468+
)
464469
}
465470
}
466471

src/tools/unicode-table-generator/src/range_search.rs

+9 -16
Original file line number | Diff line number | Diff line change
@@ -53,31 +53,24 @@ fn decode_length(short_offset_run_header: u32) -> usize {
5353
(short_offset_run_header >> 21) as usize
5454
}
5555

56-
/// # Safety
57-
///
58-
/// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
5956
#[inline(always)]
60-
unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
61-
needle: char,
57+
fn skip_search<const SOR: usize, const OFFSETS: usize>(
58+
needle: u32,
6259
short_offset_runs: &[u32; SOR],
6360
offsets: &[u8; OFFSETS],
6461
) -> bool {
65-
let needle = needle as u32;
66-
62+
// Note that this *cannot* be past the end of the array, as the last
63+
// element is greater than std::char::MAX (the largest possible needle).
64+
//
65+
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
66+
// location cannot be past it, so Err(idx) != length either.
67+
//
68+
// This means that we can avoid bounds checking for the accesses below, too.
6769
let last_idx =
6870
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
6971
Ok(idx) => idx + 1,
7072
Err(idx) => idx,
7173
};
72-
// SAFETY: `last_idx` *cannot* be past the end of the array, as the last
73-
// element is greater than `std::char::MAX` (the largest possible needle)
74-
// as guaranteed by the caller.
75-
//
76-
// So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
77-
// correct location cannot be past it, so `Err(idx) => idx != length` either.
78-
//
79-
// This means that we can avoid bounds checking for the accesses below, too.
80-
unsafe { crate::hint::assert_unchecked(last_idx < SOR) };
8174

8275
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
8376
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {

src/tools/unicode-table-generator/src/skiplist.rs

+5 -15
Original file line number | Diff line number | Diff line change
@@ -110,21 +110,11 @@ impl RawEmitter {
110110
} else {
111111
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
112112
}
113-
writeln!(
114-
&mut self.file,
115-
" const {{ assert!(*SHORT_OFFSET_RUNS.last().unwrap() > (char::MAX as u32)); }}",
116-
)
117-
.unwrap();
118-
writeln!(
119-
&mut self.file,
120-
" // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.",
121-
)
122-
.unwrap();
123-
writeln!(
124-
&mut self.file,
125-
" unsafe {{ super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }}"
126-
)
127-
.unwrap();
113+
writeln!(&mut self.file, " super::skip_search(",).unwrap();
114+
writeln!(&mut self.file, " c as u32,").unwrap();
115+
writeln!(&mut self.file, " &SHORT_OFFSET_RUNS,").unwrap();
116+
writeln!(&mut self.file, " &OFFSETS,").unwrap();
117+
writeln!(&mut self.file, " )").unwrap();
128118
writeln!(&mut self.file, "}}").unwrap();
129119
}
130120
}

0 commit comments

Comments
 (0)