@@ -55,31 +55,24 @@ fn decode_length(short_offset_run_header: u32) -> usize {
55
55
( short_offset_run_header >> 21 ) as usize
56
56
}
57
57
58
- /// # Safety
59
- ///
60
- /// The last element of `short_offset_runs` must be greater than `std::char::MAX`.
61
58
#[ inline( always) ]
62
- unsafe fn skip_search < const SOR : usize , const OFFSETS : usize > (
63
- needle : char ,
59
+ fn skip_search < const SOR : usize , const OFFSETS : usize > (
60
+ needle : u32 ,
64
61
short_offset_runs : & [ u32 ; SOR ] ,
65
62
offsets : & [ u8 ; OFFSETS ] ,
66
63
) -> bool {
67
- let needle = needle as u32 ;
68
-
64
+ // Note that `last_idx` *cannot* be past the end of the array, as the last
65
+ // element is greater than std::char::MAX (the largest possible needle, as callers pass `char as u32`).
66
+ //
67
+ // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
68
+ // location cannot be past it, so Err(idx) != length either.
69
+ //
70
+ // This means that the (still bounds-checked) indexing into `short_offset_runs` below cannot fail.
69
71
let last_idx =
70
72
match short_offset_runs. binary_search_by_key ( & ( needle << 11 ) , |header| header << 11 ) {
71
73
Ok ( idx) => idx + 1 ,
72
74
Err ( idx) => idx,
73
75
} ;
74
- // SAFETY: `last_idx` *cannot* be past the end of the array, as the last
75
- // element is greater than `std::char::MAX` (the largest possible needle)
76
- // as guaranteed by the caller.
77
- //
78
- // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the
79
- // correct location cannot be past it, so `Err(idx) => idx != length` either.
80
- //
81
- // This means that we can avoid bounds checking for the accesses below, too.
82
- unsafe { crate :: hint:: assert_unchecked ( last_idx < SOR ) } ;
83
76
84
77
let mut offset_idx = decode_length ( short_offset_runs[ last_idx] ) ;
85
78
let length = if let Some ( next) = short_offset_runs. get ( last_idx + 1 ) {
@@ -176,9 +169,11 @@ pub mod alphabetic {
176
169
0 , 0 , 0 , 0 , 5 , 0 , 0 ,
177
170
] ;
178
171
pub fn lookup ( c : char ) -> bool {
179
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
180
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
181
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
172
+ super :: skip_search (
173
+ c as u32 ,
174
+ & SHORT_OFFSET_RUNS ,
175
+ & OFFSETS ,
176
+ )
182
177
}
183
178
}
184
179
@@ -227,9 +222,11 @@ pub mod case_ignorable {
227
222
1 , 61 , 4 , 0 , 5 , 254 , 2 , 0 , 7 , 109 , 8 , 0 , 5 , 0 , 1 , 30 , 96 , 128 , 240 , 0 ,
228
223
] ;
229
224
pub fn lookup ( c : char ) -> bool {
230
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
231
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
232
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
225
+ super :: skip_search (
226
+ c as u32 ,
227
+ & SHORT_OFFSET_RUNS ,
228
+ & OFFSETS ,
229
+ )
233
230
}
234
231
}
235
232
@@ -255,9 +252,11 @@ pub mod cased {
255
252
8 , 0 , 10 , 1 , 20 , 6 , 6 , 0 , 62 , 0 , 68 , 0 , 26 , 6 , 26 , 6 , 26 , 0 ,
256
253
] ;
257
254
pub fn lookup ( c : char ) -> bool {
258
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
259
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
260
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
255
+ super :: skip_search (
256
+ c as u32 ,
257
+ & SHORT_OFFSET_RUNS ,
258
+ & OFFSETS ,
259
+ )
261
260
}
262
261
}
263
262
@@ -270,9 +269,11 @@ pub mod cc {
270
269
0 , 32 , 95 , 33 , 0 ,
271
270
] ;
272
271
pub fn lookup ( c : char ) -> bool {
273
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
274
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
275
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
272
+ super :: skip_search (
273
+ c as u32 ,
274
+ & SHORT_OFFSET_RUNS ,
275
+ & OFFSETS ,
276
+ )
276
277
}
277
278
}
278
279
@@ -321,9 +322,11 @@ pub mod grapheme_extend {
321
322
322
323
#[ inline( never) ]
323
324
fn lookup_slow ( c : char ) -> bool {
324
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
325
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
326
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
325
+ super :: skip_search (
326
+ c as u32 ,
327
+ & SHORT_OFFSET_RUNS ,
328
+ & OFFSETS ,
329
+ )
327
330
}
328
331
}
329
332
@@ -458,9 +461,11 @@ pub mod n {
458
461
10 , 247 , 10 , 0 , 9 , 128 , 10 , 0 , 59 , 1 , 3 , 1 , 4 , 76 , 45 , 1 , 15 , 0 , 13 , 0 , 10 , 0 ,
459
462
] ;
460
463
pub fn lookup ( c : char ) -> bool {
461
- const { assert ! ( * SHORT_OFFSET_RUNS . last( ) . unwrap( ) > ( char :: MAX as u32 ) ) ; }
462
- // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`.
463
- unsafe { super :: skip_search ( c, & SHORT_OFFSET_RUNS , & OFFSETS ) }
464
+ super :: skip_search (
465
+ c as u32 ,
466
+ & SHORT_OFFSET_RUNS ,
467
+ & OFFSETS ,
468
+ )
464
469
}
465
470
}
466
471
0 commit comments