diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index 5b65d862be102..1d337694944bc 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -8,7 +8,7 @@ use crate::collections::TryReserveError; use crate::fmt::Write; use crate::rc::Rc; use crate::sync::Arc; -use crate::sys_common::{AsInner, IntoInner}; +use crate::sys_common::{AsInner, FromInner, IntoInner}; use crate::{fmt, mem, str}; #[cfg(test)] @@ -25,6 +25,37 @@ pub struct Slice { pub inner: [u8], } +impl IntoInner> for Buf { + fn into_inner(self) -> Vec { + self.inner + } +} + +impl FromInner> for Buf { + fn from_inner(inner: Vec) -> Self { + Buf { inner } + } +} + +impl AsInner<[u8]> for Buf { + #[inline] + fn as_inner(&self) -> &[u8] { + &self.inner + } +} + +impl fmt::Debug for Buf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_slice(), f) + } +} + +impl fmt::Display for Buf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_slice(), f) + } +} + impl fmt::Debug for Slice { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f) @@ -55,18 +86,6 @@ impl fmt::Display for Slice { } } -impl fmt::Debug for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), formatter) - } -} - -impl fmt::Display for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), formatter) - } -} - impl Clone for Buf { #[inline] fn clone(&self) -> Self { @@ -79,19 +98,6 @@ impl Clone for Buf { } } -impl IntoInner> for Buf { - fn into_inner(self) -> Vec { - self.inner - } -} - -impl AsInner<[u8]> for Buf { - #[inline] - fn as_inner(&self) -> &[u8] { - &self.inner - } -} - impl Buf { #[inline] pub fn into_encoded_bytes(self) -> Vec { @@ -103,6 +109,12 @@ impl Buf { Self { inner: s } } + #[inline] + pub fn into_string(self) -> Result { + String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) + } + + #[inline] pub fn from_string(s: String) -> Buf { Buf { inner: s.into_bytes() } } @@ -122,6 +134,11 @@ impl Buf { self.inner.capacity() } + #[inline] + pub fn push_slice(&mut self, s: &Slice) { + self.inner.extend_from_slice(&s.inner) + } + #[inline] pub fn reserve(&mut self, additional: usize) { self.inner.reserve(additional) @@ -157,7 +174,7 @@ impl Buf { // SAFETY: Slice just wraps [u8], // and &*self.inner is &[u8], therefore // transmuting &[u8] to &Slice is safe. - unsafe { mem::transmute(&*self.inner) } + unsafe { mem::transmute(self.inner.as_slice()) } } #[inline] @@ -165,15 +182,7 @@ impl Buf { // SAFETY: Slice just wraps [u8], // and &mut *self.inner is &mut [u8], therefore // transmuting &mut [u8] to &mut Slice is safe. - unsafe { mem::transmute(&mut *self.inner) } - } - - pub fn into_string(self) -> Result { - String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) - } - - pub fn push_slice(&mut self, s: &Slice) { - self.inner.extend_from_slice(&s.inner) + unsafe { mem::transmute(self.inner.as_mut_slice()) } } #[inline] @@ -278,18 +287,22 @@ impl Slice { unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } } + #[inline] pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { str::from_utf8(&self.inner) } + #[inline] pub fn to_string_lossy(&self) -> Cow<'_, str> { String::from_utf8_lossy(&self.inner) } + #[inline] pub fn to_owned(&self) -> Buf { Buf { inner: self.inner.to_vec() } } + #[inline] pub fn clone_into(&self, buf: &mut Buf) { self.inner.clone_into(&mut buf.inner) } @@ -300,6 +313,7 @@ impl Slice { unsafe { mem::transmute(boxed) } } + #[inline] pub fn empty_box() -> Box { let boxed: Box<[u8]> = Default::default(); unsafe { mem::transmute(boxed) } diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index a4ad5966afe57..19728d33990ac 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -10,11 +10,16 @@ use crate::sys_common::wtf8::{Wtf8, Wtf8Buf, check_utf8_boundary}; use crate::sys_common::{AsInner, FromInner, IntoInner}; use crate::{fmt, mem}; -#[derive(Clone, Hash)] +#[derive(Hash)] pub struct Buf { pub inner: Wtf8Buf, } +#[repr(transparent)] +pub struct Slice { + pub inner: Wtf8, +} + impl IntoInner for Buf { fn into_inner(self) -> Wtf8Buf { self.inner @@ -35,31 +40,38 @@ impl AsInner for Buf { } impl fmt::Debug for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), formatter) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_slice(), f) } } impl fmt::Display for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), formatter) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_slice(), f) } } -#[repr(transparent)] -pub struct Slice { - pub inner: Wtf8, -} - impl fmt::Debug for Slice { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner, formatter) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, f) } } impl fmt::Display for Slice { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.inner, formatter) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.inner, f) + } +} + +impl Clone for Buf { + #[inline] + fn clone(&self) -> Self { + Buf { inner: self.inner.clone() } + } + + #[inline] + fn clone_from(&mut self, source: &Self) { + self.inner.clone_from(&source.inner) } } @@ -74,62 +86,57 @@ impl Buf { unsafe { Self { inner: Wtf8Buf::from_bytes_unchecked(s) } } } - pub fn with_capacity(capacity: usize) -> Buf { - Buf { inner: Wtf8Buf::with_capacity(capacity) } - } - - pub fn clear(&mut self) { - self.inner.clear() - } - - pub fn capacity(&self) -> usize { - self.inner.capacity() + #[inline] + pub fn into_string(self) -> Result { + self.inner.into_string().map_err(|buf| Buf { inner: buf }) } + #[inline] pub fn from_string(s: String) -> Buf { Buf { inner: Wtf8Buf::from_string(s) } } - pub fn as_slice(&self) -> &Slice { - // SAFETY: Slice is just a wrapper for Wtf8, - // and self.inner.as_slice() returns &Wtf8. - // Therefore, transmuting &Wtf8 to &Slice is safe. - unsafe { mem::transmute(self.inner.as_slice()) } + #[inline] + pub fn with_capacity(capacity: usize) -> Buf { + Buf { inner: Wtf8Buf::with_capacity(capacity) } } - pub fn as_mut_slice(&mut self) -> &mut Slice { - // SAFETY: Slice is just a wrapper for Wtf8, - // and self.inner.as_mut_slice() returns &mut Wtf8. - // Therefore, transmuting &mut Wtf8 to &mut Slice is safe. - // Additionally, care should be taken to ensure the slice - // is always valid Wtf8. - unsafe { mem::transmute(self.inner.as_mut_slice()) } + #[inline] + pub fn clear(&mut self) { + self.inner.clear() } - pub fn into_string(self) -> Result { - self.inner.into_string().map_err(|buf| Buf { inner: buf }) + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() } + #[inline] pub fn push_slice(&mut self, s: &Slice) { self.inner.push_wtf8(&s.inner) } + #[inline] pub fn reserve(&mut self, additional: usize) { self.inner.reserve(additional) } + #[inline] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { self.inner.try_reserve(additional) } + #[inline] pub fn reserve_exact(&mut self, additional: usize) { self.inner.reserve_exact(additional) } + #[inline] pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { self.inner.try_reserve_exact(additional) } + #[inline] pub fn shrink_to_fit(&mut self) { self.inner.shrink_to_fit() } @@ -139,6 +146,24 @@ impl Buf { self.inner.shrink_to(min_capacity) } + #[inline] + pub fn as_slice(&self) -> &Slice { + // SAFETY: Slice is just a wrapper for Wtf8, + // and self.inner.as_slice() returns &Wtf8. + // Therefore, transmuting &Wtf8 to &Slice is safe. + unsafe { mem::transmute(self.inner.as_slice()) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Slice { + // SAFETY: Slice is just a wrapper for Wtf8, + // and self.inner.as_mut_slice() returns &mut Wtf8. + // Therefore, transmuting &mut Wtf8 to &mut Slice is safe. + // Additionally, care should be taken to ensure the slice + // is always valid Wtf8. + unsafe { mem::transmute(self.inner.as_mut_slice()) } + } + #[inline] pub fn leak<'a>(self) -> &'a mut Slice { unsafe { mem::transmute(self.inner.leak()) } @@ -194,6 +219,7 @@ impl Slice { } #[track_caller] + #[inline] pub fn check_public_boundary(&self, index: usize) { check_utf8_boundary(&self.inner, index); } @@ -203,18 +229,22 @@ impl Slice { unsafe { mem::transmute(Wtf8::from_str(s)) } } + #[inline] pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { self.inner.as_str() } + #[inline] pub fn to_string_lossy(&self) -> Cow<'_, str> { self.inner.to_string_lossy() } + #[inline] pub fn to_owned(&self) -> Buf { Buf { inner: self.inner.to_owned() } } + #[inline] pub fn clone_into(&self, buf: &mut Buf) { self.inner.clone_into(&mut buf.inner) } @@ -224,6 +254,7 @@ impl Slice { unsafe { mem::transmute(self.inner.into_box()) } } + #[inline] pub fn empty_box() -> Box { unsafe { mem::transmute(Wtf8::empty_box()) } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 6c60d901ee904..f4402fc6f9865 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -156,9 +156,12 @@ impl ops::DerefMut for Wtf8Buf { } } -/// Format the string with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800] +/// Formats the string in double quotes, with characters escaped according to +/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`, +/// where each `x` is a hexadecimal digit. +/// +/// For example, the code units [U+0061, U+D800, U+000A] are formatted as +/// `"a\u{D800}\n"`. impl fmt::Debug for Wtf8Buf { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -181,7 +184,7 @@ impl Wtf8Buf { /// Creates a WTF-8 string from a WTF-8 byte vec. /// - /// Since the byte vec is not checked for valid WTF-8, this functions is + /// Since the byte vec is not checked for valid WTF-8, this function is /// marked unsafe. #[inline] pub unsafe fn from_bytes_unchecked(value: Vec) -> Wtf8Buf { @@ -205,7 +208,7 @@ impl Wtf8Buf { /// Since WTF-8 is a superset of UTF-8, this always succeeds. #[inline] pub fn from_str(s: &str) -> Wtf8Buf { - Wtf8Buf { bytes: <[_]>::to_vec(s.as_bytes()), is_known_utf8: true } + Wtf8Buf { bytes: s.as_bytes().to_vec(), is_known_utf8: true } } pub fn clear(&mut self) { @@ -237,8 +240,9 @@ impl Wtf8Buf { string } - /// Copied from String::push + /// Appends the given `char` to the end of this string. /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. + /// Copied from String::push. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { let mut bytes = [0; 4]; let bytes = encode_utf8_raw(code_point.value, &mut bytes); @@ -264,16 +268,16 @@ impl Wtf8Buf { /// /// # Panics /// - /// Panics if the new capacity overflows `usize`. + /// Panics if the new capacity exceeds `isize::MAX` bytes. #[inline] pub fn reserve(&mut self, additional: usize) { self.bytes.reserve(additional) } - /// Tries to reserve capacity for at least `additional` more length units - /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid - /// frequent reallocations. After calling `try_reserve`, capacity will be - /// greater than or equal to `self.len() + additional`. Does nothing if + /// Tries to reserve capacity for at least `additional` more bytes to be + /// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to + /// avoid frequent reallocations. After calling `try_reserve`, capacity will + /// be greater than or equal to `self.len() + additional`. Does nothing if /// capacity is already sufficient. This method preserves the contents even /// if an error occurs. /// @@ -291,8 +295,8 @@ impl Wtf8Buf { self.bytes.reserve_exact(additional) } - /// Tries to reserve the minimum capacity for exactly `additional` - /// length units in the given `Wtf8Buf`. After calling + /// Tries to reserve the minimum capacity for exactly `additional` more + /// bytes to be inserted in the given `Wtf8Buf`. After calling /// `try_reserve_exact`, capacity will be greater than or equal to /// `self.len() + additional` if it returns `Ok(())`. /// Does nothing if the capacity is already sufficient. @@ -440,22 +444,17 @@ impl Wtf8Buf { /// /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”) pub fn into_string_lossy(mut self) -> String { - // Fast path: If we already have UTF-8, we can return it immediately. - if self.is_known_utf8 { - return unsafe { String::from_utf8_unchecked(self.bytes) }; - } - - let mut pos = 0; - loop { - match self.next_surrogate(pos) { - Some((surrogate_pos, _)) => { - pos = surrogate_pos + 3; - self.bytes[surrogate_pos..pos] - .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); - } - None => return unsafe { String::from_utf8_unchecked(self.bytes) }, + if !self.is_known_utf8 { + let mut pos = 0; + while let Some((surrogate_pos, _)) = self.next_surrogate(pos) { + pos = surrogate_pos + 3; + // Surrogates and the replacement character are all 3 bytes, so + // they can substituted in-place. + self.bytes[surrogate_pos..pos] + .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); } } + unsafe { String::from_utf8_unchecked(self.bytes) } } /// Converts this `Wtf8Buf` into a boxed `Wtf8`. @@ -535,9 +534,9 @@ impl AsInner<[u8]> for Wtf8 { } } -/// Format the slice with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800] +/// Formats the string in double quotes, with characters escaped according to +/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`, +/// where each `x` is a hexadecimal digit. impl fmt::Debug for Wtf8 { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { @@ -562,6 +561,8 @@ impl fmt::Debug for Wtf8 { } } +/// Formats the string with unpaired surrogates substituted with the replacement +/// character, U+FFFD. impl fmt::Display for Wtf8 { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { let wtf8_bytes = &self.bytes; @@ -672,9 +673,8 @@ impl Wtf8 { /// /// This only copies the data if necessary (if it contains any surrogate). pub fn to_string_lossy(&self) -> Cow<'_, str> { - let surrogate_pos = match self.next_surrogate(0) { - None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }), - Some((pos, _)) => pos, + let Some((surrogate_pos, _)) = self.next_surrogate(0) else { + return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }); }; let wtf8_bytes = &self.bytes; let mut utf8_bytes = Vec::with_capacity(self.len()); @@ -964,7 +964,7 @@ pub struct Wtf8CodePoints<'a> { bytes: slice::Iter<'a, u8>, } -impl<'a> Iterator for Wtf8CodePoints<'a> { +impl Iterator for Wtf8CodePoints<'_> { type Item = CodePoint; #[inline] @@ -990,7 +990,7 @@ pub struct EncodeWide<'a> { // Copied from libunicode/u_str.rs #[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for EncodeWide<'a> { +impl Iterator for EncodeWide<'_> { type Item = u16; #[inline]