From d3f2be4bd8790da2af8ed2b85dcea77d95a5da3e Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Tue, 22 Aug 2017 19:45:36 +0200 Subject: [PATCH 01/16] Add all methods of AsciiExt to u8 directly This is the first step in order to deprecate AsciiExt. Since this is a WIP commit, there is still some code duplication (notably the static arrays) that will be removed later. --- src/libcore/num/mod.rs | 655 +++++++++++++++++++++++++++++++++++++++++ src/libstd/ascii.rs | 2 +- 2 files changed, 656 insertions(+), 1 deletion(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 5799d37c19cc6..b631691c73102 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2257,6 +2257,558 @@ impl u8 { intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } + + + /// Checks if the value is within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let ascii = 97u8; + /// let non_ascii = 150u8; + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii(&self) -> bool { + *self & 128 == 0 + } + + /// Makes a copy of the value in its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let lowercase_a = 97u8; + /// + /// assert_eq!(65, lowercase_a.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> u8 { + ASCII_UPPERCASE_MAP[*self as usize] + } + + /// Makes a copy of the value in its ASCII lower case equivalent. 
+ /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = 65u8; + /// + /// assert_eq!(97, uppercase_a.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> u8 { + ASCII_LOWERCASE_MAP[*self as usize] + } + + /// Checks that two values are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let lowercase_a = 97u8; + /// let uppercase_a = 65u8; + /// + /// assert!(lowercase_a.eq_ignore_ascii_case(uppercase_a)); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: u8) -> bool { + self.to_ascii_lowercase() == other.to_ascii_lowercase() + } + + /// Converts this value to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let mut byte = b'a'; + /// + /// byte.make_ascii_uppercase(); + /// + /// assert_eq!(b'A', byte); + /// ``` + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + *self = self.to_ascii_uppercase(); + } + + /// Converts this value to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let mut byte = b'A'; + /// + /// byte.make_ascii_lowercase(); + /// + /// assert_eq!(b'a', byte); + /// ``` + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + *self = self.to_ascii_lowercase(); + } + + /// Checks if the value is an ASCII alphabetic character: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(uppercase_a.is_ascii_alphabetic()); + /// assert!(uppercase_g.is_ascii_alphabetic()); + /// assert!(a.is_ascii_alphabetic()); + /// assert!(g.is_ascii_alphabetic()); + /// assert!(!zero.is_ascii_alphabetic()); + /// assert!(!percent.is_ascii_alphabetic()); + /// assert!(!space.is_ascii_alphabetic()); + /// assert!(!lf.is_ascii_alphabetic()); + /// assert!(!esc.is_ascii_alphabetic()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_alphabetic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + L | Lx | U | Ux => true, + _ => false + } + } + + /// Checks if the value is an ASCII uppercase character: + /// U+0041 'A' ... U+005A 'Z'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(uppercase_a.is_ascii_uppercase()); + /// assert!(uppercase_g.is_ascii_uppercase()); + /// assert!(!a.is_ascii_uppercase()); + /// assert!(!g.is_ascii_uppercase()); + /// assert!(!zero.is_ascii_uppercase()); + /// assert!(!percent.is_ascii_uppercase()); + /// assert!(!space.is_ascii_uppercase()); + /// assert!(!lf.is_ascii_uppercase()); + /// assert!(!esc.is_ascii_uppercase()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_uppercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + U | Ux => true, + _ => false + } + } + + /// Checks if the value is an ASCII lowercase character: + /// U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(!uppercase_a.is_ascii_lowercase()); + /// assert!(!uppercase_g.is_ascii_lowercase()); + /// assert!(a.is_ascii_lowercase()); + /// assert!(g.is_ascii_lowercase()); + /// assert!(!zero.is_ascii_lowercase()); + /// assert!(!percent.is_ascii_lowercase()); + /// assert!(!space.is_ascii_lowercase()); + /// assert!(!lf.is_ascii_lowercase()); + /// assert!(!esc.is_ascii_lowercase()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_lowercase(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + L | Lx => true, + _ => false + } + } + + /// Checks if the value is an ASCII alphanumeric character: + /// + /// - U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// - U+0030 '0' ... U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(uppercase_a.is_ascii_alphanumeric()); + /// assert!(uppercase_g.is_ascii_alphanumeric()); + /// assert!(a.is_ascii_alphanumeric()); + /// assert!(g.is_ascii_alphanumeric()); + /// assert!(zero.is_ascii_alphanumeric()); + /// assert!(!percent.is_ascii_alphanumeric()); + /// assert!(!space.is_ascii_alphanumeric()); + /// assert!(!lf.is_ascii_alphanumeric()); + /// assert!(!esc.is_ascii_alphanumeric()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_alphanumeric(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D | L | Lx | U | Ux => true, + _ => false + } + } + + /// Checks if the value is an ASCII decimal digit: + /// U+0030 '0' ... U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(!uppercase_a.is_ascii_digit()); + /// assert!(!uppercase_g.is_ascii_digit()); + /// assert!(!a.is_ascii_digit()); + /// assert!(!g.is_ascii_digit()); + /// assert!(zero.is_ascii_digit()); + /// assert!(!percent.is_ascii_digit()); + /// assert!(!space.is_ascii_digit()); + /// assert!(!lf.is_ascii_digit()); + /// assert!(!esc.is_ascii_digit()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_digit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D => true, + _ => false + } + } + + /// Checks if the value is an ASCII hexadecimal digit: + /// + /// - U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// - U+0061 'a' ... U+0066 'f'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(uppercase_a.is_ascii_hexdigit()); + /// assert!(!uppercase_g.is_ascii_hexdigit()); + /// assert!(a.is_ascii_hexdigit()); + /// assert!(!g.is_ascii_hexdigit()); + /// assert!(zero.is_ascii_hexdigit()); + /// assert!(!percent.is_ascii_hexdigit()); + /// assert!(!space.is_ascii_hexdigit()); + /// assert!(!lf.is_ascii_hexdigit()); + /// assert!(!esc.is_ascii_hexdigit()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_hexdigit(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + D | Lx | Ux => true, + _ => false + } + } + + /// Checks if the value is an ASCII punctuation character: + /// + /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or + /// - U+003A ... U+0040 `: ; < = > ? @`, or + /// - U+005B ... U+0060 `[ \\ ] ^ _ \``, or + /// - U+007B ... 
U+007E `{ | } ~` + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(!uppercase_a.is_ascii_punctuation()); + /// assert!(!uppercase_g.is_ascii_punctuation()); + /// assert!(!a.is_ascii_punctuation()); + /// assert!(!g.is_ascii_punctuation()); + /// assert!(!zero.is_ascii_punctuation()); + /// assert!(percent.is_ascii_punctuation()); + /// assert!(!space.is_ascii_punctuation()); + /// assert!(!lf.is_ascii_punctuation()); + /// assert!(!esc.is_ascii_punctuation()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_punctuation(&self) -> bool { + if *self >= 0x80 { return false } + match ASCII_CHARACTER_CLASS[*self as usize] { + P => true, + _ => false + } + } + + /// Checks if the value is an ASCII graphic character: + /// U+0021 '!' ... U+007E '~'.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(uppercase_a.is_ascii_graphic()); + /// assert!(uppercase_g.is_ascii_graphic()); + /// assert!(a.is_ascii_graphic()); + /// assert!(g.is_ascii_graphic()); + /// assert!(zero.is_ascii_graphic()); + /// assert!(percent.is_ascii_graphic()); + /// assert!(!space.is_ascii_graphic()); + /// assert!(!lf.is_ascii_graphic()); + /// assert!(!esc.is_ascii_graphic()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_graphic(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Ux | U | Lx | L | D | P => true, + _ => false + } + } + + /// Checks if the value is an ASCII whitespace character: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. 
+ /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(!uppercase_a.is_ascii_whitespace()); + /// assert!(!uppercase_g.is_ascii_whitespace()); + /// assert!(!a.is_ascii_whitespace()); + /// assert!(!g.is_ascii_whitespace()); + /// assert!(!zero.is_ascii_whitespace()); + /// assert!(!percent.is_ascii_whitespace()); + /// assert!(space.is_ascii_whitespace()); + /// assert!(lf.is_ascii_whitespace()); + /// assert!(!esc.is_ascii_whitespace()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_whitespace(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + Cw|W => true, + _ => false + } + } + + /// Checks if the value is an ASCII control character: + /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE. + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_methods_on_intrinsics)] + /// + /// let uppercase_a = b'A'; + /// let uppercase_g = b'G'; + /// let a = b'a'; + /// let g = b'g'; + /// let zero = b'0'; + /// let percent = b'%'; + /// let space = b' '; + /// let lf = b'\n'; + /// let esc = 0x1b_u8; + /// + /// assert!(!uppercase_a.is_ascii_control()); + /// assert!(!uppercase_g.is_ascii_control()); + /// assert!(!a.is_ascii_control()); + /// assert!(!g.is_ascii_control()); + /// assert!(!zero.is_ascii_control()); + /// assert!(!percent.is_ascii_control()); + /// assert!(!space.is_ascii_control()); + /// assert!(lf.is_ascii_control()); + /// assert!(esc.is_ascii_control()); + /// ``` + #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[inline] + pub fn is_ascii_control(&self) -> bool { + if *self >= 0x80 { return false; } + match ASCII_CHARACTER_CLASS[*self as usize] { + C|Cw => true, + _ => false + } + } } #[lang = "u16"] @@ -2926,3 +3478,106 @@ impl_from! { u32, f64, #[stable(feature = "lossless_float_conv", since = "1.6.0" // Float -> Float impl_from! 
{ f32, f64, #[stable(feature = "lossless_float_conv", since = "1.6.0")] } + +static ASCII_LOWERCASE_MAP: [u8; 256] = [ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'', + b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', + b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', + b'@', + + b'a', b'b', b'c', b'd', b'e', b'f', b'g', + b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', + b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', + b'x', b'y', b'z', + + b'[', b'\\', b']', b'^', b'_', + b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', + b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', + b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', + b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +]; + +static ASCII_UPPERCASE_MAP: [u8; 256] = [ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + b' ', b'!', b'"', 
b'#', b'$', b'%', b'&', b'\'', + b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', + b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', + b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', + b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', + b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', + b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_', + b'`', + + b'A', b'B', b'C', b'D', b'E', b'F', b'G', + b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', + b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', + b'X', b'Y', b'Z', + + b'{', b'|', b'}', b'~', 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +]; + +enum AsciiCharacterClass { + C, // control + Cw, // control whitespace + W, // whitespace + D, // digit + L, // lowercase + Lx, // lowercase hex digit + U, // uppercase + Ux, // uppercase hex digit + P, // punctuation +} +use self::AsciiCharacterClass::*; + +static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 128] = [ +// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f + C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_ + C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_ + W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_ + D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_ + P, 
Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_ + U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_ + P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_ + L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_ +]; diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 327deb9b419be..8ddc75868ac77 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -685,7 +685,7 @@ impl AsciiExt for [u8] { #[inline] fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool { self.len() == other.len() && - self.iter().zip(other).all(|(a, b)| { + self.iter().zip(other).all(|(a, &b)| { a.eq_ignore_ascii_case(b) }) } From 04070d11485463f55edcf9cd37b443ebffe320e8 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 28 Sep 2017 16:37:37 +0200 Subject: [PATCH 02/16] Make ascii methods on u8 insta-stable Those methods will shadow the methods of `AsciiExt`, so if we don't make them insta-stable, everyone will be hitting stability errors. It is fine adding those as stable, because they are just being moved around [according to sfackler][1]. OPEN QUESTION: this commit also stabilizes the `AsciiExt` methods that were previously feature gated by the `ascii_ctype` feature. Maybe we don't want to stabilize those yet.
[1]: https://github.com/rust-lang/rust/pull/44042#issuecomment-329939279 --- src/libcore/num/mod.rs | 64 +++++++++++------------------------------- 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index b631691c73102..9142b386fe857 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2264,15 +2264,13 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let ascii = 97u8; /// let non_ascii = 150u8; /// /// assert!(ascii.is_ascii()); /// assert!(!non_ascii.is_ascii()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii(&self) -> bool { *self & 128 == 0 @@ -2288,15 +2286,13 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let lowercase_a = 97u8; /// /// assert_eq!(65, lowercase_a.to_ascii_uppercase()); /// ``` /// /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] @@ -2312,15 +2308,13 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = 65u8; /// /// assert_eq!(97, uppercase_a.to_ascii_lowercase()); /// ``` /// /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] @@ -2334,14 +2328,12 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let lowercase_a = 97u8; /// let uppercase_a = 65u8; /// /// 
assert!(lowercase_a.eq_ignore_ascii_case(uppercase_a)); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn eq_ignore_ascii_case(&self, other: u8) -> bool { self.to_ascii_lowercase() == other.to_ascii_lowercase() @@ -2358,8 +2350,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let mut byte = b'a'; /// /// byte.make_ascii_uppercase(); @@ -2368,7 +2358,7 @@ impl u8 { /// ``` /// /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); @@ -2385,8 +2375,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let mut byte = b'A'; /// /// byte.make_ascii_lowercase(); @@ -2395,7 +2383,7 @@ impl u8 { /// ``` /// /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); @@ -2409,8 +2397,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2431,7 +2417,7 @@ impl u8 { /// assert!(!lf.is_ascii_alphabetic()); /// assert!(!esc.is_ascii_alphabetic()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_alphabetic(&self) -> bool { if *self >= 0x80 { return false; } @@ -2447,8 +2433,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// 
let uppercase_g = b'G'; /// let a = b'a'; @@ -2469,7 +2453,7 @@ impl u8 { /// assert!(!lf.is_ascii_uppercase()); /// assert!(!esc.is_ascii_uppercase()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_uppercase(&self) -> bool { if *self >= 0x80 { return false } @@ -2485,8 +2469,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2507,7 +2489,7 @@ impl u8 { /// assert!(!lf.is_ascii_lowercase()); /// assert!(!esc.is_ascii_lowercase()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_lowercase(&self) -> bool { if *self >= 0x80 { return false } @@ -2525,8 +2507,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2547,7 +2527,7 @@ impl u8 { /// assert!(!lf.is_ascii_alphanumeric()); /// assert!(!esc.is_ascii_alphanumeric()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_alphanumeric(&self) -> bool { if *self >= 0x80 { return false } @@ -2563,8 +2543,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2585,7 +2563,7 @@ impl u8 { /// assert!(!lf.is_ascii_digit()); /// assert!(!esc.is_ascii_digit()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_digit(&self) -> bool { if *self >= 0x80 { return false } @@ -2603,8 +2581,6 @@ 
impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2625,7 +2601,7 @@ impl u8 { /// assert!(!lf.is_ascii_hexdigit()); /// assert!(!esc.is_ascii_hexdigit()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_hexdigit(&self) -> bool { if *self >= 0x80 { return false } @@ -2645,8 +2621,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2667,7 +2641,7 @@ impl u8 { /// assert!(!lf.is_ascii_punctuation()); /// assert!(!esc.is_ascii_punctuation()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_punctuation(&self) -> bool { if *self >= 0x80 { return false } @@ -2683,8 +2657,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2705,7 +2677,7 @@ impl u8 { /// assert!(!lf.is_ascii_graphic()); /// assert!(!esc.is_ascii_graphic()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_graphic(&self) -> bool { if *self >= 0x80 { return false; } @@ -2738,8 +2710,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2760,7 +2730,7 @@ impl u8 { /// assert!(lf.is_ascii_whitespace()); /// assert!(!esc.is_ascii_whitespace()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", 
since = "1.21.0")] #[inline] pub fn is_ascii_whitespace(&self) -> bool { if *self >= 0x80 { return false; } @@ -2778,8 +2748,6 @@ impl u8 { /// # Examples /// /// ``` - /// #![feature(ascii_methods_on_intrinsics)] - /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2800,7 +2768,7 @@ impl u8 { /// assert!(lf.is_ascii_control()); /// assert!(esc.is_ascii_control()); /// ``` - #[unstable(feature = "ascii_methods_on_intrinsics", issue = "0")] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] pub fn is_ascii_control(&self) -> bool { if *self >= 0x80 { return false; } From 5061c9fecb995bf1920bcb546cd522fe9a84dd3e Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 28 Sep 2017 16:41:21 +0200 Subject: [PATCH 03/16] Revert signature of eq_ignore_ascii_case() to original Since the methods on u8 directly will shadow the AsciiExt methods, we cannot change the signature without breaking everything. It would have been nice to take `u8` as argument instead of `&u8`, but we cannot break stuff! So this commit reverts it to the original `&u8` version. 
--- src/libcore/num/mod.rs | 4 ++-- src/libstd/ascii.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 9142b386fe857..a750731e0c944 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2331,11 +2331,11 @@ impl u8 { /// let lowercase_a = 97u8; /// let uppercase_a = 65u8; /// - /// assert!(lowercase_a.eq_ignore_ascii_case(uppercase_a)); + /// assert!(lowercase_a.eq_ignore_ascii_case(&uppercase_a)); /// ``` #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] #[inline] - pub fn eq_ignore_ascii_case(&self, other: u8) -> bool { + pub fn eq_ignore_ascii_case(&self, other: &u8) -> bool { self.to_ascii_lowercase() == other.to_ascii_lowercase() } diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 8ddc75868ac77..327deb9b419be 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -685,7 +685,7 @@ impl AsciiExt for [u8] { #[inline] fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool { self.len() == other.len() && - self.iter().zip(other).all(|(a, &b)| { + self.iter().zip(other).all(|(a, b)| { a.eq_ignore_ascii_case(b) }) } From d425f8d2266f481bab447f30816fc9a1b4907868 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 28 Sep 2017 17:04:38 +0200 Subject: [PATCH 04/16] Copy `AsciiExt` methods to `char` directly This is done in order to deprecate AsciiExt eventually. 
--- src/librustdoc/html/markdown.rs | 1 - src/librustdoc/lib.rs | 1 - src/libstd/path.rs | 1 - src/libstd_unicode/char.rs | 501 ++++++++++++++++++++++++++++++++ 4 files changed, 501 insertions(+), 3 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 001e773098eb3..c132cf75e40a4 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -30,7 +30,6 @@ use libc; use std::slice; -use std::ascii::AsciiExt; use std::cell::RefCell; use std::collections::{HashMap, VecDeque}; use std::default::Default; diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 20da99a6b1376..500d0931708ab 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -23,7 +23,6 @@ #![feature(test)] #![feature(unicode)] #![feature(vec_remove_item)] -#![feature(ascii_ctype)] extern crate arena; extern crate getopts; diff --git a/src/libstd/path.rs b/src/libstd/path.rs index 294743ed2cc54..270878dc029c3 100644 --- a/src/libstd/path.rs +++ b/src/libstd/path.rs @@ -77,7 +77,6 @@ #![stable(feature = "rust1", since = "1.0.0")] -use ascii::*; use borrow::{Borrow, Cow}; use cmp; use error::Error; diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs index 5c0c7a4fbca35..9c8dd538b22a3 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libstd_unicode/char.rs @@ -923,6 +923,507 @@ impl char { pub fn to_uppercase(self) -> ToUppercase { ToUppercase(CaseMappingIter::new(conversions::to_upper(self))) } + + /// Checks if the value is within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// let ascii = 'a'; + /// let non_ascii = '❤'; + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii(&self) -> bool { + *self as u32 <= 0x7F + } + + /// Makes a copy of the value in its ASCII upper case equivalent. 
+ /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// let ascii = 'a'; + /// let non_ascii = '❤'; + /// + /// assert_eq!('A', ascii.to_ascii_uppercase()); + /// assert_eq!('❤', non_ascii.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> char { + if self.is_ascii() { + (*self as u8).to_ascii_uppercase() as char + } else { + *self + } + } + + /// Makes a copy of the value in its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let ascii = 'A'; + /// let non_ascii = '❤'; + /// + /// assert_eq!('a', ascii.to_ascii_lowercase()); + /// assert_eq!('❤', non_ascii.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> char { + if self.is_ascii() { + (*self as u8).to_ascii_lowercase() as char + } else { + *self + } + } + + /// Checks that two values are an ASCII case-insensitive match. + /// + /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`. 
+ /// + /// # Examples + /// + /// ``` + /// let upper_a = 'A'; + /// let lower_a = 'a'; + /// let lower_z = 'z'; + /// + /// assert!(upper_a.eq_ignore_ascii_case(&lower_a)); + /// assert!(upper_a.eq_ignore_ascii_case(&upper_a)); + /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z)); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &char) -> bool { + self.to_ascii_lowercase() == other.to_ascii_lowercase() + } + + /// Converts this type to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// let mut ascii = 'a'; + /// + /// ascii.make_ascii_uppercase(); + /// + /// assert_eq!('A', ascii); + /// ``` + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + *self = self.to_ascii_uppercase(); + } + + /// Converts this type to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let mut ascii = 'A'; + /// + /// ascii.make_ascii_lowercase(); + /// + /// assert_eq!('a', ascii); + /// ``` + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + *self = self.to_ascii_lowercase(); + } + + /// Checks if the value is an ASCII alphabetic character: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_alphabetic()); + /// assert!(uppercase_g.is_ascii_alphabetic()); + /// assert!(a.is_ascii_alphabetic()); + /// assert!(g.is_ascii_alphabetic()); + /// assert!(!zero.is_ascii_alphabetic()); + /// assert!(!percent.is_ascii_alphabetic()); + /// assert!(!space.is_ascii_alphabetic()); + /// assert!(!lf.is_ascii_alphabetic()); + /// assert!(!esc.is_ascii_alphabetic()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphabetic(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_alphabetic() + } + + /// Checks if the value is an ASCII uppercase character: + /// U+0041 'A' ... U+005A 'Z'. + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_uppercase()); + /// assert!(uppercase_g.is_ascii_uppercase()); + /// assert!(!a.is_ascii_uppercase()); + /// assert!(!g.is_ascii_uppercase()); + /// assert!(!zero.is_ascii_uppercase()); + /// assert!(!percent.is_ascii_uppercase()); + /// assert!(!space.is_ascii_uppercase()); + /// assert!(!lf.is_ascii_uppercase()); + /// assert!(!esc.is_ascii_uppercase()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_uppercase(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_uppercase() + } + + /// Checks if the value is an ASCII lowercase character: + /// U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_lowercase()); + /// assert!(!uppercase_g.is_ascii_lowercase()); + /// assert!(a.is_ascii_lowercase()); + /// assert!(g.is_ascii_lowercase()); + /// assert!(!zero.is_ascii_lowercase()); + /// assert!(!percent.is_ascii_lowercase()); + /// assert!(!space.is_ascii_lowercase()); + /// assert!(!lf.is_ascii_lowercase()); + /// assert!(!esc.is_ascii_lowercase()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_lowercase(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_lowercase() + } + + /// Checks if the value is an ASCII alphanumeric character: + /// + /// - U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// - U+0030 '0' ... U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_alphanumeric()); + /// assert!(uppercase_g.is_ascii_alphanumeric()); + /// assert!(a.is_ascii_alphanumeric()); + /// assert!(g.is_ascii_alphanumeric()); + /// assert!(zero.is_ascii_alphanumeric()); + /// assert!(!percent.is_ascii_alphanumeric()); + /// assert!(!space.is_ascii_alphanumeric()); + /// assert!(!lf.is_ascii_alphanumeric()); + /// assert!(!esc.is_ascii_alphanumeric()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphanumeric(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_alphanumeric() + } + + /// Checks if the value is an ASCII decimal digit: + /// U+0030 '0' ... 
U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_digit()); + /// assert!(!uppercase_g.is_ascii_digit()); + /// assert!(!a.is_ascii_digit()); + /// assert!(!g.is_ascii_digit()); + /// assert!(zero.is_ascii_digit()); + /// assert!(!percent.is_ascii_digit()); + /// assert!(!space.is_ascii_digit()); + /// assert!(!lf.is_ascii_digit()); + /// assert!(!esc.is_ascii_digit()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_digit(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_digit() + } + + /// Checks if the value is an ASCII hexadecimal digit: + /// + /// - U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// - U+0061 'a' ... U+0066 'f'. + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_hexdigit()); + /// assert!(!uppercase_g.is_ascii_hexdigit()); + /// assert!(a.is_ascii_hexdigit()); + /// assert!(!g.is_ascii_hexdigit()); + /// assert!(zero.is_ascii_hexdigit()); + /// assert!(!percent.is_ascii_hexdigit()); + /// assert!(!space.is_ascii_hexdigit()); + /// assert!(!lf.is_ascii_hexdigit()); + /// assert!(!esc.is_ascii_hexdigit()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_hexdigit(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_hexdigit() + } + + /// Checks if the value is an ASCII punctuation character: + /// + /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or + /// - U+003A ... 
U+0040 `: ; < = > ? @`, or + /// - U+005B ... U+0060 `[ \\ ] ^ _ \``, or + /// - U+007B ... U+007E `{ | } ~` + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_punctuation()); + /// assert!(!uppercase_g.is_ascii_punctuation()); + /// assert!(!a.is_ascii_punctuation()); + /// assert!(!g.is_ascii_punctuation()); + /// assert!(!zero.is_ascii_punctuation()); + /// assert!(percent.is_ascii_punctuation()); + /// assert!(!space.is_ascii_punctuation()); + /// assert!(!lf.is_ascii_punctuation()); + /// assert!(!esc.is_ascii_punctuation()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_punctuation(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_punctuation() + } + + /// Checks if the value is an ASCII graphic character: + /// U+0021 '!' ... U+007E '~'.
+ /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_graphic()); + /// assert!(uppercase_g.is_ascii_graphic()); + /// assert!(a.is_ascii_graphic()); + /// assert!(g.is_ascii_graphic()); + /// assert!(zero.is_ascii_graphic()); + /// assert!(percent.is_ascii_graphic()); + /// assert!(!space.is_ascii_graphic()); + /// assert!(!lf.is_ascii_graphic()); + /// assert!(!esc.is_ascii_graphic()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_graphic(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_graphic() + } + + /// Checks if the value is an ASCII whitespace character: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. 
+ /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_whitespace()); + /// assert!(!uppercase_g.is_ascii_whitespace()); + /// assert!(!a.is_ascii_whitespace()); + /// assert!(!g.is_ascii_whitespace()); + /// assert!(!zero.is_ascii_whitespace()); + /// assert!(!percent.is_ascii_whitespace()); + /// assert!(space.is_ascii_whitespace()); + /// assert!(lf.is_ascii_whitespace()); + /// assert!(!esc.is_ascii_whitespace()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_whitespace(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_whitespace() + } + + /// Checks if the value is an ASCII control character: + /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE. + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. 
+ /// + /// # Examples + /// + /// ``` + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_control()); + /// assert!(!uppercase_g.is_ascii_control()); + /// assert!(!a.is_ascii_control()); + /// assert!(!g.is_ascii_control()); + /// assert!(!zero.is_ascii_control()); + /// assert!(!percent.is_ascii_control()); + /// assert!(!space.is_ascii_control()); + /// assert!(lf.is_ascii_control()); + /// assert!(esc.is_ascii_control()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_control(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_control() + } } /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. From 6f5556854ebe396d6e4550e5255de6696a924e1c Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 28 Sep 2017 18:25:16 +0200 Subject: [PATCH 05/16] Use direct implementation on u8/char to implement AsciiExt The methods were copied to u8/char, so we can just use them in AsciiExt impls to avoid duplicate code.
--- src/libstd/ascii.rs | 321 ++++++-------------------------------------- 1 file changed, 41 insertions(+), 280 deletions(-) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 327deb9b419be..3dd8b2b08e6d6 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -753,202 +753,66 @@ impl AsciiExt for [u8] { } } -#[stable(feature = "rust1", since = "1.0.0")] -impl AsciiExt for u8 { - type Owned = u8; - #[inline] - fn is_ascii(&self) -> bool { *self & 128 == 0 } - #[inline] - fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] } - #[inline] - fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] } - #[inline] - fn eq_ignore_ascii_case(&self, other: &u8) -> bool { - self.to_ascii_lowercase() == other.to_ascii_lowercase() - } - #[inline] - fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } - #[inline] - fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } +macro_rules! impl_by_delegating { + ($ty:ty, $owned:ty) => { + #[stable(feature = "rust1", since = "1.0.0")] + impl AsciiExt for $ty { + type Owned = $owned; - #[inline] - fn is_ascii_alphabetic(&self) -> bool { - if *self >= 0x80 { return false; } - match ASCII_CHARACTER_CLASS[*self as usize] { - L|Lx|U|Ux => true, - _ => false - } - } + #[inline] + fn is_ascii(&self) -> bool { self.is_ascii() } - #[inline] - fn is_ascii_uppercase(&self) -> bool { - if *self >= 0x80 { return false } - match ASCII_CHARACTER_CLASS[*self as usize] { - U|Ux => true, - _ => false - } - } + #[inline] + fn to_ascii_uppercase(&self) -> Self::Owned { self.to_ascii_uppercase() } - #[inline] - fn is_ascii_lowercase(&self) -> bool { - if *self >= 0x80 { return false } - match ASCII_CHARACTER_CLASS[*self as usize] { - L|Lx => true, - _ => false - } - } + #[inline] + fn to_ascii_lowercase(&self) -> Self::Owned { self.to_ascii_lowercase() } - #[inline] - fn is_ascii_alphanumeric(&self) -> bool { - if *self >= 0x80 { return false } - match 
ASCII_CHARACTER_CLASS[*self as usize] { - D|L|Lx|U|Ux => true, - _ => false - } - } + #[inline] + fn eq_ignore_ascii_case(&self, o: &Self) -> bool { self.eq_ignore_ascii_case(o) } - #[inline] - fn is_ascii_digit(&self) -> bool { - if *self >= 0x80 { return false } - match ASCII_CHARACTER_CLASS[*self as usize] { - D => true, - _ => false - } - } + #[inline] + fn make_ascii_uppercase(&mut self) { self.make_ascii_uppercase(); } - #[inline] - fn is_ascii_hexdigit(&self) -> bool { - if *self >= 0x80 { return false } - match ASCII_CHARACTER_CLASS[*self as usize] { - D|Lx|Ux => true, - _ => false - } - } + #[inline] + fn make_ascii_lowercase(&mut self) { self.make_ascii_lowercase(); } - #[inline] - fn is_ascii_punctuation(&self) -> bool { - if *self >= 0x80 { return false } - match ASCII_CHARACTER_CLASS[*self as usize] { - P => true, - _ => false - } - } + #[inline] + fn is_ascii_alphabetic(&self) -> bool { self.is_ascii_alphabetic() } - #[inline] - fn is_ascii_graphic(&self) -> bool { - if *self >= 0x80 { return false; } - match ASCII_CHARACTER_CLASS[*self as usize] { - Ux|U|Lx|L|D|P => true, - _ => false - } - } + #[inline] + fn is_ascii_uppercase(&self) -> bool { self.is_ascii_uppercase() } - #[inline] - fn is_ascii_whitespace(&self) -> bool { - if *self >= 0x80 { return false; } - match ASCII_CHARACTER_CLASS[*self as usize] { - Cw|W => true, - _ => false - } - } + #[inline] + fn is_ascii_lowercase(&self) -> bool { self.is_ascii_lowercase() } - #[inline] - fn is_ascii_control(&self) -> bool { - if *self >= 0x80 { return false; } - match ASCII_CHARACTER_CLASS[*self as usize] { - C|Cw => true, - _ => false - } - } -} + #[inline] + fn is_ascii_alphanumeric(&self) -> bool { self.is_ascii_alphanumeric() } -#[stable(feature = "rust1", since = "1.0.0")] -impl AsciiExt for char { - type Owned = char; - #[inline] - fn is_ascii(&self) -> bool { - *self as u32 <= 0x7F - } + #[inline] + fn is_ascii_digit(&self) -> bool { self.is_ascii_digit() } - #[inline] - fn 
to_ascii_uppercase(&self) -> char { - if self.is_ascii() { - (*self as u8).to_ascii_uppercase() as char - } else { - *self - } - } - - #[inline] - fn to_ascii_lowercase(&self) -> char { - if self.is_ascii() { - (*self as u8).to_ascii_lowercase() as char - } else { - *self - } - } - - #[inline] - fn eq_ignore_ascii_case(&self, other: &char) -> bool { - self.to_ascii_lowercase() == other.to_ascii_lowercase() - } - - #[inline] - fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); } - #[inline] - fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); } - - #[inline] - fn is_ascii_alphabetic(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphabetic() - } - - #[inline] - fn is_ascii_uppercase(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_uppercase() - } - - #[inline] - fn is_ascii_lowercase(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_lowercase() - } - - #[inline] - fn is_ascii_alphanumeric(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphanumeric() - } - - #[inline] - fn is_ascii_digit(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_digit() - } + #[inline] + fn is_ascii_hexdigit(&self) -> bool { self.is_ascii_hexdigit() } - #[inline] - fn is_ascii_hexdigit(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_hexdigit() - } - - #[inline] - fn is_ascii_punctuation(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_punctuation() - } + #[inline] + fn is_ascii_punctuation(&self) -> bool { self.is_ascii_punctuation() } - #[inline] - fn is_ascii_graphic(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_graphic() - } + #[inline] + fn is_ascii_graphic(&self) -> bool { self.is_ascii_graphic() } - #[inline] - fn is_ascii_whitespace(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_whitespace() - } + #[inline] + fn is_ascii_whitespace(&self) -> bool { 
self.is_ascii_whitespace() } - #[inline] - fn is_ascii_control(&self) -> bool { - (*self as u32 <= 0x7f) && (*self as u8).is_ascii_control() + #[inline] + fn is_ascii_control(&self) -> bool { self.is_ascii_control() } + } } } +impl_by_delegating!(u8, u8); +impl_by_delegating!(char, char); + /// An iterator over the escaped version of a byte. /// /// This `struct` is created by the [`escape_default`] function. See its @@ -1066,109 +930,6 @@ impl fmt::Debug for EscapeDefault { } -static ASCII_LOWERCASE_MAP: [u8; 256] = [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'', - b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', - b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', - b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', - b'@', - - b'a', b'b', b'c', b'd', b'e', b'f', b'g', - b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', - b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', - b'x', b'y', b'z', - - b'[', b'\\', b']', b'^', b'_', - b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', - b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', - b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', - b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - 0xe8, 0xe9, 
0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -]; - -static ASCII_UPPERCASE_MAP: [u8; 256] = [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'', - b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', - b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', - b'8', b'9', b':', b';', b'<', b'=', b'>', b'?', - b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', - b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', - b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', - b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_', - b'`', - - b'A', b'B', b'C', b'D', b'E', b'F', b'G', - b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', - b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', - b'X', b'Y', b'Z', - - b'{', b'|', b'}', b'~', 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -]; - -enum AsciiCharacterClass { - C, // control - Cw, // control whitespace - W, // whitespace - D, // digit - L, // lowercase - Lx, // lowercase hex digit - U, // uppercase - Ux, // uppercase hex digit 
- P, // punctuation -} -use self::AsciiCharacterClass::*; - -static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 128] = [ -// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f - C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_ - C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_ - W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_ - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_ - P, Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_ - U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_ - P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_ - L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_ -]; - #[cfg(test)] mod tests { use super::*; From a5277622c52741165e43ba45e7b982474548962d Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Fri, 29 Sep 2017 15:23:02 +0200 Subject: [PATCH 06/16] Remove examples in doc-comments of `AsciiExt` methods The doc comments were incorrect before: since the inherent ascii methods shadow the `AsciiExt` methods, the examples didn't use the `AsciiExt` at all. Since the trait will be deprecated soon anyway, the easiest solution was to remove the examples and already mention that the methods will be deprecated in the near future. --- src/libstd/ascii.rs | 367 ++++++-------------------------------------- 1 file changed, 48 insertions(+), 319 deletions(-) diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 3dd8b2b08e6d6..6524838f8b043 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -60,19 +60,10 @@ pub trait AsciiExt { /// Checks if the value is within the ASCII range. /// - /// # Examples + /// # Note /// - /// ``` - /// use std::ascii::AsciiExt; - /// - /// let ascii = 'a'; - /// let non_ascii = '❤'; - /// let int_ascii = 97; - /// - /// assert!(ascii.is_ascii()); - /// assert!(!non_ascii.is_ascii()); - /// assert!(int_ascii.is_ascii()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. 
#[stable(feature = "rust1", since = "1.0.0")] fn is_ascii(&self) -> bool; @@ -86,19 +77,10 @@ pub trait AsciiExt { /// To uppercase ASCII characters in addition to non-ASCII characters, use /// [`str::to_uppercase`]. /// - /// # Examples - /// - /// ``` - /// use std::ascii::AsciiExt; + /// # Note /// - /// let ascii = 'a'; - /// let non_ascii = '❤'; - /// let int_ascii = 97; - /// - /// assert_eq!('A', ascii.to_ascii_uppercase()); - /// assert_eq!('❤', non_ascii.to_ascii_uppercase()); - /// assert_eq!(65, int_ascii.to_ascii_uppercase()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. /// /// [`make_ascii_uppercase`]: #tymethod.make_ascii_uppercase /// [`str::to_uppercase`]: ../primitive.str.html#method.to_uppercase @@ -115,19 +97,10 @@ pub trait AsciiExt { /// To lowercase ASCII characters in addition to non-ASCII characters, use /// [`str::to_lowercase`]. /// - /// # Examples - /// - /// ``` - /// use std::ascii::AsciiExt; - /// - /// let ascii = 'A'; - /// let non_ascii = '❤'; - /// let int_ascii = 65; + /// # Note /// - /// assert_eq!('a', ascii.to_ascii_lowercase()); - /// assert_eq!('❤', non_ascii.to_ascii_lowercase()); - /// assert_eq!(97, int_ascii.to_ascii_lowercase()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. /// /// [`make_ascii_lowercase`]: #tymethod.make_ascii_lowercase /// [`str::to_lowercase`]: ../primitive.str.html#method.to_lowercase @@ -139,20 +112,10 @@ pub trait AsciiExt { /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, /// but without allocating and copying temporaries. 
/// - /// # Examples + /// # Note /// - /// ``` - /// use std::ascii::AsciiExt; - /// - /// let ascii1 = 'A'; - /// let ascii2 = 'a'; - /// let ascii3 = 'A'; - /// let ascii4 = 'z'; - /// - /// assert!(ascii1.eq_ignore_ascii_case(&ascii2)); - /// assert!(ascii1.eq_ignore_ascii_case(&ascii3)); - /// assert!(!ascii1.eq_ignore_ascii_case(&ascii4)); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[stable(feature = "rust1", since = "1.0.0")] fn eq_ignore_ascii_case(&self, other: &Self) -> bool; @@ -164,17 +127,10 @@ pub trait AsciiExt { /// To return a new uppercased value without modifying the existing one, use /// [`to_ascii_uppercase`]. /// - /// # Examples - /// - /// ``` - /// use std::ascii::AsciiExt; + /// # Note /// - /// let mut ascii = 'a'; - /// - /// ascii.make_ascii_uppercase(); - /// - /// assert_eq!('A', ascii); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. /// /// [`to_ascii_uppercase`]: #tymethod.to_ascii_uppercase #[stable(feature = "ascii", since = "1.9.0")] @@ -188,17 +144,10 @@ pub trait AsciiExt { /// To return a new lowercased value without modifying the existing one, use /// [`to_ascii_lowercase`]. /// - /// # Examples - /// - /// ``` - /// use std::ascii::AsciiExt; + /// # Note /// - /// let mut ascii = 'A'; - /// - /// ascii.make_ascii_lowercase(); - /// - /// assert_eq!('a', ascii); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. /// /// [`to_ascii_lowercase`]: #tymethod.to_ascii_lowercase #[stable(feature = "ascii", since = "1.9.0")] @@ -209,32 +158,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII alphabetic. 
/// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(A.is_ascii_alphabetic()); - /// assert!(G.is_ascii_alphabetic()); - /// assert!(a.is_ascii_alphabetic()); - /// assert!(g.is_ascii_alphabetic()); - /// assert!(!zero.is_ascii_alphabetic()); - /// assert!(!percent.is_ascii_alphabetic()); - /// assert!(!space.is_ascii_alphabetic()); - /// assert!(!lf.is_ascii_alphabetic()); - /// assert!(!esc.is_ascii_alphabetic()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_alphabetic(&self) -> bool { unimplemented!(); } @@ -243,32 +170,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII uppercase. /// - /// # Examples + /// # Note /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; - /// - /// assert!(A.is_ascii_uppercase()); - /// assert!(G.is_ascii_uppercase()); - /// assert!(!a.is_ascii_uppercase()); - /// assert!(!g.is_ascii_uppercase()); - /// assert!(!zero.is_ascii_uppercase()); - /// assert!(!percent.is_ascii_uppercase()); - /// assert!(!space.is_ascii_uppercase()); - /// assert!(!lf.is_ascii_uppercase()); - /// assert!(!esc.is_ascii_uppercase()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. 
#[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_uppercase(&self) -> bool { unimplemented!(); } @@ -277,32 +182,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII lowercase. /// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(!A.is_ascii_lowercase()); - /// assert!(!G.is_ascii_lowercase()); - /// assert!(a.is_ascii_lowercase()); - /// assert!(g.is_ascii_lowercase()); - /// assert!(!zero.is_ascii_lowercase()); - /// assert!(!percent.is_ascii_lowercase()); - /// assert!(!space.is_ascii_lowercase()); - /// assert!(!lf.is_ascii_lowercase()); - /// assert!(!esc.is_ascii_lowercase()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_lowercase(&self) -> bool { unimplemented!(); } @@ -312,32 +195,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII alphanumeric. 
/// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(A.is_ascii_alphanumeric()); - /// assert!(G.is_ascii_alphanumeric()); - /// assert!(a.is_ascii_alphanumeric()); - /// assert!(g.is_ascii_alphanumeric()); - /// assert!(zero.is_ascii_alphanumeric()); - /// assert!(!percent.is_ascii_alphanumeric()); - /// assert!(!space.is_ascii_alphanumeric()); - /// assert!(!lf.is_ascii_alphanumeric()); - /// assert!(!esc.is_ascii_alphanumeric()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_alphanumeric(&self) -> bool { unimplemented!(); } @@ -346,32 +207,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII digits. /// - /// # Examples + /// # Note /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; - /// - /// assert!(!A.is_ascii_digit()); - /// assert!(!G.is_ascii_digit()); - /// assert!(!a.is_ascii_digit()); - /// assert!(!g.is_ascii_digit()); - /// assert!(zero.is_ascii_digit()); - /// assert!(!percent.is_ascii_digit()); - /// assert!(!space.is_ascii_digit()); - /// assert!(!lf.is_ascii_digit()); - /// assert!(!esc.is_ascii_digit()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. 
#[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_digit(&self) -> bool { unimplemented!(); } @@ -381,32 +220,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII hex digits. /// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(A.is_ascii_hexdigit()); - /// assert!(!G.is_ascii_hexdigit()); - /// assert!(a.is_ascii_hexdigit()); - /// assert!(!g.is_ascii_hexdigit()); - /// assert!(zero.is_ascii_hexdigit()); - /// assert!(!percent.is_ascii_hexdigit()); - /// assert!(!space.is_ascii_hexdigit()); - /// assert!(!lf.is_ascii_hexdigit()); - /// assert!(!esc.is_ascii_hexdigit()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_hexdigit(&self) -> bool { unimplemented!(); } @@ -420,32 +237,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII punctuation. 
/// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(!A.is_ascii_punctuation()); - /// assert!(!G.is_ascii_punctuation()); - /// assert!(!a.is_ascii_punctuation()); - /// assert!(!g.is_ascii_punctuation()); - /// assert!(!zero.is_ascii_punctuation()); - /// assert!(percent.is_ascii_punctuation()); - /// assert!(!space.is_ascii_punctuation()); - /// assert!(!lf.is_ascii_punctuation()); - /// assert!(!esc.is_ascii_punctuation()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_punctuation(&self) -> bool { unimplemented!(); } @@ -454,32 +249,10 @@ pub trait AsciiExt { /// For strings, true if all characters in the string are /// ASCII punctuation. /// - /// # Examples + /// # Note /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; - /// - /// assert!(A.is_ascii_graphic()); - /// assert!(G.is_ascii_graphic()); - /// assert!(a.is_ascii_graphic()); - /// assert!(g.is_ascii_graphic()); - /// assert!(zero.is_ascii_graphic()); - /// assert!(percent.is_ascii_graphic()); - /// assert!(!space.is_ascii_graphic()); - /// assert!(!lf.is_ascii_graphic()); - /// assert!(!esc.is_ascii_graphic()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. 
#[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_graphic(&self) -> bool { unimplemented!(); } @@ -505,32 +278,10 @@ pub trait AsciiExt { /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 /// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(!A.is_ascii_whitespace()); - /// assert!(!G.is_ascii_whitespace()); - /// assert!(!a.is_ascii_whitespace()); - /// assert!(!g.is_ascii_whitespace()); - /// assert!(!zero.is_ascii_whitespace()); - /// assert!(!percent.is_ascii_whitespace()); - /// assert!(space.is_ascii_whitespace()); - /// assert!(lf.is_ascii_whitespace()); - /// assert!(!esc.is_ascii_whitespace()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_whitespace(&self) -> bool { unimplemented!(); } @@ -539,32 +290,10 @@ pub trait AsciiExt { /// Note that most ASCII whitespace characters are control /// characters, but SPACE is not. 
/// - /// # Examples - /// - /// ``` - /// #![feature(ascii_ctype)] - /// # #![allow(non_snake_case)] - /// use std::ascii::AsciiExt; - /// let A = 'A'; - /// let G = 'G'; - /// let a = 'a'; - /// let g = 'g'; - /// let zero = '0'; - /// let percent = '%'; - /// let space = ' '; - /// let lf = '\n'; - /// let esc = '\u{001b}'; + /// # Note /// - /// assert!(!A.is_ascii_control()); - /// assert!(!G.is_ascii_control()); - /// assert!(!a.is_ascii_control()); - /// assert!(!g.is_ascii_control()); - /// assert!(!zero.is_ascii_control()); - /// assert!(!percent.is_ascii_control()); - /// assert!(!space.is_ascii_control()); - /// assert!(lf.is_ascii_control()); - /// assert!(esc.is_ascii_control()); - /// ``` + /// This method will be deprecated in favor of the identically-named + /// inherent methods on `u8`, `char`, `[u8]` and `str`. #[unstable(feature = "ascii_ctype", issue = "39658")] fn is_ascii_control(&self) -> bool { unimplemented!(); } } From 3b13b663510d61109e2b22f2f9dcd7d1ae785e91 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Fri, 29 Sep 2017 16:54:24 +0200 Subject: [PATCH 07/16] Tweak documentation for `u8::eq_ignore_ascii_case()` --- src/libcore/num/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index a750731e0c944..c045a8fedda1e 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2322,8 +2322,7 @@ impl u8 { /// Checks that two values are an ASCII case-insensitive match. /// - /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, - /// but without allocating and copying temporaries. + /// This is equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`. 
/// /// # Examples /// From f373916cb55be160faa9fc8ae41483b3bebec850 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Fri, 29 Sep 2017 16:55:31 +0200 Subject: [PATCH 08/16] Add missing space in match arm --- src/libcore/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index c045a8fedda1e..ed8a2a0e553a9 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2734,7 +2734,7 @@ impl u8 { pub fn is_ascii_whitespace(&self) -> bool { if *self >= 0x80 { return false; } match ASCII_CHARACTER_CLASS[*self as usize] { - Cw|W => true, + Cw | W => true, _ => false } } @@ -2772,7 +2772,7 @@ impl u8 { pub fn is_ascii_control(&self) -> bool { if *self >= 0x80 { return false; } match ASCII_CHARACTER_CLASS[*self as usize] { - C|Cw => true, + C | Cw => true, _ => false } } From 8a4fa742a193f2c6061d7d4ab11d68c76da30286 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Fri, 29 Sep 2017 18:34:39 +0200 Subject: [PATCH 09/16] Fix lists in doc comments for ascii methods of u8 and char --- src/libcore/num/mod.rs | 6 ++++-- src/libstd_unicode/char.rs | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index ed8a2a0e553a9..174fffcc51a73 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2500,7 +2500,8 @@ impl u8 { /// Checks if the value is an ASCII alphanumeric character: /// - /// - U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z', or /// - U+0030 '0' ... U+0039 '9'. /// /// # Examples @@ -2574,7 +2575,8 @@ impl u8 { /// Checks if the value is an ASCII hexadecimal digit: /// - /// - U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// - U+0030 '0' ... U+0039 '9', or + /// - U+0041 'A' ... U+0046 'F', or /// - U+0061 'a' ... U+0066 'f'. 
/// /// # Examples diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs index 9c8dd538b22a3..a9f46f177904e 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libstd_unicode/char.rs @@ -1176,7 +1176,8 @@ impl char { /// Checks if the value is an ASCII alphanumeric character: /// - /// - U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z', or /// - U+0030 '0' ... U+0039 '9'. /// /// # Examples @@ -1242,7 +1243,8 @@ impl char { /// Checks if the value is an ASCII hexadecimal digit: /// - /// - U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or + /// - U+0030 '0' ... U+0039 '9', or + /// - U+0041 'A' ... U+0046 'F', or /// - U+0061 'a' ... U+0066 'f'. /// /// # Examples From 9e441c76f76521466f2f83c90d5f2811a1e9a714 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 1 Oct 2017 17:05:35 +0200 Subject: [PATCH 10/16] =?UTF-8?q?Add=20a=20lang=20item=20to=20allow=20`imp?= =?UTF-8?q?l=20[u8]=20{=E2=80=A6}`=20in=20the=20standard=20library?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/librustc/middle/lang_items.rs | 1 + src/librustc_typeck/check/method/probe.rs | 3 +++ src/librustc_typeck/coherence/inherent_impls.rs | 7 +++++++ 3 files changed, 11 insertions(+) diff --git a/src/librustc/middle/lang_items.rs b/src/librustc/middle/lang_items.rs index 679c4f17a6c03..a38b37ff745f7 100644 --- a/src/librustc/middle/lang_items.rs +++ b/src/librustc/middle/lang_items.rs @@ -211,6 +211,7 @@ language_item_table! 
{ CharImplItem, "char", char_impl; StrImplItem, "str", str_impl; SliceImplItem, "slice", slice_impl; + SliceU8ImplItem, "slice_u8", slice_u8_impl; ConstPtrImplItem, "const_ptr", const_ptr_impl; MutPtrImplItem, "mut_ptr", mut_ptr_impl; I8ImplItem, "i8", i8_impl; diff --git a/src/librustc_typeck/check/method/probe.rs b/src/librustc_typeck/check/method/probe.rs index a24f420af80dc..81e5b2fe00a6a 100644 --- a/src/librustc_typeck/check/method/probe.rs +++ b/src/librustc_typeck/check/method/probe.rs @@ -431,6 +431,9 @@ impl<'a, 'gcx, 'tcx> ProbeContext<'a, 'gcx, 'tcx> { ty::TySlice(_) => { let lang_def_id = lang_items.slice_impl(); self.assemble_inherent_impl_for_primitive(lang_def_id); + + let lang_def_id = lang_items.slice_u8_impl(); + self.assemble_inherent_impl_for_primitive(lang_def_id); } ty::TyRawPtr(ty::TypeAndMut { ty: _, mutbl: hir::MutImmutable }) => { let lang_def_id = lang_items.const_ptr_impl(); diff --git a/src/librustc_typeck/coherence/inherent_impls.rs b/src/librustc_typeck/coherence/inherent_impls.rs index c56a3b91ca37f..569b6a2febb45 100644 --- a/src/librustc_typeck/coherence/inherent_impls.rs +++ b/src/librustc_typeck/coherence/inherent_impls.rs @@ -137,6 +137,13 @@ impl<'a, 'tcx, 'v> ItemLikeVisitor<'v> for InherentCollect<'a, 'tcx> { "str", item.span); } + ty::TySlice(slice_item) if slice_item == self.tcx.types.u8 => { + self.check_primitive_impl(def_id, + lang_items.slice_u8_impl(), + "slice_u8", + "[u8]", + item.span); + } ty::TySlice(_) => { self.check_primitive_impl(def_id, lang_items.slice_impl(), From 5a1d11a733b856cfaedd82f1c1ff50b87541692d Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sun, 1 Oct 2017 21:13:49 +0200 Subject: [PATCH 11/16] Copy `AsciiExt` methods to `[u8]` directly This is done in order to deprecate AsciiExt eventually. Note that this commit contains a bunch of `cfg(stage0)` statements. This is due to a new compiler feature I am using: the `slice_u8` lang item. 
Once this lang item is available in the stage0 compiler, all those cfg flags (and more) can be removed. --- src/liballoc/slice.rs | 209 ++++++++++++++++++++++++++++++++++++++++++ src/libstd/ascii.rs | 10 ++ 2 files changed, 219 insertions(+) diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 0c5fec2cf7499..8268528acbd4e 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -1533,6 +1533,215 @@ impl [T] { } } +// TODO(LukasKalbertodt): the `not(stage0)` constraint can be removed in the +// future once the stage0 compiler is new enough to know about the `slice_u8` +// lang item. +#[lang = "slice_u8"] +#[cfg(all(not(stage0), not(test)))] +impl [u8] { + /// Checks if all bytes in this slice are within the ASCII range. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii(&self) -> bool { + self.iter().all(|b| b.is_ascii()) + } + + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_uppercase(); + me + } + + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. 
+ /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_lowercase(); + me + } + + /// Checks that two slices are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool { + self.len() == other.len() && + self.iter().zip(other).all(|(a, b)| { + a.eq_ignore_ascii_case(b) + }) + } + + /// Converts this slice to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + for byte in self { + byte.make_ascii_uppercase(); + } + } + + /// Converts this slice to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + for byte in self { + byte.make_ascii_lowercase(); + } + } + + /// Checks if all bytes of this slice are ASCII alphabetic characters: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z'. 
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphabetic(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphabetic()) + } + + /// Checks if all bytes of this slice are ASCII uppercase characters: + /// U+0041 'A' ... U+005A 'Z'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_uppercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_uppercase()) + } + + /// Checks if all bytes of this slice are ASCII lowercase characters: + /// U+0061 'a' ... U+007A 'z'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_lowercase(&self) -> bool { + self.iter().all(|b| b.is_ascii_lowercase()) + } + + /// Checks if all bytes of this slice are ASCII alphanumeric characters: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z', or + /// - U+0030 '0' ... U+0039 '9'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphanumeric(&self) -> bool { + self.iter().all(|b| b.is_ascii_alphanumeric()) + } + + /// Checks if all bytes of this slice are ASCII decimal digits: + /// U+0030 '0' ... U+0039 '9'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_digit(&self) -> bool { + self.iter().all(|b| b.is_ascii_digit()) + } + + /// Checks if all bytes of this slice are ASCII hexadecimal digits: + /// + /// - U+0030 '0' ... U+0039 '9', or + /// - U+0041 'A' ... U+0046 'F', or + /// - U+0061 'a' ... U+0066 'f'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_hexdigit(&self) -> bool { + self.iter().all(|b| b.is_ascii_hexdigit()) + } + + /// Checks if all bytes of this slice are ASCII punctuation characters: + /// + /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or + /// - U+003A ... U+0040 `: ; < = > ? @`, or + /// - U+005B ...
U+0060 `[ \\ ] ^ _ \``, or + /// - U+007B ... U+007E `{ | } ~` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_punctuation(&self) -> bool { + self.iter().all(|b| b.is_ascii_punctuation()) + } + + /// Checks if all bytes of this slice are ASCII graphic characters: + /// U+0021 '!' ... U+007E '~'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_graphic(&self) -> bool { + self.iter().all(|b| b.is_ascii_graphic()) + } + + /// Checks if all bytes of this slice are ASCII whitespace characters: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. + /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_whitespace(&self) -> bool { + self.iter().all(|b| b.is_ascii_whitespace()) + } + + /// Checks if all bytes of this slice are ASCII control characters: + /// + /// - U+0000 NUL ... U+001F UNIT SEPARATOR, or + /// - U+007F DELETE.
+ /// + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_control(&self) -> bool { + self.iter().all(|b| b.is_ascii_control()) + } +} + //////////////////////////////////////////////////////////////////////////////// // Extension traits for slices over specific kinds of data //////////////////////////////////////////////////////////////////////////////// diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 6524838f8b043..7a474c1f25410 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -389,6 +389,10 @@ impl AsciiExt for str { } } +// TODO(LukasKalbertodt): this impl block can be removed in the future. This is +// possible once the stage0 compiler is new enough to contain the inherent +// ascii methods for `[u8]`. See TODO comment further down. +#[cfg(stage0)] #[stable(feature = "rust1", since = "1.0.0")] impl AsciiExt for [u8] { type Owned = Vec; @@ -542,6 +546,12 @@ macro_rules! impl_by_delegating { impl_by_delegating!(u8, u8); impl_by_delegating!(char, char); +// TODO(LukasKalbertodt): the macro invocation should replace the impl block +// for `[u8]` above. But this is not possible until the stage0 compiler is new +// enough to contain the inherent ascii methods for `[u8]`. +#[cfg(not(stage0))] +impl_by_delegating!([u8], Vec); + /// An iterator over the escaped version of a byte. /// /// This `struct` is created by the [`escape_default`] function. See its From 1916e3c4aad7b0e0de1cfd190819609f55520996 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sun, 1 Oct 2017 21:46:17 +0200 Subject: [PATCH 12/16] Copy `AsciiExt` methods to `str` directly This is done in order to deprecate AsciiExt eventually. Note that this commit contains a bunch of `cfg(stage0)` statements. This is due to a new compiler feature this commit depends on: the `slice_u8` lang item. 
Once this lang item is available in the stage0 compiler, all those cfg flags (and more) can be removed. --- src/liballoc/slice.rs | 2 +- src/liballoc/str.rs | 276 ++++++++++++++++++++++++++++++++++ src/libstd/ascii.rs | 16 +- src/libsyntax/feature_gate.rs | 1 - 4 files changed, 290 insertions(+), 5 deletions(-) diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 8268528acbd4e..7590003a681dd 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -1533,7 +1533,7 @@ impl [T] { } } -// TODO(LukasKalbertodt): the `not(stage0)` constraint can be removed in the +// FIXME(LukasKalbertodt): the `not(stage0)` constraint can be removed in the // future once the stage0 compiler is new enough to know about the `slice_u8` // lang item. #[lang = "slice_u8"] diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 895607ff8d4f0..2c257b8c73678 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -2070,6 +2070,282 @@ impl str { s.extend((0..n).map(|_| self)); s } + + /// Checks if all characters in this string are within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// let ascii = "hello!\n"; + /// let non_ascii = "Grüße, Jürgen ❤"; + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii(&self) -> bool { + // We can treat each byte as character here: all multibyte characters + // start with a byte that is not in the ascii range, so we will stop + // there already. + self.bytes().all(|b| b.is_ascii()) + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. 
+ /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + #[cfg(not(stage0))] + pub fn to_ascii_uppercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_uppercase(); + // make_ascii_uppercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + #[cfg(not(stage0))] + pub fn to_ascii_lowercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_lowercase(); + // make_ascii_lowercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Checks that two strings are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. 
+ /// + /// # Examples + /// + /// ``` + /// assert!("Ferris".eq_ignore_ascii_case("FERRIS")); + /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS")); + /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS")); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + #[cfg(not(stage0))] + pub fn eq_ignore_ascii_case(&self, other: &str) -> bool { + self.as_bytes().eq_ignore_ascii_case(other.as_bytes()) + } + + /// Converts this string to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[cfg(not(stage0))] + pub fn make_ascii_uppercase(&mut self) { + let me = unsafe { self.as_bytes_mut() }; + me.make_ascii_uppercase() + } + + /// Converts this string to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[cfg(not(stage0))] + pub fn make_ascii_lowercase(&mut self) { + let me = unsafe { self.as_bytes_mut() }; + me.make_ascii_lowercase() + } + + /// Checks if all characters of this string are ASCII alphabetic + /// characters: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z'. 
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphabetic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphabetic()) + } + + /// Checks if all characters of this string are ASCII uppercase characters: + /// U+0041 'A' ... U+005A 'Z'. + /// + /// # Example + /// + /// ``` + /// // Only ascii uppercase characters + /// assert!("HELLO".is_ascii_uppercase()); + /// + /// // While all characters are ascii, 'y' and 'e' are not uppercase + /// assert!(!"Bye".is_ascii_uppercase()); + /// + /// // While all characters are uppercase, 'Ü' is not ascii + /// assert!(!"TSCHÜSS".is_ascii_uppercase()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_uppercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_uppercase()) + } + + /// Checks if all characters of this string are ASCII lowercase characters: + /// U+0061 'a' ... U+007A 'z'. + /// + /// # Example + /// + /// ``` + /// // Only ascii uppercase characters + /// assert!("hello".is_ascii_lowercase()); + /// + /// // While all characters are ascii, 'B' is not lowercase + /// assert!(!"Bye".is_ascii_lowercase()); + /// + /// // While all characters are lowercase, 'Ü' is not ascii + /// assert!(!"tschüss".is_ascii_lowercase()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_lowercase(&self) -> bool { + self.bytes().all(|b| b.is_ascii_lowercase()) + } + + /// Checks if all characters of this string are ASCII alphanumeric + /// characters: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z', or + /// - U+0030 '0' ... U+0039 '9'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_alphanumeric(&self) -> bool { + self.bytes().all(|b| b.is_ascii_alphanumeric()) + } + + /// Checks if all characters of this string are ASCII decimal digit: + /// U+0030 '0' ... U+0039 '9'. 
+ #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_digit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_digit()) + } + + /// Checks if all characters of this string are ASCII hexadecimal digits: + /// + /// - U+0030 '0' ... U+0039 '9', or + /// - U+0041 'A' ... U+0046 'F', or + /// - U+0061 'a' ... U+0066 'f'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_hexdigit(&self) -> bool { + self.bytes().all(|b| b.is_ascii_hexdigit()) + } + + /// Checks if all characters of this string are ASCII punctuation + /// characters: + /// + /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or + /// - U+003A ... U+0040 `: ; < = > ? @`, or + /// - U+005B ... U+0060 `[ \\ ] ^ _ \``, or + /// - U+007B ... U+007E `{ | } ~` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_punctuation(&self) -> bool { + self.bytes().all(|b| b.is_ascii_punctuation()) + } + + /// Checks if all characters of this string are ASCII graphic characters: + /// U+0021 '@' ... U+007E '~'. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_graphic(&self) -> bool { + self.bytes().all(|b| b.is_ascii_graphic()) + } + + /// Checks if all characters of this string are ASCII whitespace characters: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. 
+ /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. + /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_whitespace(&self) -> bool { + self.bytes().all(|b| b.is_ascii_whitespace()) + } + + /// Checks if all characters of this string are ASCII control characters: + /// + /// - U+0000 NUL ... U+001F UNIT SEPARATOR, or + /// - U+007F DELETE. + /// + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii_control(&self) -> bool { + self.bytes().all(|b| b.is_ascii_control()) + } } /// Converts a boxed slice of bytes to a boxed string slice without checking diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 7a474c1f25410..200264a25834d 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -298,6 +298,10 @@ pub trait AsciiExt { fn is_ascii_control(&self) -> bool { unimplemented!(); } } +// FIXME(LukasKalbertodt): this impl block can be removed in the future. This is +// possible once the stage0 compiler is new enough to contain the inherent +// ascii methods for `str`. See FIXME comment further down. +#[cfg(stage0)] #[stable(feature = "rust1", since = "1.0.0")] impl AsciiExt for str { type Owned = String; @@ -389,9 +393,9 @@ impl AsciiExt for str { } } -// TODO(LukasKalbertodt): this impl block can be removed in the future. This is +// FIXME(LukasKalbertodt): this impl block can be removed in the future. 
This is // possible once the stage0 compiler is new enough to contain the inherent -// ascii methods for `[u8]`. See TODO comment further down. +// ascii methods for `[u8]`. See FIXME comment further down. #[cfg(stage0)] #[stable(feature = "rust1", since = "1.0.0")] impl AsciiExt for [u8] { @@ -546,12 +550,18 @@ macro_rules! impl_by_delegating { impl_by_delegating!(u8, u8); impl_by_delegating!(char, char); -// TODO(LukasKalbertodt): the macro invocation should replace the impl block +// FIXME(LukasKalbertodt): the macro invocation should replace the impl block // for `[u8]` above. But this is not possible until the stage0 compiler is new // enough to contain the inherent ascii methods for `[u8]`. #[cfg(not(stage0))] impl_by_delegating!([u8], Vec<u8>); +// FIXME(LukasKalbertodt): the macro invocation should replace the impl block +// for `str` above. But this is not possible until the stage0 compiler is new +// enough to contain the inherent ascii methods for `str`. +#[cfg(not(stage0))] +impl_by_delegating!(str, String); + /// An iterator over the escaped version of a byte. /// /// This `struct` is created by the [`escape_default`] function. See its diff --git a/src/libsyntax/feature_gate.rs b/src/libsyntax/feature_gate.rs index 30451ec757a9f..195aac5292fb8 100644 --- a/src/libsyntax/feature_gate.rs +++ b/src/libsyntax/feature_gate.rs @@ -35,7 +35,6 @@ use visit::{self, FnKind, Visitor}; use parse::ParseSess; use symbol::Symbol; -use std::ascii::AsciiExt; use std::env; macro_rules! set { From da57580736c6d30fec6c4e4442bc5376ac81f245 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Mon, 2 Oct 2017 09:50:36 +0200 Subject: [PATCH 13/16] Remove unused AsciiExt imports and fix tests related to ascii methods Many AsciiExt imports have become useless thanks to the inherent ascii methods added in the last commits. These were removed. In some places, I fully specified the ascii method being called to enforce usage of the AsciiExt trait. 
Note that some imports are not removed but tagged with a `#[cfg(stage0)]` attribute. This is necessary, because certain ascii methods are not yet available in stage0. All those imports will be removed later. Additionally, failing tests were fixed. The test suite should exit successfully now. --- src/liballoc/benches/str.rs | 3 --- src/liballoc/borrow.rs | 1 - src/liballoc/str.rs | 4 ---- src/liballoc/string.rs | 2 -- src/liballoc/tests/str.rs | 1 - src/liballoc/tests/vec.rs | 3 --- src/liballoc/vec.rs | 2 -- src/librustc/lint/mod.rs | 1 + src/librustdoc/clean/cfg.rs | 1 + src/librustdoc/html/render.rs | 1 + src/libstd/ascii.rs | 8 +++++--- src/test/ui/deref-suggestion.stderr | 4 ++-- 12 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/liballoc/benches/str.rs b/src/liballoc/benches/str.rs index fc4063fae9277..38c94d4d8b5f3 100644 --- a/src/liballoc/benches/str.rs +++ b/src/liballoc/benches/str.rs @@ -272,15 +272,12 @@ make_test!(match_indices_a_str, s, s.match_indices("a").count()); make_test!(split_a_str, s, s.split("a").count()); make_test!(trim_ascii_char, s, { - use std::ascii::AsciiExt; s.trim_matches(|c: char| c.is_ascii()) }); make_test!(trim_left_ascii_char, s, { - use std::ascii::AsciiExt; s.trim_left_matches(|c: char| c.is_ascii()) }); make_test!(trim_right_ascii_char, s, { - use std::ascii::AsciiExt; s.trim_right_matches(|c: char| c.is_ascii()) }); diff --git a/src/liballoc/borrow.rs b/src/liballoc/borrow.rs index a662e4b1f4f93..e8aff09987157 100644 --- a/src/liballoc/borrow.rs +++ b/src/liballoc/borrow.rs @@ -191,7 +191,6 @@ impl<'a, B: ?Sized> Cow<'a, B> /// # Examples /// /// ``` - /// use std::ascii::AsciiExt; /// use std::borrow::Cow; /// /// let mut cow = Cow::Borrowed("foo"); diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 2c257b8c73678..b75ecb6ea51cf 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -390,8 +390,6 @@ impl str { /// # Examples /// /// ``` - /// use std::ascii::AsciiExt; - /// /// let mut v = 
String::from("hello"); /// // correct length /// assert!(v.get_mut(0..5).is_some()); @@ -617,8 +615,6 @@ impl str { /// Basic usage: /// /// ``` - /// use std::ascii::AsciiExt; - /// /// let mut s = "Per Martin-Löf".to_string(); /// { /// let (first, last) = s.split_at_mut(3); diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index 6d0bb264df186..25fcc1ccdab58 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -773,8 +773,6 @@ impl String { /// Basic usage: /// /// ``` - /// use std::ascii::AsciiExt; - /// /// let mut s = String::from("foobar"); /// let s_mut_str = s.as_mut_str(); /// diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index b3178064505e8..6b075e7ac0e0d 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -706,7 +706,6 @@ fn test_split_at() { #[test] fn test_split_at_mut() { - use std::ascii::AsciiExt; let mut s = "Hello World".to_string(); { let (a, b) = s.split_at_mut(5); diff --git a/src/liballoc/tests/vec.rs b/src/liballoc/tests/vec.rs index 0e25da5bd3077..9cfde5dcc73c8 100644 --- a/src/liballoc/tests/vec.rs +++ b/src/liballoc/tests/vec.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std::ascii::AsciiExt; use std::borrow::Cow; use std::mem::size_of; use std::panic; @@ -966,5 +965,3 @@ fn drain_filter_complex() { assert_eq!(vec, vec![1, 3, 5, 7, 9, 11, 13, 15, 17, 19]); } } - - diff --git a/src/liballoc/vec.rs b/src/liballoc/vec.rs index cf34e195dea76..5aca199cf40c0 100644 --- a/src/liballoc/vec.rs +++ b/src/liballoc/vec.rs @@ -853,8 +853,6 @@ impl Vec { /// # Examples /// /// ``` - /// use std::ascii::AsciiExt; - /// /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; /// /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); diff --git a/src/librustc/lint/mod.rs b/src/librustc/lint/mod.rs index bca4dad220fcd..d648099d74d36 100644 --- a/src/librustc/lint/mod.rs +++ b/src/librustc/lint/mod.rs @@ -38,6 +38,7 @@ use hir::def_id::{CrateNum, LOCAL_CRATE}; use hir::intravisit::{self, FnKind}; use hir; use session::Session; +#[cfg(stage0)] use std::ascii::AsciiExt; use std::hash; use syntax::ast; diff --git a/src/librustdoc/clean/cfg.rs b/src/librustdoc/clean/cfg.rs index e3ce403f3c17b..915383d8189e0 100644 --- a/src/librustdoc/clean/cfg.rs +++ b/src/librustdoc/clean/cfg.rs @@ -15,6 +15,7 @@ use std::mem; use std::fmt::{self, Write}; use std::ops; +#[cfg(stage0)] use std::ascii::AsciiExt; use syntax::symbol::Symbol; diff --git a/src/librustdoc/html/render.rs b/src/librustdoc/html/render.rs index edd01a66075b0..228bd7a033090 100644 --- a/src/librustdoc/html/render.rs +++ b/src/librustdoc/html/render.rs @@ -34,6 +34,7 @@ //! both occur before the crate is rendered. 
pub use self::ExternalLocation::*; +#[cfg(stage0)] use std::ascii::AsciiExt; use std::cell::RefCell; use std::cmp::Ordering; diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 200264a25834d..96d719c528c10 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -38,8 +38,8 @@ use iter::FusedIterator; /// ``` /// use std::ascii::AsciiExt; /// -/// assert_eq!("café".to_ascii_uppercase(), "CAFÉ"); -/// assert_eq!("café".to_ascii_uppercase(), "CAFé"); +/// assert_eq!(AsciiExt::to_ascii_uppercase("café"), "CAFÉ"); +/// assert_eq!(AsciiExt::to_ascii_uppercase("café"), "CAFé"); /// ``` /// /// In the first example, the lowercased string is represented `"cafe\u{301}"` @@ -681,7 +681,9 @@ impl fmt::Debug for EscapeDefault { #[cfg(test)] mod tests { - use super::*; + //! Note that most of these tests are not testing `AsciiExt` methods, but + //! test inherent ascii methods of char, u8, str and [u8]. `AsciiExt` is + //! just using those methods, though. use char::from_u32; #[test] diff --git a/src/test/ui/deref-suggestion.stderr b/src/test/ui/deref-suggestion.stderr index 5ad9c19fa8cc2..3ed3297e05ed9 100644 --- a/src/test/ui/deref-suggestion.stderr +++ b/src/test/ui/deref-suggestion.stderr @@ -10,8 +10,8 @@ error[E0308]: mismatched types - .escape_debug() - .escape_default() - .escape_unicode() - - .to_lowercase() - - .to_uppercase() + - .to_ascii_lowercase() + - .to_ascii_uppercase() error[E0308]: mismatched types --> $DIR/deref-suggestion.rs:23:10 From 259c125267fb8334ae7f70f4e1d1c2e0d9a56d59 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Tue, 3 Oct 2017 17:39:31 +0200 Subject: [PATCH 14/16] Mark several ascii methods as unstable again We don't want to stabilize them now already. The goal of this set of commits is just to add inherent methods to the four types. Stabilizing all of those methods can be done later. 
--- src/liballoc/lib.rs | 1 + src/liballoc/slice.rs | 20 +++++++++---------- src/liballoc/str.rs | 24 +++++++++++++---------- src/libcore/num/mod.rs | 40 ++++++++++++++++++++++++++++---------- src/librustdoc/lib.rs | 1 + src/libstd/lib.rs | 1 + src/libstd_unicode/char.rs | 40 ++++++++++++++++++++++++++++---------- src/libstd_unicode/lib.rs | 1 + 8 files changed, 88 insertions(+), 40 deletions(-) diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs index f654a6b5ba471..a40ed060604a7 100644 --- a/src/liballoc/lib.rs +++ b/src/liballoc/lib.rs @@ -83,6 +83,7 @@ #![cfg_attr(not(test), feature(generator_trait))] #![cfg_attr(test, feature(rand, test))] #![feature(allow_internal_unstable)] +#![feature(ascii_ctype)] #![feature(box_patterns)] #![feature(box_syntax)] #![feature(cfg_target_has_atomic)] diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 7590003a681dd..b41cb912fe798 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -1631,7 +1631,7 @@ impl [u8] { /// /// - U+0041 'A' ... U+005A 'Z', or /// - U+0061 'a' ... U+007A 'z'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphabetic(&self) -> bool { self.iter().all(|b| b.is_ascii_alphabetic()) @@ -1639,7 +1639,7 @@ impl [u8] { /// Checks if all bytes of this slice are ASCII uppercase characters: /// U+0041 'A' ... U+005A 'Z'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_uppercase(&self) -> bool { self.iter().all(|b| b.is_ascii_uppercase()) @@ -1647,7 +1647,7 @@ impl [u8] { /// Checks if all bytes of this slice are ASCII lowercase characters: /// U+0061 'a' ... U+007A 'z'. 
- #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_lowercase(&self) -> bool { self.iter().all(|b| b.is_ascii_lowercase()) @@ -1658,7 +1658,7 @@ impl [u8] { /// - U+0041 'A' ... U+005A 'Z', or /// - U+0061 'a' ... U+007A 'z', or /// - U+0030 '0' ... U+0039 '9'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphanumeric(&self) -> bool { self.iter().all(|b| b.is_ascii_alphanumeric()) @@ -1666,7 +1666,7 @@ impl [u8] { /// Checks if all bytes of this slice are ASCII decimal digit: /// U+0030 '0' ... U+0039 '9'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_digit(&self) -> bool { self.iter().all(|b| b.is_ascii_digit()) @@ -1677,7 +1677,7 @@ impl [u8] { /// - U+0030 '0' ... U+0039 '9', or /// - U+0041 'A' ... U+0046 'F', or /// - U+0061 'a' ... U+0066 'f'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_hexdigit(&self) -> bool { self.iter().all(|b| b.is_ascii_hexdigit()) @@ -1689,7 +1689,7 @@ impl [u8] { /// - U+003A ... U+0040 `: ; < = > ? @`, or /// - U+005B ... U+0060 `[ \\ ] ^ _ \``, or /// - U+007B ... U+007E `{ | } ~` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_punctuation(&self) -> bool { self.iter().all(|b| b.is_ascii_punctuation()) @@ -1697,7 +1697,7 @@ impl [u8] { /// Checks if all bytes of this slice are ASCII graphic characters: /// U+0021 '@' ... U+007E '~'. 
- #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_graphic(&self) -> bool { self.iter().all(|b| b.is_ascii_graphic()) @@ -1722,7 +1722,7 @@ impl [u8] { /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_whitespace(&self) -> bool { self.iter().all(|b| b.is_ascii_whitespace()) @@ -1735,7 +1735,7 @@ impl [u8] { /// /// Note that most ASCII whitespace characters are control /// characters, but SPACE is not. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_control(&self) -> bool { self.iter().all(|b| b.is_ascii_control()) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index b75ecb6ea51cf..5f0b4088fc07e 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -2205,7 +2205,7 @@ impl str { /// /// - U+0041 'A' ... U+005A 'Z', or /// - U+0061 'a' ... U+007A 'z'. 
- #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphabetic(&self) -> bool { self.bytes().all(|b| b.is_ascii_alphabetic()) @@ -2217,6 +2217,8 @@ impl str { /// # Example /// /// ``` + /// #![feature(ascii_ctype)] + /// /// // Only ascii uppercase characters /// assert!("HELLO".is_ascii_uppercase()); /// @@ -2226,7 +2228,7 @@ impl str { /// // While all characters are uppercase, 'Ü' is not ascii /// assert!(!"TSCHÜSS".is_ascii_uppercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_uppercase(&self) -> bool { self.bytes().all(|b| b.is_ascii_uppercase()) @@ -2238,6 +2240,8 @@ impl str { /// # Example /// /// ``` + /// #![feature(ascii_ctype)] + /// /// // Only ascii uppercase characters /// assert!("hello".is_ascii_lowercase()); /// @@ -2247,7 +2251,7 @@ impl str { /// // While all characters are lowercase, 'Ü' is not ascii /// assert!(!"tschüss".is_ascii_lowercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_lowercase(&self) -> bool { self.bytes().all(|b| b.is_ascii_lowercase()) @@ -2259,7 +2263,7 @@ impl str { /// - U+0041 'A' ... U+005A 'Z', or /// - U+0061 'a' ... U+007A 'z', or /// - U+0030 '0' ... U+0039 '9'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphanumeric(&self) -> bool { self.bytes().all(|b| b.is_ascii_alphanumeric()) @@ -2267,7 +2271,7 @@ impl str { /// Checks if all characters of this string are ASCII decimal digit: /// U+0030 '0' ... U+0039 '9'. 
- #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_digit(&self) -> bool { self.bytes().all(|b| b.is_ascii_digit()) @@ -2278,7 +2282,7 @@ impl str { /// - U+0030 '0' ... U+0039 '9', or /// - U+0041 'A' ... U+0046 'F', or /// - U+0061 'a' ... U+0066 'f'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_hexdigit(&self) -> bool { self.bytes().all(|b| b.is_ascii_hexdigit()) @@ -2291,7 +2295,7 @@ impl str { /// - U+003A ... U+0040 `: ; < = > ? @`, or /// - U+005B ... U+0060 `[ \\ ] ^ _ \``, or /// - U+007B ... U+007E `{ | } ~` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_punctuation(&self) -> bool { self.bytes().all(|b| b.is_ascii_punctuation()) @@ -2299,7 +2303,7 @@ impl str { /// Checks if all characters of this string are ASCII graphic characters: /// U+0021 '@' ... U+007E '~'. - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_graphic(&self) -> bool { self.bytes().all(|b| b.is_ascii_graphic()) @@ -2324,7 +2328,7 @@ impl str { /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_whitespace(&self) -> bool { self.bytes().all(|b| b.is_ascii_whitespace()) @@ -2337,7 +2341,7 @@ impl str { /// /// Note that most ASCII whitespace characters are control /// characters, but SPACE is not. 
- #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_control(&self) -> bool { self.bytes().all(|b| b.is_ascii_control()) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 174fffcc51a73..104e0cdb74115 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -2396,6 +2396,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2416,7 +2418,7 @@ impl u8 { /// assert!(!lf.is_ascii_alphabetic()); /// assert!(!esc.is_ascii_alphabetic()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphabetic(&self) -> bool { if *self >= 0x80 { return false; } @@ -2432,6 +2434,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2452,7 +2456,7 @@ impl u8 { /// assert!(!lf.is_ascii_uppercase()); /// assert!(!esc.is_ascii_uppercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_uppercase(&self) -> bool { if *self >= 0x80 { return false } @@ -2468,6 +2472,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2488,7 +2494,7 @@ impl u8 { /// assert!(!lf.is_ascii_lowercase()); /// assert!(!esc.is_ascii_lowercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_lowercase(&self) -> bool { if *self >= 0x80 { return false } @@ -2507,6 +2513,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let 
uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2527,7 +2535,7 @@ impl u8 { /// assert!(!lf.is_ascii_alphanumeric()); /// assert!(!esc.is_ascii_alphanumeric()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphanumeric(&self) -> bool { if *self >= 0x80 { return false } @@ -2543,6 +2551,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2563,7 +2573,7 @@ impl u8 { /// assert!(!lf.is_ascii_digit()); /// assert!(!esc.is_ascii_digit()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_digit(&self) -> bool { if *self >= 0x80 { return false } @@ -2582,6 +2592,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2602,7 +2614,7 @@ impl u8 { /// assert!(!lf.is_ascii_hexdigit()); /// assert!(!esc.is_ascii_hexdigit()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_hexdigit(&self) -> bool { if *self >= 0x80 { return false } @@ -2622,6 +2634,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2642,7 +2656,7 @@ impl u8 { /// assert!(!lf.is_ascii_punctuation()); /// assert!(!esc.is_ascii_punctuation()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_punctuation(&self) -> bool { if *self >= 0x80 { return false } @@ -2658,6 +2672,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + 
/// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2678,7 +2694,7 @@ impl u8 { /// assert!(!lf.is_ascii_graphic()); /// assert!(!esc.is_ascii_graphic()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_graphic(&self) -> bool { if *self >= 0x80 { return false; } @@ -2711,6 +2727,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2731,7 +2749,7 @@ impl u8 { /// assert!(lf.is_ascii_whitespace()); /// assert!(!esc.is_ascii_whitespace()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_whitespace(&self) -> bool { if *self >= 0x80 { return false; } @@ -2749,6 +2767,8 @@ impl u8 { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = b'A'; /// let uppercase_g = b'G'; /// let a = b'a'; @@ -2769,7 +2789,7 @@ impl u8 { /// assert!(lf.is_ascii_control()); /// assert!(esc.is_ascii_control()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_control(&self) -> bool { if *self >= 0x80 { return false; } diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 500d0931708ab..fcb25f7aef3d4 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -14,6 +14,7 @@ html_playground_url = "https://play.rust-lang.org/")] #![deny(warnings)] +#![feature(ascii_ctype)] #![feature(rustc_private)] #![feature(box_patterns)] #![feature(box_syntax)] diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 5cf1d225b9028..429153dc58b4c 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -244,6 +244,7 @@ #![feature(allow_internal_unstable)] #![feature(align_offset)] 
#![feature(array_error_internals)] +#![feature(ascii_ctype)] #![feature(asm)] #![feature(attr_literals)] #![feature(box_syntax)] diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs index a9f46f177904e..c012bdcb6dbe3 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libstd_unicode/char.rs @@ -1084,6 +1084,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1104,7 +1106,7 @@ impl char { /// assert!(!lf.is_ascii_alphabetic()); /// assert!(!esc.is_ascii_alphabetic()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphabetic(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_alphabetic() @@ -1116,6 +1118,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1136,7 +1140,7 @@ impl char { /// assert!(!lf.is_ascii_uppercase()); /// assert!(!esc.is_ascii_uppercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_uppercase(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_uppercase() @@ -1148,6 +1152,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1168,7 +1174,7 @@ impl char { /// assert!(!lf.is_ascii_lowercase()); /// assert!(!esc.is_ascii_lowercase()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_lowercase(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_lowercase() @@ -1183,6 +1189,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a 
= 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1203,7 +1211,7 @@ impl char { /// assert!(!lf.is_ascii_alphanumeric()); /// assert!(!esc.is_ascii_alphanumeric()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_alphanumeric(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_alphanumeric() @@ -1215,6 +1223,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1235,7 +1245,7 @@ impl char { /// assert!(!lf.is_ascii_digit()); /// assert!(!esc.is_ascii_digit()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_digit(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_digit() @@ -1250,6 +1260,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1270,7 +1282,7 @@ impl char { /// assert!(!lf.is_ascii_hexdigit()); /// assert!(!esc.is_ascii_hexdigit()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_hexdigit(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_hexdigit() @@ -1286,6 +1298,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1306,7 +1320,7 @@ impl char { /// assert!(!lf.is_ascii_punctuation()); /// assert!(!esc.is_ascii_punctuation()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_punctuation(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_punctuation() @@ -1318,6 +1332,8 @@ 
impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1338,7 +1354,7 @@ impl char { /// assert!(!lf.is_ascii_graphic()); /// assert!(!esc.is_ascii_graphic()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_graphic(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_graphic() @@ -1367,6 +1383,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1387,7 +1405,7 @@ impl char { /// assert!(lf.is_ascii_whitespace()); /// assert!(!esc.is_ascii_whitespace()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_whitespace(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_whitespace() @@ -1401,6 +1419,8 @@ impl char { /// # Examples /// /// ``` + /// #![feature(ascii_ctype)] + /// /// let uppercase_a = 'A'; /// let uppercase_g = 'G'; /// let a = 'a'; @@ -1421,7 +1441,7 @@ impl char { /// assert!(lf.is_ascii_control()); /// assert!(esc.is_ascii_control()); /// ``` - #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[unstable(feature = "ascii_ctype", issue = "39658")] #[inline] pub fn is_ascii_control(&self) -> bool { self.is_ascii() && (*self as u8).is_ascii_control() diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index e5a114caed0f4..65058b6554aa6 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -30,6 +30,7 @@ #![deny(warnings)] #![no_std] +#![feature(ascii_ctype)] #![feature(core_char_ext)] #![feature(str_internals)] #![feature(decode_utf8)] From deb7360fa7838aa718b5a599460761aeb940b89e Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Fri, 3 Nov 2017 21:24:20 +0100 
Subject: [PATCH 15/16] Remove import of now unused AsciiExt I also replaced a wildcard import with a specific one, while I was at it. --- src/libstd/sys/windows/path.rs | 2 -- src/libstd/sys/windows/process.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libstd/sys/windows/path.rs b/src/libstd/sys/windows/path.rs index 2b47808451bc2..98d62a0c953a6 100644 --- a/src/libstd/sys/windows/path.rs +++ b/src/libstd/sys/windows/path.rs @@ -8,8 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use ascii::*; - use path::Prefix; use ffi::OsStr; use mem; diff --git a/src/libstd/sys/windows/process.rs b/src/libstd/sys/windows/process.rs index 0d1766d5aec6d..631d69b05e115 100644 --- a/src/libstd/sys/windows/process.rs +++ b/src/libstd/sys/windows/process.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use ascii::*; +use ascii::AsciiExt; use collections::HashMap; use collections; use env::split_paths; From ea55596d5bc29708232a0bb232bf35d5e2e6cbce Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sun, 5 Nov 2017 10:40:06 +0100 Subject: [PATCH 16/16] Relax #![deny(warnings)] in some crates for cargotest Otherwise changes to the compiler are unable to introduce new warnings: some crates tested by cargotest deny all warnings, and thus the CI build fails. Thanks SimonSapin for the patch! 
--- src/tools/cargotest/main.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tools/cargotest/main.rs b/src/tools/cargotest/main.rs index 4f4960efc9b2c..a6c56a1307629 100644 --- a/src/tools/cargotest/main.rs +++ b/src/tools/cargotest/main.rs @@ -165,6 +165,8 @@ fn run_cargo_test(cargo_path: &Path, crate_path: &Path, packages: &[&str]) -> bo let status = command // Disable rust-lang/cargo's cross-compile tests .env("CFG_DISABLE_CROSS_TESTS", "1") + // Relax #![deny(warnings)] in some crates + .env("RUSTFLAGS", "--cap-lints warn") .current_dir(crate_path) .status() .expect("");