From 6a5148bda1364bd46607a3c4ebdcfb0f408e0850 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 14 Mar 2015 19:34:21 -0400 Subject: [PATCH 1/2] Introduce rsplit --- src/libcollections/str.rs | 32 ++++++++++++- src/libcollectionstest/str.rs | 14 ++++++ src/libcore/str/mod.rs | 89 ++++++++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 3a289e4ef3738..6379155800b17 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -74,8 +74,8 @@ use slice::SliceConcatExt; pub use core::str::{FromStr, Utf8Error, Str}; pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange}; -pub use core::str::{Split, SplitTerminator}; -pub use core::str::{SplitN, RSplitN}; +pub use core::str::{Split, SplitTerminator, SplitN}; +pub use core::str::{RSplit, RSplitN}; pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError}; pub use unicode::str::{Words, Graphemes, GraphemeIndices}; @@ -699,6 +699,34 @@ impl str { core_str::StrExt::split_terminator(&self[..], pat) } + /// An iterator over substrings of `self`, separated by a pattern, + /// starting from the end of the string. 
+ /// + /// # Examples + /// + /// Simple patterns: + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); + /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); + /// ``` + /// + /// More complex patterns with a lambda: + /// + /// ``` + /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["ghi", "def", "abc"]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rsplit(&self[..], pat) + } + /// An iterator over substrings of `self`, separated by characters matched by a pattern, /// starting from the end of the string. /// diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 79c2d719862a9..2498b27395a4c 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -910,6 +910,20 @@ fn test_split_char_iterator_no_trailing() { assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]); } +#[test] +fn test_rsplit() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.rsplit(' ').collect(); + assert_eq!(split, ["lämb\n", "lämb\nLittle", "little", "ä", "häd", "\nMäry"]); + + let split: Vec<&str> = data.rsplit("lämb").collect(); + assert_eq!(split, ["\n", "\nLittle ", "\nMäry häd ä little "]); + + let split: Vec<&str> = data.rsplit(|c: char| c == 'ä').collect(); + assert_eq!(split, ["mb\n", "mb\nLittle l", " little l", "d ", "ry h", "\nM"]); +} + #[test] fn test_words() { let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n"; diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index e8181395b5c1e..fc2aa256f05f4 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -111,7 +111,24 @@ macro_rules! 
delegate_iter { self.0.size_hint() } } - } + }; + (pattern reverse $te:ty : $ti:ty) => { + #[stable(feature = "rust1", since = "1.0.0")] + impl<'a, P: Pattern<'a>> Iterator for $ti + where P::Searcher: ReverseSearcher<'a> + { + type Item = $te; + + #[inline] + fn next(&mut self) -> Option<$te> { + self.0.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + } + }; } /// A trait to abstract the idea of creating a new instance of a type from a @@ -553,6 +570,19 @@ struct CharSplitsN<'a, P: Pattern<'a>> { invert: bool, } +/// An iterator over the substrings of a string, separated by a +/// pattern, in reverse order. +struct RCharSplits<'a, P: Pattern<'a>> { + /// The slice remaining to be iterated + start: usize, + end: usize, + matcher: P::Searcher, + /// Whether an empty string at the end of iteration is allowed + allow_final_empty: bool, + finished: bool, +} + + /// An iterator over the lines of a string, separated by `\n`. #[stable(feature = "rust1", since = "1.0.0")] pub struct Lines<'a> { @@ -646,6 +676,43 @@ where P::Searcher: DoubleEndedSearcher<'a> { } } +impl<'a, P: Pattern<'a>> RCharSplits<'a, P> { + #[inline] + fn get_remainder(&mut self) -> Option<&'a str> { + if !self.finished && (self.allow_final_empty || self.end - self.start > 0) { + self.finished = true; + unsafe { + let string = self.matcher.haystack().slice_unchecked(self.start, self.end); + Some(string) + } + } else { + None + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P> + where P::Searcher: ReverseSearcher<'a> +{ + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.finished { return None } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match_back() { + Some((a, b)) => unsafe { + let elt = haystack.slice_unchecked(b, self.end); + self.end = a; + Some(elt) + }, + None => self.get_remainder(), + } + } +} + /// The internal state of 
an iterator that searches for matches of a substring /// within a larger string using two-way search #[derive(Clone)] @@ -1321,6 +1388,11 @@ delegate_iter!{pattern &'a str : SplitTerminator<'a, P>} pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>); delegate_iter!{pattern forward &'a str : SplitN<'a, P>} +/// Return type of `StrExt::rsplit` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct RSplit<'a, P: Pattern<'a>>(RCharSplits<'a, P>); +delegate_iter!{pattern reverse &'a str : RSplit<'a, P>} + /// Return type of `StrExt::rsplitn` #[stable(feature = "rust1", since = "1.0.0")] pub struct RSplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>); @@ -1340,6 +1412,8 @@ pub trait StrExt { fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>; fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; + fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a>; fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>; fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; #[allow(deprecated) /* for SplitStr */] @@ -1436,6 +1510,19 @@ impl StrExt for str { }) } + #[inline] + fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplit(RCharSplits { + start: 0, + end: self.len(), + matcher: pat.into_searcher(self), + allow_final_empty: true, + finished: false, + }) + } + #[inline] fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> { RSplitN(CharSplitsN { From c6ca2205eae522387237057812b7901a2c5d3906 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 14 Mar 2015 20:07:13 -0400 Subject: [PATCH 2/2] StrExt::splitn should not require a DoubleEndedSearcher Closes #23262 --- src/libcollections/str.rs | 19 +++++----- src/libcollectionstest/str.rs | 14 ++++++++ 
src/libcore/str/mod.rs | 65 +++++++++++++++++++---------------- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 6379155800b17..67b7039a1959e 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -727,23 +727,20 @@ impl str { core_str::StrExt::rsplit(&self[..], pat) } - /// An iterator over substrings of `self`, separated by characters matched by a pattern, - /// starting from the end of the string. - /// - /// Restricted to splitting at most `count` times. - /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// An iterator over substrings of `self`, separated by a pattern, + /// starting from the end of the string, restricted to splitting + /// at most `count` times. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect(); /// assert_eq!(v, ["lamb", "little", "Mary had a"]); /// - /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect(); - /// assert_eq!(v, ["leopard", "tiger", "lionX"]); + /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(1, "::").collect(); + /// assert_eq!(v, ["leopard", "lion::tiger"]); /// ``` /// /// More complex patterns with a lambda: @@ -753,7 +750,9 @@ impl str { /// assert_eq!(v, ["ghi", "abc1def"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> { + pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a> + { core_str::StrExt::rsplitn(&self[..], count, pat) } diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 2498b27395a4c..5cfa800905415 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -924,6 +924,20 @@ fn test_rsplit() { assert_eq!(split, ["mb\n", "mb\nLittle 
l", " little l", "d ", "ry h", "\nM"]); } +#[test] +fn test_rsplitn() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.rsplitn(1, ' ').collect(); + assert_eq!(split, ["lämb\n", "\nMäry häd ä little lämb\nLittle"]); + + let split: Vec<&str> = data.rsplitn(1, "lämb").collect(); + assert_eq!(split, ["\n", "\nMäry häd ä little lämb\nLittle "]); + + let split: Vec<&str> = data.rsplitn(1, |c: char| c == 'ä').collect(); + assert_eq!(split, ["mb\n", "\nMäry häd ä little lämb\nLittle l"]); +} + #[test] fn test_words() { let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n"; diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index fc2aa256f05f4..4734e9b7a9fe5 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -567,7 +567,6 @@ struct CharSplitsN<'a, P: Pattern<'a>> { iter: CharSplits<'a, P>, /// The number of splits remaining count: usize, - invert: bool, } /// An iterator over the substrings of a string, separated by a @@ -582,6 +581,13 @@ struct RCharSplits<'a, P: Pattern<'a>> { finished: bool, } +/// An iterator over the substrings of a string, separated by a +/// pattern, splitting at most `count` times, in reverse order. +struct RCharSplitsN<'a, P: Pattern<'a>> { + iter: RCharSplits<'a, P>, + /// The number of splits remaining + count: usize, +} /// An iterator over the lines of a string, separated by `\n`. 
#[stable(feature = "rust1", since = "1.0.0")] @@ -661,15 +667,14 @@ where P::Searcher: DoubleEndedSearcher<'a> { } #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> -where P::Searcher: DoubleEndedSearcher<'a> { +impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> { type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { if self.count != 0 { self.count -= 1; - if self.invert { self.iter.next_back() } else { self.iter.next() } + self.iter.next() } else { self.iter.get_end() } @@ -713,6 +718,23 @@ impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P> } } +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, P: Pattern<'a>> Iterator for RCharSplitsN<'a, P> + where P::Searcher: ReverseSearcher<'a> +{ + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.count != 0 { + self.count -= 1; + self.iter.next() + } else { + self.iter.get_remainder() + } + } +} + /// The internal state of an iterator that searches for matches of a substring /// within a larger string using two-way search #[derive(Clone)] @@ -1360,23 +1382,7 @@ impl<'a, S: ?Sized> Str for &'a S where S: Str { /// Return type of `StrExt::split` #[stable(feature = "rust1", since = "1.0.0")] pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>); -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for Split<'a, P> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { - self.0.next() - } -} -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> DoubleEndedIterator for Split<'a, P> -where P::Searcher: DoubleEndedSearcher<'a> { - #[inline] - fn next_back(&mut self) -> Option<&'a str> { - self.0.next_back() - } -} +delegate_iter!{pattern &'a str : Split<'a, P>} /// Return type of `StrExt::split_terminator` #[stable(feature = "rust1", since = "1.0.0")] @@ -1395,8 +1401,8 @@ delegate_iter!{pattern reverse &'a str : RSplit<'a, P>} /// 
Return type of `StrExt::rsplitn` #[stable(feature = "rust1", since = "1.0.0")] -pub struct RSplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>); -delegate_iter!{pattern forward &'a str : RSplitN<'a, P>} +pub struct RSplitN<'a, P: Pattern<'a>>(RCharSplitsN<'a, P>); +delegate_iter!{pattern reverse &'a str : RSplitN<'a, P>} /// Methods for string slices #[allow(missing_docs)] @@ -1414,7 +1420,8 @@ pub trait StrExt { fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> where P::Searcher: ReverseSearcher<'a>; - fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>; + fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a>; fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; #[allow(deprecated) /* for SplitStr */] fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P>; @@ -1498,7 +1505,6 @@ impl StrExt for str { SplitN(CharSplitsN { iter: self.split(pat).0, count: count, - invert: false, }) } @@ -1524,11 +1530,12 @@ impl StrExt for str { } #[inline] - fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> { - RSplitN(CharSplitsN { - iter: self.split(pat).0, + fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplitN(RCharSplitsN { + iter: self.rsplit(pat).0, count: count, - invert: true, }) }