1 //! Parallel iterator types for [strings][std::str]
3 //! You will rarely need to interact with this module directly unless you need
4 //! to name one of the iterator types.
6 //! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
7 //! reference a `Pattern` trait which is not visible outside this crate.
8 //! This trait is intentionally kept private, for use only by Rayon itself.
9 //! It is implemented for `char`, `&[char]`, and any function or closure
10 //! `F: Fn(char) -> bool + Sync + Send`.
12 //! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13 //! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
15 //! [std::str]: https://doc.rust-lang.org/stable/std/str/
17 use crate::iter
::plumbing
::*;
19 use crate::split_producer
::*;
21 /// Test if a byte is the start of a UTF-8 character.
22 /// (extracted from `str::is_char_boundary`)
24 fn is_char_boundary(b
: u8) -> bool
{
25 // This is bit magic equivalent to: b < 128 || b >= 192
29 /// Find the index of a character boundary near the midpoint.
31 fn find_char_midpoint(chars
: &str) -> usize {
32 let mid
= chars
.len() / 2;
34 // We want to split near the midpoint, but we need to find an actual
35 // character boundary. So we look at the raw bytes, first scanning
36 // forward from the midpoint for a boundary, then trying backward.
37 let (left
, right
) = chars
.as_bytes().split_at(mid
);
38 match right
.iter().copied().position(is_char_boundary
) {
43 .rposition(is_char_boundary
)
48 /// Try to split a string near the midpoint.
50 fn split(chars
: &str) -> Option
<(&str, &str)> {
51 let index
= find_char_midpoint(chars
);
53 Some(chars
.split_at(index
))
59 /// Parallel extensions for strings.
60 pub trait ParallelString
{
61 /// Returns a plain string slice, which is used to implement the rest of
62 /// the parallel methods.
63 fn as_parallel_string(&self) -> &str;
65 /// Returns a parallel iterator over the characters of a string.
70 /// use rayon::prelude::*;
71 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72 /// assert_eq!(Some('o'), max);
74 fn par_chars(&self) -> Chars
<'_
> {
76 chars
: self.as_parallel_string(),
80 /// Returns a parallel iterator over the characters of a string, with their positions.
85 /// use rayon::prelude::*;
86 /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87 /// assert_eq!(Some((1, 'e')), min);
89 fn par_char_indices(&self) -> CharIndices
<'_
> {
91 chars
: self.as_parallel_string(),
95 /// Returns a parallel iterator over the bytes of a string.
97 /// Note that multi-byte sequences (for code points greater than `U+007F`)
98 /// are produced as separate items, but will not be split across threads.
99 /// If you would prefer an indexed iterator without that guarantee, consider
100 /// `string.as_bytes().par_iter().copied()` instead.
105 /// use rayon::prelude::*;
106 /// let max = "hello".par_bytes().max();
107 /// assert_eq!(Some(b'o'), max);
109 fn par_bytes(&self) -> Bytes
<'_
> {
111 chars
: self.as_parallel_string(),
115 /// Returns a parallel iterator over a string encoded as UTF-16.
117 /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118 /// produced as separate items, but will not be split across threads.
123 /// use rayon::prelude::*;
125 /// let max = "hello".par_encode_utf16().max();
126 /// assert_eq!(Some(b'o' as u16), max);
128 /// let text = "Zażółć gęślą jaźń";
129 /// let utf8_len = text.len();
130 /// let utf16_len = text.par_encode_utf16().count();
131 /// assert!(utf16_len <= utf8_len);
133 fn par_encode_utf16(&self) -> EncodeUtf16
<'_
> {
135 chars
: self.as_parallel_string(),
139 /// Returns a parallel iterator over substrings separated by a
140 /// given character or predicate, similar to `str::split`.
142 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143 /// It is implemented for `char`, `&[char]`, and any function or closure
144 /// `F: Fn(char) -> bool + Sync + Send`.
149 /// use rayon::prelude::*;
150 /// let total = "1, 2, buckle, 3, 4, door"
152 /// .filter_map(|s| s.trim().parse::<i32>().ok())
154 /// assert_eq!(10, total);
156 fn par_split
<P
: Pattern
>(&self, separator
: P
) -> Split
<'_
, P
> {
157 Split
::new(self.as_parallel_string(), separator
)
160 /// Returns a parallel iterator over substrings terminated by a
161 /// given character or predicate, similar to `str::split_terminator`.
162 /// It's equivalent to `par_split`, except it doesn't produce an empty
163 /// substring after a trailing terminator.
165 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
166 /// It is implemented for `char`, `&[char]`, and any function or closure
167 /// `F: Fn(char) -> bool + Sync + Send`.
172 /// use rayon::prelude::*;
173 /// let parts: Vec<_> = "((1 + 3) * 2)"
174 /// .par_split_terminator(|c| c == '(' || c == ')')
176 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
178 fn par_split_terminator
<P
: Pattern
>(&self, terminator
: P
) -> SplitTerminator
<'_
, P
> {
179 SplitTerminator
::new(self.as_parallel_string(), terminator
)
182 /// Returns a parallel iterator over the lines of a string, ending with an
183 /// optional carriage return and with a newline (`\r\n` or just `\n`).
184 /// The final line ending is optional, and line endings are not included in
185 /// the output strings.
190 /// use rayon::prelude::*;
191 /// let lengths: Vec<_> = "hello world\nfizbuzz"
193 /// .map(|l| l.len())
195 /// assert_eq!(vec![11, 7], lengths);
197 fn par_lines(&self) -> Lines
<'_
> {
198 Lines(self.as_parallel_string())
201 /// Returns a parallel iterator over the sub-slices of a string that are
202 /// separated by any amount of whitespace.
204 /// As with `str::split_whitespace`, 'whitespace' is defined according to
205 /// the terms of the Unicode Derived Core Property `White_Space`.
210 /// use rayon::prelude::*;
211 /// let longest = "which is the longest word?"
212 /// .par_split_whitespace()
213 /// .max_by_key(|word| word.len());
214 /// assert_eq!(Some("longest"), longest);
216 fn par_split_whitespace(&self) -> SplitWhitespace
<'_
> {
217 SplitWhitespace(self.as_parallel_string())
220 /// Returns a parallel iterator over substrings that match a
221 /// given character or predicate, similar to `str::matches`.
223 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
224 /// It is implemented for `char`, `&[char]`, and any function or closure
225 /// `F: Fn(char) -> bool + Sync + Send`.
230 /// use rayon::prelude::*;
231 /// let total = "1, 2, buckle, 3, 4, door"
232 /// .par_matches(char::is_numeric)
233 /// .map(|s| s.parse::<i32>().expect("digit"))
235 /// assert_eq!(10, total);
237 fn par_matches
<P
: Pattern
>(&self, pattern
: P
) -> Matches
<'_
, P
> {
239 chars
: self.as_parallel_string(),
244 /// Returns a parallel iterator over substrings that match a given character
245 /// or predicate, with their positions, similar to `str::match_indices`.
247 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
248 /// It is implemented for `char`, `&[char]`, and any function or closure
249 /// `F: Fn(char) -> bool + Sync + Send`.
254 /// use rayon::prelude::*;
255 /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
256 /// .par_match_indices(char::is_numeric)
258 /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
260 fn par_match_indices
<P
: Pattern
>(&self, pattern
: P
) -> MatchIndices
<'_
, P
> {
262 chars
: self.as_parallel_string(),
268 impl ParallelString
for str {
270 fn as_parallel_string(&self) -> &str {
275 // /////////////////////////////////////////////////////////////////////////
277 /// We hide the `Pattern` trait in a private module, as its API is not meant
278 /// for general consumption. If we could have privacy on trait items, then it
279 /// would be nicer to have its basic existence and implementors public while
280 /// keeping all of the methods private.
282 use crate::iter
::plumbing
::Folder
;
284 /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
285 /// `std::str::pattern::{Pattern, Searcher}`.
287 /// Implementing this trait is not permitted outside of `rayon`.
288 pub trait Pattern
: Sized
+ Sync
+ Send
{
290 fn find_in(&self, haystack
: &str) -> Option
<usize>;
291 fn rfind_in(&self, haystack
: &str) -> Option
<usize>;
292 fn is_suffix_of(&self, haystack
: &str) -> bool
;
293 fn fold_splits
<'ch
, F
>(&self, haystack
: &'ch
str, folder
: F
, skip_last
: bool
) -> F
296 fn fold_matches
<'ch
, F
>(&self, haystack
: &'ch
str, folder
: F
) -> F
299 fn fold_match_indices
<'ch
, F
>(&self, haystack
: &'ch
str, folder
: F
, base
: usize) -> F
301 F
: Folder
<(usize, &'ch
str)>;
304 use self::private
::Pattern
;
307 fn offset
<T
>(base
: usize) -> impl Fn((usize, T
)) -> (usize, T
) {
308 move |(i
, x
)| (base
+ i
, x
)
311 macro_rules
! impl_pattern
{
312 (&$
self:ident
=> $pattern
:expr
) => {
316 fn find_in(&$
self, chars
: &str) -> Option
<usize> {
321 fn rfind_in(&$
self, chars
: &str) -> Option
<usize> {
322 chars
.rfind($pattern
)
326 fn is_suffix_of(&$
self, chars
: &str) -> bool
{
327 chars
.ends_with($pattern
)
330 fn fold_splits
<'ch
, F
>(&$
self, chars
: &'ch
str, folder
: F
, skip_last
: bool
) -> F
334 let mut split
= chars
.split($pattern
);
338 folder
.consume_iter(split
)
341 fn fold_matches
<'ch
, F
>(&$
self, chars
: &'ch
str, folder
: F
) -> F
345 folder
.consume_iter(chars
.matches($pattern
))
348 fn fold_match_indices
<'ch
, F
>(&$
self, chars
: &'ch
str, folder
: F
, base
: usize) -> F
350 F
: Folder
<(usize, &'ch
str)>,
352 folder
.consume_iter(chars
.match_indices($pattern
).map(offset(base
)))
357 impl Pattern
for char {
358 impl_pattern
!(&self => *self);
361 impl Pattern
for &[char] {
362 impl_pattern
!(&self => *self);
365 impl<FN
: Sync
+ Send
+ Fn(char) -> bool
> Pattern
for FN
{
366 impl_pattern
!(&self => self);
369 // /////////////////////////////////////////////////////////////////////////
371 /// Parallel iterator over the characters of a string
372 #[derive(Debug, Clone)]
373 pub struct Chars
<'ch
> {
377 struct CharsProducer
<'ch
> {
381 impl<'ch
> ParallelIterator
for Chars
<'ch
> {
384 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
386 C
: UnindexedConsumer
<Self::Item
>,
388 bridge_unindexed(CharsProducer { chars: self.chars }
, consumer
)
392 impl<'ch
> UnindexedProducer
for CharsProducer
<'ch
> {
395 fn split(self) -> (Self, Option
<Self>) {
396 match split(self.chars
) {
397 Some((left
, right
)) => (
398 CharsProducer { chars: left }
,
399 Some(CharsProducer { chars: right }
),
401 None
=> (self, None
),
405 fn fold_with
<F
>(self, folder
: F
) -> F
407 F
: Folder
<Self::Item
>,
409 folder
.consume_iter(self.chars
.chars())
413 // /////////////////////////////////////////////////////////////////////////
415 /// Parallel iterator over the characters of a string, with their positions
416 #[derive(Debug, Clone)]
417 pub struct CharIndices
<'ch
> {
421 struct CharIndicesProducer
<'ch
> {
426 impl<'ch
> ParallelIterator
for CharIndices
<'ch
> {
427 type Item
= (usize, char);
429 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
431 C
: UnindexedConsumer
<Self::Item
>,
433 let producer
= CharIndicesProducer
{
437 bridge_unindexed(producer
, consumer
)
441 impl<'ch
> UnindexedProducer
for CharIndicesProducer
<'ch
> {
442 type Item
= (usize, char);
444 fn split(self) -> (Self, Option
<Self>) {
445 match split(self.chars
) {
446 Some((left
, right
)) => (
447 CharIndicesProducer
{
451 Some(CharIndicesProducer
{
453 index
: self.index
+ left
.len(),
456 None
=> (self, None
),
460 fn fold_with
<F
>(self, folder
: F
) -> F
462 F
: Folder
<Self::Item
>,
464 let base
= self.index
;
465 folder
.consume_iter(self.chars
.char_indices().map(offset(base
)))
469 // /////////////////////////////////////////////////////////////////////////
471 /// Parallel iterator over the bytes of a string
472 #[derive(Debug, Clone)]
473 pub struct Bytes
<'ch
> {
477 struct BytesProducer
<'ch
> {
481 impl<'ch
> ParallelIterator
for Bytes
<'ch
> {
484 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
486 C
: UnindexedConsumer
<Self::Item
>,
488 bridge_unindexed(BytesProducer { chars: self.chars }
, consumer
)
492 impl<'ch
> UnindexedProducer
for BytesProducer
<'ch
> {
495 fn split(self) -> (Self, Option
<Self>) {
496 match split(self.chars
) {
497 Some((left
, right
)) => (
498 BytesProducer { chars: left }
,
499 Some(BytesProducer { chars: right }
),
501 None
=> (self, None
),
505 fn fold_with
<F
>(self, folder
: F
) -> F
507 F
: Folder
<Self::Item
>,
509 folder
.consume_iter(self.chars
.bytes())
513 // /////////////////////////////////////////////////////////////////////////
515 /// Parallel iterator over a string encoded as UTF-16
516 #[derive(Debug, Clone)]
517 pub struct EncodeUtf16
<'ch
> {
521 struct EncodeUtf16Producer
<'ch
> {
525 impl<'ch
> ParallelIterator
for EncodeUtf16
<'ch
> {
528 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
530 C
: UnindexedConsumer
<Self::Item
>,
532 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }
, consumer
)
536 impl<'ch
> UnindexedProducer
for EncodeUtf16Producer
<'ch
> {
539 fn split(self) -> (Self, Option
<Self>) {
540 match split(self.chars
) {
541 Some((left
, right
)) => (
542 EncodeUtf16Producer { chars: left }
,
543 Some(EncodeUtf16Producer { chars: right }
),
545 None
=> (self, None
),
549 fn fold_with
<F
>(self, folder
: F
) -> F
551 F
: Folder
<Self::Item
>,
553 folder
.consume_iter(self.chars
.encode_utf16())
557 // /////////////////////////////////////////////////////////////////////////
559 /// Parallel iterator over substrings separated by a pattern
560 #[derive(Debug, Clone)]
561 pub struct Split
<'ch
, P
: Pattern
> {
566 impl<'ch
, P
: Pattern
> Split
<'ch
, P
> {
567 fn new(chars
: &'ch
str, separator
: P
) -> Self {
568 Split { chars, separator }
572 impl<'ch
, P
: Pattern
> ParallelIterator
for Split
<'ch
, P
> {
573 type Item
= &'ch
str;
575 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
577 C
: UnindexedConsumer
<Self::Item
>,
579 let producer
= SplitProducer
::new(self.chars
, &self.separator
);
580 bridge_unindexed(producer
, consumer
)
584 /// Implement support for `SplitProducer`.
585 impl<'ch
, P
: Pattern
> Fissile
<P
> for &'ch
str {
586 fn length(&self) -> usize {
590 fn midpoint(&self, end
: usize) -> usize {
591 // First find a suitable UTF-8 boundary.
592 find_char_midpoint(&self[..end
])
595 fn find(&self, separator
: &P
, start
: usize, end
: usize) -> Option
<usize> {
596 separator
.find_in(&self[start
..end
])
599 fn rfind(&self, separator
: &P
, end
: usize) -> Option
<usize> {
600 separator
.rfind_in(&self[..end
])
603 fn split_once(self, index
: usize) -> (Self, Self) {
604 let (left
, right
) = self.split_at(index
);
605 let mut right_iter
= right
.chars();
606 right_iter
.next(); // skip the separator
607 (left
, right_iter
.as_str())
610 fn fold_splits
<F
>(self, separator
: &P
, folder
: F
, skip_last
: bool
) -> F
614 separator
.fold_splits(self, folder
, skip_last
)
618 // /////////////////////////////////////////////////////////////////////////
620 /// Parallel iterator over substrings separated by a terminator pattern
621 #[derive(Debug, Clone)]
622 pub struct SplitTerminator
<'ch
, P
: Pattern
> {
627 struct SplitTerminatorProducer
<'ch
, 'sep
, P
: Pattern
> {
628 splitter
: SplitProducer
<'sep
, P
, &'ch
str>,
632 impl<'ch
, P
: Pattern
> SplitTerminator
<'ch
, P
> {
633 fn new(chars
: &'ch
str, terminator
: P
) -> Self {
634 SplitTerminator { chars, terminator }
638 impl<'ch
, 'sep
, P
: Pattern
+ 'sep
> SplitTerminatorProducer
<'ch
, 'sep
, P
> {
639 fn new(chars
: &'ch
str, terminator
: &'sep P
) -> Self {
640 SplitTerminatorProducer
{
641 splitter
: SplitProducer
::new(chars
, terminator
),
642 skip_last
: chars
.is_empty() || terminator
.is_suffix_of(chars
),
647 impl<'ch
, P
: Pattern
> ParallelIterator
for SplitTerminator
<'ch
, P
> {
648 type Item
= &'ch
str;
650 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
652 C
: UnindexedConsumer
<Self::Item
>,
654 let producer
= SplitTerminatorProducer
::new(self.chars
, &self.terminator
);
655 bridge_unindexed(producer
, consumer
)
659 impl<'ch
, 'sep
, P
: Pattern
+ 'sep
> UnindexedProducer
for SplitTerminatorProducer
<'ch
, 'sep
, P
> {
660 type Item
= &'ch
str;
662 fn split(mut self) -> (Self, Option
<Self>) {
663 let (left
, right
) = self.splitter
.split();
664 self.splitter
= left
;
665 let right
= right
.map(|right
| {
666 let skip_last
= self.skip_last
;
667 self.skip_last
= false;
668 SplitTerminatorProducer
{
676 fn fold_with
<F
>(self, folder
: F
) -> F
678 F
: Folder
<Self::Item
>,
680 self.splitter
.fold_with(folder
, self.skip_last
)
684 // /////////////////////////////////////////////////////////////////////////
686 /// Parallel iterator over lines in a string
687 #[derive(Debug, Clone)]
688 pub struct Lines
<'ch
>(&'ch
str);
691 fn no_carriage_return(line
: &str) -> &str {
692 if line
.ends_with('
\r'
) {
693 &line
[..line
.len() - 1]
699 impl<'ch
> ParallelIterator
for Lines
<'ch
> {
700 type Item
= &'ch
str;
702 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
704 C
: UnindexedConsumer
<Self::Item
>,
707 .par_split_terminator('
\n'
)
708 .map(no_carriage_return
)
709 .drive_unindexed(consumer
)
713 // /////////////////////////////////////////////////////////////////////////
715 /// Parallel iterator over substrings separated by whitespace
716 #[derive(Debug, Clone)]
717 pub struct SplitWhitespace
<'ch
>(&'ch
str);
720 fn not_empty(s
: &&str) -> bool
{
724 impl<'ch
> ParallelIterator
for SplitWhitespace
<'ch
> {
725 type Item
= &'ch
str;
727 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
729 C
: UnindexedConsumer
<Self::Item
>,
732 .par_split(char::is_whitespace
)
734 .drive_unindexed(consumer
)
738 // /////////////////////////////////////////////////////////////////////////
740 /// Parallel iterator over substrings that match a pattern
741 #[derive(Debug, Clone)]
742 pub struct Matches
<'ch
, P
: Pattern
> {
747 struct MatchesProducer
<'ch
, 'pat
, P
: Pattern
> {
752 impl<'ch
, P
: Pattern
> ParallelIterator
for Matches
<'ch
, P
> {
753 type Item
= &'ch
str;
755 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
757 C
: UnindexedConsumer
<Self::Item
>,
759 let producer
= MatchesProducer
{
761 pattern
: &self.pattern
,
763 bridge_unindexed(producer
, consumer
)
767 impl<'ch
, 'pat
, P
: Pattern
> UnindexedProducer
for MatchesProducer
<'ch
, 'pat
, P
> {
768 type Item
= &'ch
str;
770 fn split(self) -> (Self, Option
<Self>) {
771 match split(self.chars
) {
772 Some((left
, right
)) => (
777 Some(MatchesProducer
{
782 None
=> (self, None
),
786 fn fold_with
<F
>(self, folder
: F
) -> F
788 F
: Folder
<Self::Item
>,
790 self.pattern
.fold_matches(self.chars
, folder
)
794 // /////////////////////////////////////////////////////////////////////////
796 /// Parallel iterator over substrings that match a pattern, with their positions
797 #[derive(Debug, Clone)]
798 pub struct MatchIndices
<'ch
, P
: Pattern
> {
803 struct MatchIndicesProducer
<'ch
, 'pat
, P
: Pattern
> {
809 impl<'ch
, P
: Pattern
> ParallelIterator
for MatchIndices
<'ch
, P
> {
810 type Item
= (usize, &'ch
str);
812 fn drive_unindexed
<C
>(self, consumer
: C
) -> C
::Result
814 C
: UnindexedConsumer
<Self::Item
>,
816 let producer
= MatchIndicesProducer
{
819 pattern
: &self.pattern
,
821 bridge_unindexed(producer
, consumer
)
825 impl<'ch
, 'pat
, P
: Pattern
> UnindexedProducer
for MatchIndicesProducer
<'ch
, 'pat
, P
> {
826 type Item
= (usize, &'ch
str);
828 fn split(self) -> (Self, Option
<Self>) {
829 match split(self.chars
) {
830 Some((left
, right
)) => (
831 MatchIndicesProducer
{
835 Some(MatchIndicesProducer
{
837 index
: self.index
+ left
.len(),
841 None
=> (self, None
),
845 fn fold_with
<F
>(self, folder
: F
) -> F
847 F
: Folder
<Self::Item
>,
850 .fold_match_indices(self.chars
, folder
, self.index
)