]> git.proxmox.com Git - rustc.git/blame - src/vendor/rayon/src/str.rs
New upstream version 1.25.0+dfsg1
[rustc.git] / src / vendor / rayon / src / str.rs
CommitLineData
2c00a5a8
XL
1//! Parallel iterator types for [strings][std::str]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
6//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
7//! reference a `Pattern` trait which is not visible outside this crate.
8//! This trait is intentionally kept private, for use only by Rayon itself.
9//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
10//!
11//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
12//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
13//!
14//! [std::str]: https://doc.rust-lang.org/stable/std/str/
15
16use iter::*;
17use iter::plumbing::*;
18use split_producer::*;
19
20
21/// Test if a byte is the start of a UTF-8 character.
22/// (extracted from `str::is_char_boundary`)
23#[inline]
24fn is_char_boundary(b: u8) -> bool {
25 // This is bit magic equivalent to: b < 128 || b >= 192
26 (b as i8) >= -0x40
27}
28
29/// Find the index of a character boundary near the midpoint.
30#[inline]
31fn find_char_midpoint(chars: &str) -> usize {
32 let mid = chars.len() / 2;
33
34 // We want to split near the midpoint, but we need to find an actual
35 // character boundary. So we look at the raw bytes, first scanning
36 // forward from the midpoint for a boundary, then trying backward.
37 let (left, right) = chars.as_bytes().split_at(mid);
38 right.iter()
39 .cloned()
40 .position(is_char_boundary)
41 .map(|i| mid + i)
42 .or_else(|| left.iter().cloned().rposition(is_char_boundary))
43 .unwrap_or(0)
44}
45
46
47/// Parallel extensions for strings.
48pub trait ParallelString {
49 /// Returns a plain string slice, which is used to implement the rest of
50 /// the parallel methods.
51 fn as_parallel_string(&self) -> &str;
52
53 /// Returns a parallel iterator over the characters of a string.
54 ///
55 /// # Examples
56 ///
57 /// ```
58 /// use rayon::prelude::*;
59 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
60 /// assert_eq!(Some('o'), max);
61 /// ```
62 fn par_chars(&self) -> Chars {
63 Chars { chars: self.as_parallel_string() }
64 }
65
66 /// Returns a parallel iterator over substrings separated by a
67 /// given character or predicate, similar to `str::split`.
68 ///
69 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
70 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
71 ///
72 /// # Examples
73 ///
74 /// ```
75 /// use rayon::prelude::*;
76 /// let total = "1, 2, buckle, 3, 4, door"
77 /// .par_split(',')
78 /// .filter_map(|s| s.trim().parse::<i32>().ok())
79 /// .sum();
80 /// assert_eq!(10, total);
81 /// ```
82 fn par_split<P: Pattern>(&self, separator: P) -> Split<P> {
83 Split::new(self.as_parallel_string(), separator)
84 }
85
86 /// Returns a parallel iterator over substrings terminated by a
87 /// given character or predicate, similar to `str::split_terminator`.
88 /// It's equivalent to `par_split`, except it doesn't produce an empty
89 /// substring after a trailing terminator.
90 ///
91 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
92 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// use rayon::prelude::*;
98 /// let parts: Vec<_> = "((1 + 3) * 2)"
99 /// .par_split_terminator(|c| c == '(' || c == ')')
100 /// .collect();
101 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
102 /// ```
103 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<P> {
104 SplitTerminator::new(self.as_parallel_string(), terminator)
105 }
106
107 /// Returns a parallel iterator over the lines of a string, ending with an
108 /// optional carriage return and with a newline (`\r\n` or just `\n`).
109 /// The final line ending is optional, and line endings are not included in
110 /// the output strings.
111 ///
112 /// # Examples
113 ///
114 /// ```
115 /// use rayon::prelude::*;
116 /// let lengths: Vec<_> = "hello world\nfizbuzz"
117 /// .par_lines()
118 /// .map(|l| l.len())
119 /// .collect();
120 /// assert_eq!(vec![11, 7], lengths);
121 /// ```
122 fn par_lines(&self) -> Lines {
123 Lines(self.as_parallel_string())
124 }
125
126 /// Returns a parallel iterator over the sub-slices of a string that are
127 /// separated by any amount of whitespace.
128 ///
129 /// As with `str::split_whitespace`, 'whitespace' is defined according to
130 /// the terms of the Unicode Derived Core Property `White_Space`.
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// use rayon::prelude::*;
136 /// let longest = "which is the longest word?"
137 /// .par_split_whitespace()
138 /// .max_by_key(|word| word.len());
139 /// assert_eq!(Some("longest"), longest);
140 /// ```
141 fn par_split_whitespace(&self) -> SplitWhitespace {
142 SplitWhitespace(self.as_parallel_string())
143 }
144}
145
146impl ParallelString for str {
147 #[inline]
148 fn as_parallel_string(&self) -> &str {
149 self
150 }
151}
152
153
154// /////////////////////////////////////////////////////////////////////////
155
156/// We hide the `Pattern` trait in a private module, as its API is not meant
157/// for general consumption. If we could have privacy on trait items, then it
158/// would be nicer to have its basic existence and implementors public while
159/// keeping all of the methods private.
160mod private {
161 use iter::plumbing::Folder;
162
163 /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
164 /// `std::str::pattern::{Pattern, Searcher}`.
165 ///
166 /// Implementing this trait is not permitted outside of `rayon`.
167 pub trait Pattern: Sized + Sync + Send {
168 private_decl!{}
169 fn find_in(&self, &str) -> Option<usize>;
170 fn rfind_in(&self, &str) -> Option<usize>;
171 fn is_suffix_of(&self, &str) -> bool;
172 fn fold_with<'ch, F>(&self, &'ch str, folder: F, skip_last: bool) -> F
173 where F: Folder<&'ch str>;
174 }
175}
176use self::private::Pattern;
177
178impl Pattern for char {
179 private_impl!{}
180
181 #[inline]
182 fn find_in(&self, chars: &str) -> Option<usize> {
183 chars.find(*self)
184 }
185
186 #[inline]
187 fn rfind_in(&self, chars: &str) -> Option<usize> {
188 chars.rfind(*self)
189 }
190
191 #[inline]
192 fn is_suffix_of(&self, chars: &str) -> bool {
193 chars.ends_with(*self)
194 }
195
196 fn fold_with<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
197 where F: Folder<&'ch str>
198 {
199 let mut split = chars.split(*self);
200 if skip_last {
201 split.next_back();
202 }
203 folder.consume_iter(split)
204 }
205}
206
207impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
208 private_impl!{}
209
210 fn find_in(&self, chars: &str) -> Option<usize> {
211 chars.find(self)
212 }
213
214 fn rfind_in(&self, chars: &str) -> Option<usize> {
215 chars.rfind(self)
216 }
217
218 fn is_suffix_of(&self, chars: &str) -> bool {
219 chars.ends_with(self)
220 }
221
222 fn fold_with<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
223 where F: Folder<&'ch str>
224 {
225 let mut split = chars.split(self);
226 if skip_last {
227 split.next_back();
228 }
229 folder.consume_iter(split)
230 }
231}
232
233
234// /////////////////////////////////////////////////////////////////////////
235
236/// Parallel iterator over the characters of a string
237#[derive(Debug, Clone)]
238pub struct Chars<'ch> {
239 chars: &'ch str,
240}
241
242struct CharsProducer<'ch> {
243 chars: &'ch str,
244}
245
246impl<'ch> ParallelIterator for Chars<'ch> {
247 type Item = char;
248
249 fn drive_unindexed<C>(self, consumer: C) -> C::Result
250 where C: UnindexedConsumer<Self::Item>
251 {
252 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
253 }
254}
255
256impl<'ch> UnindexedProducer for CharsProducer<'ch> {
257 type Item = char;
258
259 fn split(mut self) -> (Self, Option<Self>) {
260 let index = find_char_midpoint(self.chars);
261 if index > 0 {
262 let (left, right) = self.chars.split_at(index);
263 self.chars = left;
264 (self, Some(CharsProducer { chars: right }))
265 } else {
266 (self, None)
267 }
268 }
269
270 fn fold_with<F>(self, folder: F) -> F
271 where F: Folder<Self::Item>
272 {
273 folder.consume_iter(self.chars.chars())
274 }
275}
276
277
278// /////////////////////////////////////////////////////////////////////////
279
280/// Parallel iterator over substrings separated by a pattern
281#[derive(Debug, Clone)]
282pub struct Split<'ch, P: Pattern> {
283 chars: &'ch str,
284 separator: P,
285}
286
287impl<'ch, P: Pattern> Split<'ch, P> {
288 fn new(chars: &'ch str, separator: P) -> Self {
289 Split {
290 chars: chars,
291 separator: separator,
292 }
293 }
294}
295
296impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
297 type Item = &'ch str;
298
299 fn drive_unindexed<C>(self, consumer: C) -> C::Result
300 where C: UnindexedConsumer<Self::Item>
301 {
302 let producer = SplitProducer::new(self.chars, &self.separator);
303 bridge_unindexed(producer, consumer)
304 }
305}
306
307/// Implement support for `SplitProducer`.
308impl<'ch, P: Pattern> Fissile<P> for &'ch str {
309 fn length(&self) -> usize {
310 self.len()
311 }
312
313 fn midpoint(&self, end: usize) -> usize {
314 // First find a suitable UTF-8 boundary.
315 find_char_midpoint(&self[..end])
316 }
317
318 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
319 separator.find_in(&self[start..end])
320 }
321
322 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
323 separator.rfind_in(&self[..end])
324 }
325
326 fn split_once(self, index: usize) -> (Self, Self) {
327 let (left, right) = self.split_at(index);
328 let mut right_iter = right.chars();
329 right_iter.next(); // skip the separator
330 (left, right_iter.as_str())
331 }
332
333 fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
334 where F: Folder<Self>
335 {
336 separator.fold_with(self, folder, skip_last)
337 }
338}
339
340
341// /////////////////////////////////////////////////////////////////////////
342
343/// Parallel iterator over substrings separated by a terminator pattern
344#[derive(Debug, Clone)]
345pub struct SplitTerminator<'ch, P: Pattern> {
346 chars: &'ch str,
347 terminator: P,
348}
349
350struct SplitTerminatorProducer<'ch, 'sep, P: Pattern + 'sep> {
351 splitter: SplitProducer<'sep, P, &'ch str>,
352 skip_last: bool,
353}
354
355impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
356 fn new(chars: &'ch str, terminator: P) -> Self {
357 SplitTerminator {
358 chars: chars,
359 terminator: terminator,
360 }
361 }
362}
363
364impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
365 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
366 SplitTerminatorProducer {
367 splitter: SplitProducer::new(chars, terminator),
368 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
369 }
370 }
371}
372
373impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
374 type Item = &'ch str;
375
376 fn drive_unindexed<C>(self, consumer: C) -> C::Result
377 where C: UnindexedConsumer<Self::Item>
378 {
379 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
380 bridge_unindexed(producer, consumer)
381 }
382}
383
384impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
385 type Item = &'ch str;
386
387 fn split(mut self) -> (Self, Option<Self>) {
388 let (left, right) = self.splitter.split();
389 self.splitter = left;
390 let right = right.map(|right| {
391 let skip_last = self.skip_last;
392 self.skip_last = false;
393 SplitTerminatorProducer {
394 splitter: right,
395 skip_last: skip_last,
396 }
397 });
398 (self, right)
399 }
400
401 fn fold_with<F>(self, folder: F) -> F
402 where F: Folder<Self::Item>
403 {
404 self.splitter.fold_with(folder, self.skip_last)
405 }
406}
407
408
409// /////////////////////////////////////////////////////////////////////////
410
411/// Parallel iterator over lines in a string
412#[derive(Debug, Clone)]
413pub struct Lines<'ch>(&'ch str);
414
415impl<'ch> ParallelIterator for Lines<'ch> {
416 type Item = &'ch str;
417
418 fn drive_unindexed<C>(self, consumer: C) -> C::Result
419 where C: UnindexedConsumer<Self::Item>
420 {
421 self.0
422 .par_split_terminator('\n')
423 .map(|line| if line.ends_with('\r') {
424 &line[..line.len() - 1]
425 } else {
426 line
427 })
428 .drive_unindexed(consumer)
429 }
430}
431
432
433// /////////////////////////////////////////////////////////////////////////
434
435/// Parallel iterator over substrings separated by whitespace
436#[derive(Debug, Clone)]
437pub struct SplitWhitespace<'ch>(&'ch str);
438
439impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
440 type Item = &'ch str;
441
442 fn drive_unindexed<C>(self, consumer: C) -> C::Result
443 where C: UnindexedConsumer<Self::Item>
444 {
445 self.0
446 .par_split(char::is_whitespace)
447 .filter(|string| !string.is_empty())
448 .drive_unindexed(consumer)
449 }
450}