]> git.proxmox.com Git - rustc.git/blob - src/vendor/textwrap/src/lib.rs
New upstream version 1.23.0+dfsg1
[rustc.git] / src / vendor / textwrap / src / lib.rs
1 //! `textwrap` provides functions for word wrapping and filling text.
2 //!
3 //! Wrapping text can be very useful in commandline programs where you
4 //! want to format dynamic output nicely so it looks good in a
5 //! terminal. A quick example:
6 //!
7 //! ```no_run
8 //! extern crate textwrap;
9 //! use textwrap::fill;
10 //!
11 //! fn main() {
12 //! let text = "textwrap: a small library for wrapping text.";
13 //! println!("{}", fill(text, 18));
14 //! }
15 //! ```
16 //!
17 //! This will display the following output:
18 //!
19 //! ```text
20 //! textwrap: a small
21 //! library for
22 //! wrapping text.
23 //! ```
24 //!
25 //! # Displayed Width vs Byte Size
26 //!
27 //! To word wrap text, one must know the width of each word so one can
28 //! know when to break lines. This library measures the width of text
29 //! using the [displayed width][unicode-width], not the size in bytes.
30 //!
31 //! This is important for non-ASCII text. ASCII characters such as `a`
32 //! and `!` are simple and take up one column each. This means that
33 //! the displayed width is equal to the string length in bytes.
34 //! However, non-ASCII characters and symbols take up more than one
35 //! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is
36 //! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively.
37 //!
38 //! This is why we take care to use the displayed width instead of the
39 //! byte count when computing line lengths. All functions in this
40 //! library handle Unicode characters like this.
41 //!
42 //! [unicode-width]: https://docs.rs/unicode-width/
43
44 #![doc(html_root_url = "https://docs.rs/textwrap/0.9.0")]
45 #![deny(missing_docs)]
46 #![deny(missing_debug_implementations)]
47
48 extern crate unicode_width;
49 #[cfg(feature = "term_size")]
50 extern crate term_size;
51 #[cfg(feature = "hyphenation")]
52 extern crate hyphenation;
53
54 use std::fmt;
55 use std::borrow::Cow;
56 use std::str::CharIndices;
57
58 use unicode_width::UnicodeWidthStr;
59 use unicode_width::UnicodeWidthChar;
60 #[cfg(feature = "hyphenation")]
61 use hyphenation::{Hyphenation, Corpus};
62
/// A non-breaking space (U+00A0).
///
/// The wrapping code excludes this character from its whitespace
/// checks, so it is never recorded as a split point between words.
const NBSP: char = '\u{a0}';
65
/// An interface for splitting words.
///
/// When the [`wrap_iter`] method tries to fit text into a line, it
/// will eventually find a word that is too large for the current text
/// width. It will then call the currently configured `WordSplitter` to
/// have it attempt to split the word into smaller parts. This trait
/// describes that functionality via the [`split`] method.
///
/// If the `textwrap` crate has been compiled with the `hyphenation`
/// feature enabled, you will find an implementation of `WordSplitter`
/// by the `hyphenation::language::Corpus` struct. Use this struct for
/// language-aware hyphenation. See the [`hyphenation` documentation]
/// for details.
///
/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of word. Each split is a triple
    /// with a head, a hyphen, and a tail where `head + &tail ==
    /// word`. The hyphen is the text appended after the head when
    /// the split is used to end a line; it can be empty if there is
    /// already a hyphen in the head.
    ///
    /// The splits should go from smallest to longest and should
    /// include no split at all. So the word "technology" could be
    /// split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
101
102 /// Use this as a [`Wrapper.splitter`] to avoid any kind of
103 /// hyphenation:
104 ///
105 /// ```
106 /// use textwrap::{Wrapper, NoHyphenation};
107 ///
108 /// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
109 /// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
110 /// ```
111 ///
112 /// [`Wrapper.splitter`]: struct.Wrapper.html#structfield.splitter
113 #[derive(Clone, Debug)]
114 pub struct NoHyphenation;
115
116 /// `NoHyphenation` implements `WordSplitter` by not splitting the
117 /// word at all.
118 impl WordSplitter for NoHyphenation {
119 fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
120 vec![(word, "", "")]
121 }
122 }
123
124 /// Simple and default way to split words: splitting on existing
125 /// hyphens only.
126 ///
127 /// You probably don't need to use this type since it's already used
128 /// by default by `Wrapper::new`.
129 #[derive(Clone, Debug)]
130 pub struct HyphenSplitter;
131
132 /// `HyphenSplitter` is the default `WordSplitter` used by
133 /// `Wrapper::new`. It will split words on any existing hyphens in the
134 /// word.
135 ///
136 /// It will only use hyphens that are surrounded by alphanumeric
137 /// characters, which prevents a word like "--foo-bar" from being
138 /// split on the first or second hyphen.
139 impl WordSplitter for HyphenSplitter {
140 fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
141 let mut triples = Vec::new();
142 // Split on hyphens, smallest split first. We only use hyphens
143 // that are surrounded by alphanumeric characters. This is to
144 // avoid splitting on repeated hyphens, such as those found in
145 // --foo-bar.
146 let mut char_indices = word.char_indices();
147 // Early return if the word is empty.
148 let mut prev = match char_indices.next() {
149 None => return vec![(word, "", "")],
150 Some((_, ch)) => ch,
151 };
152
153 // Find current word, or return early if the word only has a
154 // single character.
155 let (mut idx, mut cur) = match char_indices.next() {
156 None => return vec![(word, "", "")],
157 Some((idx, cur)) => (idx, cur),
158 };
159
160 for (i, next) in char_indices {
161 if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
162 let (head, tail) = word.split_at(idx + 1);
163 triples.push((head, "", tail));
164 }
165 prev = cur;
166 idx = i;
167 cur = next;
168 }
169
170 // Finally option is no split at all.
171 triples.push((word, "", ""));
172
173 triples
174 }
175 }
176
/// Language-specific hyphenation: a `Corpus` from the hyphenation
/// crate supplies the break opportunities for each word.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Corpus {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // One triple per break opportunity reported by the corpus.
        let mut triples: Vec<(&'w str, &'w str, &'w str)> = word.opportunities(self)
            .into_iter()
            .map(|n| {
                let (head, tail) = word.split_at(n);
                // Do not double up the hyphen if the head already
                // ends with one.
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();

        // The last option is to not split the word at all.
        triples.push((word, "", ""));
        triples
    }
}
195
/// Append `rhs` to `lhs`, mirroring the `AddAssign` implementation
/// that `Cow<str>` gained in Rust 1.14.
///
/// An empty `lhs` is replaced by a borrow of `rhs`, an empty `rhs`
/// leaves `lhs` untouched, and in every other case `lhs` ends up
/// owned with `rhs` appended.
fn cow_add_assign<'a>(lhs: &mut Cow<'a, str>, rhs: &'a str) {
    if lhs.is_empty() {
        // Nothing to keep: simply borrow the right-hand side.
        *lhs = Cow::Borrowed(rhs);
        return;
    }
    if rhs.is_empty() {
        return;
    }

    let borrowed = match *lhs {
        Cow::Borrowed(text) => Some(text),
        Cow::Owned(_) => None,
    };
    if let Some(text) = borrowed {
        // Switch to an owned string sized for the final result so the
        // append below does not reallocate.
        let mut buffer = String::with_capacity(text.len() + rhs.len());
        buffer.push_str(text);
        *lhs = Cow::Owned(buffer);
    }
    lhs.to_mut().push_str(rhs);
}
211
212
/// A Wrapper holds settings for wrapping and filling text. Use it
/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions
/// are not flexible enough.
///
/// [`wrap_iter`]: fn.wrap_iter.html
/// [`wrap`]: fn.wrap.html
/// [`fill`]: fn.fill.html
///
/// The algorithm used by the `WrapIter` iterator (returned from the
/// `wrap_iter` method) works by doing successive partial scans over
/// words in the input string (where each single scan yields a single
/// line) so that the overall time and memory complexity is O(*n*) where
/// *n* is the length of the input string.
#[derive(Clone, Debug)]
pub struct Wrapper<'a, S: WordSplitter> {
    /// The width in columns at which the text will be wrapped.
    pub width: usize,
    /// Indentation used for the first line of output. Its displayed
    /// width counts towards `width`. Defaults to the empty string.
    pub initial_indent: &'a str,
    /// Indentation used for subsequent lines of output. Its displayed
    /// width counts towards `width`. Defaults to the empty string.
    pub subsequent_indent: &'a str,
    /// Allow long words to be broken if they cannot fit on a line.
    /// When set to `false`, some lines may be longer than
    /// `self.width`. Defaults to `true`.
    pub break_words: bool,
    /// The method for splitting words. If the `hyphenation` feature
    /// is enabled, you can use a `hyphenation::language::Corpus` here
    /// to get language-aware hyphenation.
    pub splitter: S,
}
243
244 impl<'a> Wrapper<'a, HyphenSplitter> {
245 /// Create a new Wrapper for wrapping at the specified width. By
246 /// default, we allow words longer than `width` to be broken. A
247 /// [`HyphenSplitter`] will be used by default for splitting
248 /// words. See the [`WordSplitter`] trait for other options.
249 ///
250 /// [`HyphenSplitter`]: struct.HyphenSplitter.html
251 /// [`WordSplitter`]: trait.WordSplitter.html
252 pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> {
253 Wrapper::with_splitter(width, HyphenSplitter)
254 }
255
256 /// Create a new Wrapper for wrapping text at the current terminal
257 /// width. If the terminal width cannot be determined (typically
258 /// because the standard input and output is not connected to a
259 /// terminal), a width of 80 characters will be used. Other
260 /// settings use the same defaults as `Wrapper::new`.
261 ///
262 /// Equivalent to:
263 ///
264 /// ```no_run
265 /// # #![allow(unused_variables)]
266 /// use textwrap::{Wrapper, termwidth};
267 ///
268 /// let wrapper = Wrapper::new(termwidth());
269 /// ```
270 #[cfg(feature = "term_size")]
271 pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> {
272 Wrapper::new(termwidth())
273 }
274 }
275
276 impl<'w, 'a: 'w, S: WordSplitter> Wrapper<'a, S> {
277 /// Use the given [`WordSplitter`] to create a new Wrapper for
278 /// wrapping at the specified width. By default, we allow words
279 /// longer than `width` to be broken.
280 ///
281 /// [`WordSplitter`]: trait.WordSplitter.html
282 pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> {
283 Wrapper {
284 width: width,
285 initial_indent: "",
286 subsequent_indent: "",
287 break_words: true,
288 splitter: splitter,
289 }
290 }
291
292 /// Change [`self.initial_indent`]. The initial indentation is
293 /// used on the very first line of output.
294 ///
295 /// # Examples
296 ///
297 /// Classic paragraph indentation can be achieved by specifying an
298 /// initial indentation and wrapping each paragraph by itself:
299 ///
300 /// ```no_run
301 /// # #![allow(unused_variables)]
302 /// use textwrap::Wrapper;
303 ///
304 /// let wrapper = Wrapper::new(15).initial_indent(" ");
305 /// ```
306 ///
307 /// [`self.initial_indent`]: #structfield.initial_indent
308 pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> {
309 Wrapper { initial_indent: indent, ..self }
310 }
311
312 /// Change [`self.subsequent_indent`]. The subsequent indentation
313 /// is used on lines following the first line of output.
314 ///
315 /// # Examples
316 ///
317 /// Combining initial and subsequent indentation lets you format a
318 /// single paragraph as a bullet list:
319 ///
320 /// ```no_run
321 /// # #![allow(unused_variables)]
322 /// use textwrap::Wrapper;
323 ///
324 /// let wrapper = Wrapper::new(15)
325 /// .initial_indent("* ")
326 /// .subsequent_indent(" ");
327 /// ```
328 ///
329 /// [`self.subsequent_indent`]: #structfield.subsequent_indent
330 pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> {
331 Wrapper { subsequent_indent: indent, ..self }
332 }
333
334 /// Change [`self.break_words`]. This controls if words longer
335 /// than `self.width` can be broken, or if they will be left
336 /// sticking out into the right margin.
337 ///
338 /// [`self.break_words`]: #structfield.break_words
339 pub fn break_words(self, setting: bool) -> Wrapper<'a, S> {
340 Wrapper { break_words: setting, ..self }
341 }
342
343 /// Fill a line of text at `self.width` characters. Strings are
344 /// wrapped based on their displayed width, not their size in
345 /// bytes.
346 ///
347 /// The result is a string with newlines between each line. Use
348 /// the `wrap` method if you need access to the individual lines.
349 ///
350 /// # Complexities
351 ///
352 /// This method simply joins the lines produced by `wrap_iter`. As
353 /// such, it inherits the O(*n*) time and memory complexity where
354 /// *n* is the input string length.
355 ///
356 /// # Examples
357 ///
358 /// ```
359 /// use textwrap::Wrapper;
360 ///
361 /// let wrapper = Wrapper::new(15);
362 /// assert_eq!(wrapper.fill("Memory safety without garbage collection."),
363 /// "Memory safety\nwithout garbage\ncollection.");
364 /// ```
365 pub fn fill(&self, s: &str) -> String {
366 // This will avoid reallocation in simple cases (no
367 // indentation, no hyphenation).
368 let mut result = String::with_capacity(s.len());
369
370 for (i, line) in self.wrap_iter(s).enumerate() {
371 if i > 0 {
372 result.push('\n');
373 }
374 result.push_str(&line);
375 }
376
377 result
378 }
379
380 /// Wrap a line of text at `self.width` characters. Strings are
381 /// wrapped based on their displayed width, not their size in
382 /// bytes.
383 ///
384 /// # Complexities
385 ///
386 /// This method simply collects the lines produced by `wrap_iter`.
387 /// As such, it inherits the O(*n*) overall time and memory
388 /// complexity where *n* is the input string length.
389 ///
390 /// # Examples
391 ///
392 /// ```
393 /// use textwrap::Wrapper;
394 ///
395 /// let wrap15 = Wrapper::new(15);
396 /// assert_eq!(wrap15.wrap("Concurrency without data races."),
397 /// vec!["Concurrency",
398 /// "without data",
399 /// "races."]);
400 ///
401 /// let wrap20 = Wrapper::new(20);
402 /// assert_eq!(wrap20.wrap("Concurrency without data races."),
403 /// vec!["Concurrency without",
404 /// "data races."]);
405 /// ```
406 pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> {
407 self.wrap_iter(s).collect::<Vec<_>>()
408 }
409
410 /// Lazily wrap a line of text at `self.width` characters. Strings
411 /// are wrapped based on their displayed width, not their size in
412 /// bytes.
413 ///
/// The [`WordSplitter`] stored in [`self.splitter`] is used
/// whenever a word is too large to fit on the current line.
416 /// By changing the field, different hyphenation strategies can be
417 /// implemented.
418 ///
419 /// # Complexities
420 ///
421 /// This method returns a [`WrapIter`] iterator which borrows this
422 /// `Wrapper`. The algorithm used has a linear complexity, so
423 /// getting the next line from the iterator will take O(*w*) time,
424 /// where *w* is the wrapping width. Fully processing the iterator
425 /// will take O(*n*) time for an input string of length *n*.
426 ///
427 /// When no indentation is used, each line returned is a slice of
428 /// the input string and the memory overhead is thus constant.
429 /// Otherwise new memory is allocated for each line returned.
430 ///
431 /// # Examples
432 ///
433 /// ```
434 /// use std::borrow::Cow;
435 /// use textwrap::Wrapper;
436 ///
437 /// let wrap20 = Wrapper::new(20);
438 /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions.");
439 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
440 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
441 /// assert_eq!(wrap20_iter.next(), None);
442 ///
443 /// let wrap25 = Wrapper::new(25);
444 /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions.");
445 /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
446 /// assert_eq!(wrap25_iter.next(), None);
447 /// ```
448 ///
449 /// [`self.splitter`]: #structfield.splitter
450 /// [`WordSplitter`]: trait.WordSplitter.html
451 /// [`WrapIter`]: struct.WrapIter.html
452 pub fn wrap_iter(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> {
453 WrapIter {
454 wrapper: self,
455 wrap_iter_impl: WrapIterImpl::new(self, s),
456 }
457 }
458
459 /// Lazily wrap a line of text at `self.width` characters. Strings
460 /// are wrapped based on their displayed width, not their size in
461 /// bytes.
462 ///
/// The [`WordSplitter`] stored in [`self.splitter`] is used
/// whenever a word is too large to fit on the current line.
465 /// By changing the field, different hyphenation strategies can be
466 /// implemented.
467 ///
468 /// # Complexities
469 ///
470 /// This method consumes the `Wrapper` and returns a
471 /// [`IntoWrapIter`] iterator. Fully processing the iterator has
472 /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is
473 /// the length of the input string.
474 ///
475 /// # Examples
476 ///
477 /// ```
478 /// use std::borrow::Cow;
479 /// use textwrap::Wrapper;
480 ///
481 /// let wrap20 = Wrapper::new(20);
482 /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions.");
483 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
484 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
485 /// assert_eq!(wrap20_iter.next(), None);
486 /// ```
487 ///
488 /// [`self.splitter`]: #structfield.splitter
489 /// [`WordSplitter`]: trait.WordSplitter.html
490 /// [`IntoWrapIter`]: struct.IntoWrapIter.html
491 /// [`wrap_iter`]: #method.wrap_iter
492 pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> {
493 let wrap_iter_impl = WrapIterImpl::new(&self, s);
494
495 IntoWrapIter {
496 wrapper: self,
497 wrap_iter_impl: wrap_iter_impl,
498 }
499 }
500 }
501
502
503 /// An iterator over the lines of the input string which owns a
504 /// `Wrapper`. An instance of `IntoWrapIter` is typically obtained
505 /// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`].
506 ///
507 /// Each call of `.next()` method yields a line wrapped in `Some` if the
508 /// input hasn't been fully processed yet. Otherwise it returns `None`.
509 ///
510 /// [`wrap_iter`]: fn.wrap_iter.html
511 /// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
512 #[derive(Debug)]
513 pub struct IntoWrapIter<'a, S: WordSplitter> {
514 wrapper: Wrapper<'a, S>,
515 wrap_iter_impl: WrapIterImpl<'a>,
516 }
517
518 impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> {
519 type Item = Cow<'a, str>;
520
521 fn next(&mut self) -> Option<Cow<'a, str>> {
522 self.wrap_iter_impl.impl_next(&self.wrapper)
523 }
524 }
525
526 /// An iterator over the lines of the input string which borrows a
527 /// `Wrapper`. An instance of `WrapIter` is typically obtained
528 /// through the [`Wrapper::wrap_iter`] method.
529 ///
530 /// Each call of `.next()` method yields a line wrapped in `Some` if the
531 /// input hasn't been fully processed yet. Otherwise it returns `None`.
532 ///
533 /// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter
534 #[derive(Debug)]
535 pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> {
536 wrapper: &'w Wrapper<'a, S>,
537 wrap_iter_impl: WrapIterImpl<'a>,
538 }
539
540 impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> {
541 type Item = Cow<'a, str>;
542
543 fn next(&mut self) -> Option<Cow<'a, str>> {
544 self.wrap_iter_impl.impl_next(self.wrapper)
545 }
546 }
547
// Scanning state shared by `WrapIter` and `IntoWrapIter`. The wrapper
// settings are not stored here; they are passed to each method call.
struct WrapIterImpl<'a> {
    // String to wrap.
    source: &'a str,
    // CharIndices iterator over self.source. It keeps its position
    // between calls to impl_next.
    char_indices: CharIndices<'a>,
    // Is the next element the first one ever produced? Decides whether
    // the initial or the subsequent indentation is used.
    is_next_first: bool,
    // Byte index where the current line starts.
    start: usize,
    // Byte index of the last place where the string can be split.
    split: usize,
    // Size in bytes of the whitespace run starting at self.split. It
    // is zero when the split point lies inside a word.
    split_len: usize,
    // Width of the indentation plus self.source[self.start..idx].
    line_width: usize,
    // Value of line_width recorded at the split point, including the
    // whitespace run at the split.
    line_width_at_split: usize,
    // Tracking runs of whitespace characters.
    in_whitespace: bool,
    // Has iterator finished producing elements?
    finished: bool,
}
570
571 impl<'a> fmt::Debug for WrapIterImpl<'a> {
572 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
573 f.debug_struct("WrapIterImpl")
574 .field("source", &self.source)
575 .field("char_indices", &"CharIndices { ... }")
576 .field("is_next_first", &self.is_next_first)
577 .field("start", &self.start)
578 .field("split", &self.split)
579 .field("split_len", &self.split_len)
580 .field("line_width", &self.line_width)
581 .field("line_width_at_split", &self.line_width_at_split)
582 .field("in_whitespace", &self.in_whitespace)
583 .field("finished", &self.finished)
584 .finish()
585 }
586 }
587
588 impl<'a> WrapIterImpl<'a> {
589 fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> {
590 WrapIterImpl {
591 source: s,
592 char_indices: s.char_indices(),
593 is_next_first: true,
594 start: 0,
595 split: 0,
596 split_len: 0,
597 line_width: wrapper.initial_indent.width(),
598 line_width_at_split: wrapper.initial_indent.width(),
599 in_whitespace: false,
600 finished: false,
601 }
602 }
603
604 fn create_result_line<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> {
605 if self.is_next_first {
606 self.is_next_first = false;
607 Cow::from(wrapper.initial_indent)
608 } else {
609 Cow::from(wrapper.subsequent_indent)
610 }
611 }
612
613 fn impl_next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> {
614 if self.finished {
615 return None;
616 }
617
618 while let Some((idx, ch)) = self.char_indices.next() {
619 let char_width = ch.width().unwrap_or(0);
620 let char_len = ch.len_utf8();
621 if ch.is_whitespace() && ch != NBSP {
622 // Extend the previous split or create a new one.
623 if self.in_whitespace {
624 self.split_len += char_len;
625 } else {
626 self.split = idx;
627 self.split_len = char_len;
628 }
629 self.line_width_at_split = self.line_width + char_width;
630 self.in_whitespace = true;
631 } else if self.line_width + char_width > wrapper.width {
632 // There is no room for this character on the current
633 // line. Try to split the final word.
634 let remaining_text = &self.source[self.split + self.split_len..];
635 let final_word = match remaining_text
636 .find(|ch: char| ch.is_whitespace() && ch != NBSP) {
637 Some(i) => &remaining_text[..i],
638 None => remaining_text,
639 };
640
641 let mut hyphen = "";
642 let splits = wrapper.splitter.split(final_word);
643 for &(head, hyp, _) in splits.iter().rev() {
644 if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width {
645 self.split += head.len();
646 self.split_len = 0;
647 hyphen = hyp;
648 break;
649 }
650 }
651
652 if self.start >= self.split {
653 // The word is too big to fit on a single line, so we
654 // need to split it at the current index.
655 if wrapper.break_words {
656 // Break work at current index.
657 self.split = idx;
658 self.split_len = 0;
659 self.line_width_at_split = self.line_width;
660 } else {
661 // Add smallest split.
662 self.split = self.start + splits[0].0.len();
663 self.split_len = 0;
664 self.line_width_at_split = self.line_width;
665 }
666 }
667
668 if self.start < self.split {
669 let mut result_line = self.create_result_line(wrapper);
670 cow_add_assign(&mut result_line, &self.source[self.start..self.split]);
671 cow_add_assign(&mut result_line, hyphen);
672
673 self.start = self.split + self.split_len;
674 self.line_width += wrapper.subsequent_indent.width();
675 self.line_width -= self.line_width_at_split;
676 self.line_width += char_width;
677
678 return Some(result_line);
679 }
680 } else {
681 self.in_whitespace = false;
682 }
683 self.line_width += char_width;
684 }
685
686 // Add final line.
687 let final_line = if self.start < self.source.len() {
688 let mut result_line = self.create_result_line(wrapper);
689 cow_add_assign(&mut result_line, &self.source[self.start..]);
690
691 Some(result_line)
692 } else {
693 None
694 };
695
696 self.finished = true;
697
698 final_line
699 }
700 }
701
702
703 /// Return the current terminal width. If the terminal width cannot be
704 /// determined (typically because the standard output is not connected
705 /// to a terminal), a default width of 80 characters will be used.
706 ///
707 /// # Examples
708 ///
709 /// Create a `Wrapper` for the current terminal with a two column
710 /// margin:
711 ///
712 /// ```no_run
713 /// # #![allow(unused_variables)]
714 /// use textwrap::{Wrapper, NoHyphenation, termwidth};
715 ///
716 /// let width = termwidth() - 4; // Two columns on each side.
717 /// let wrapper = Wrapper::with_splitter(width, NoHyphenation)
718 /// .initial_indent(" ")
719 /// .subsequent_indent(" ");
720 /// ```
#[cfg(feature = "term_size")]
pub fn termwidth() -> usize {
    // Only the column count is of interest; fall back to 80 columns
    // when the dimensions of standard output are unavailable.
    match term_size::dimensions_stdout() {
        Some((columns, _)) => columns,
        None => 80,
    }
}
725
726 /// Fill a line of text at `width` characters. Strings are wrapped
727 /// based on their displayed width, not their size in bytes.
728 ///
729 /// The result is a string with newlines between each line. Use
730 /// [`wrap`] if you need access to the individual lines or
731 /// [`wrap_iter`] for its iterator counterpart.
732 ///
733 /// ```
734 /// use textwrap::fill;
735 ///
736 /// assert_eq!(fill("Memory safety without garbage collection.", 15),
737 /// "Memory safety\nwithout garbage\ncollection.");
738 /// ```
739 ///
740 /// This function creates a Wrapper on the fly with default settings.
741 /// If you need to set a language corpus for automatic hyphenation, or
742 /// need to fill many strings, then it is suggested to create a Wrapper
743 /// and call its [`fill` method].
744 ///
745 /// [`wrap`]: fn.wrap.html
746 /// [`wrap_iter`]: fn.wrap_iter.html
747 /// [`fill` method]: struct.Wrapper.html#method.fill
748 pub fn fill(s: &str, width: usize) -> String {
749 Wrapper::new(width).fill(s)
750 }
751
752 /// Wrap a line of text at `width` characters. Strings are wrapped
753 /// based on their displayed width, not their size in bytes.
754 ///
755 /// This function creates a Wrapper on the fly with default settings.
756 /// If you need to set a language corpus for automatic hyphenation, or
757 /// need to wrap many strings, then it is suggested to create a Wrapper
758 /// and call its [`wrap` method].
759 ///
760 /// The result is a vector of strings. Use [`wrap_iter`] if you need an
761 /// iterator version.
762 ///
763 /// # Examples
764 ///
765 /// ```
766 /// use textwrap::wrap;
767 ///
768 /// assert_eq!(wrap("Concurrency without data races.", 15),
769 /// vec!["Concurrency",
770 /// "without data",
771 /// "races."]);
772 ///
773 /// assert_eq!(wrap("Concurrency without data races.", 20),
774 /// vec!["Concurrency without",
775 /// "data races."]);
776 /// ```
777 ///
778 /// [`wrap_iter`]: fn.wrap_iter.html
779 /// [`wrap` method]: struct.Wrapper.html#method.wrap
780 pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> {
781 Wrapper::new(width).wrap(s)
782 }
783
784 /// Lazily wrap a line of text at `self.width` characters. Strings are
785 /// wrapped based on their displayed width, not their size in bytes.
786 ///
787 /// This function creates a Wrapper on the fly with default settings.
788 /// It then calls the [`into_wrap_iter`] method. Hence, the return
789 /// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function
790 /// name would otherwise suggest.
791 ///
792 /// If you need to set a language corpus for automatic hyphenation, or
793 /// need to wrap many strings, then it is suggested to create a Wrapper
794 /// and call its [`wrap_iter`] or [`into_wrap_iter`] methods.
795 ///
796 /// # Examples
797 ///
798 /// ```
799 /// use std::borrow::Cow;
800 /// use textwrap::wrap_iter;
801 ///
802 /// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20);
803 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
804 /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
805 /// assert_eq!(wrap20_iter.next(), None);
806 ///
807 /// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25);
808 /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
809 /// assert_eq!(wrap25_iter.next(), None);
810 /// ```
811 ///
812 /// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter
813 /// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
814 /// [`IntoWrapIter`]: struct.IntoWrapIter.html
815 /// [`WrapIter`]: struct.WrapIter.html
816 pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> {
817 Wrapper::new(width).into_wrap_iter(s)
818 }
819
820 /// Add prefix to each non-empty line.
821 ///
822 /// ```
823 /// use textwrap::indent;
824 ///
825 /// assert_eq!(indent("Foo\nBar\n", " "), " Foo\n Bar\n");
826 /// ```
827 ///
828 /// Empty lines (lines consisting only of whitespace) are not indented
829 /// and the whitespace is replaced by a single newline (`\n`):
830 ///
831 /// ```
832 /// use textwrap::indent;
833 ///
834 /// assert_eq!(indent("Foo\n\nBar\n \t \nBaz\n", " "),
835 /// " Foo\n\n Bar\n\n Baz\n");
836 /// ```
837 ///
838 /// Leading and trailing whitespace on non-empty lines is kept
839 /// unchanged:
840 ///
841 /// ```
842 /// use textwrap::indent;
843 ///
844 /// assert_eq!(indent(" \t Foo ", " "), " \t Foo \n");
845 /// ```
pub fn indent(s: &str, prefix: &str) -> String {
    s.lines().fold(String::new(), |mut indented, line| {
        // Blank lines (empty or whitespace only) get neither the
        // prefix nor their own content, just the newline.
        let is_blank = line.chars().all(|c| c.is_whitespace());
        if !is_blank {
            indented.push_str(prefix);
            indented.push_str(line);
        }
        indented.push('\n');
        indented
    })
}
857
/// Removes common leading whitespace from each line.
///
/// This will look at each non-empty line (a line with at least one
/// non-whitespace character) and determine the longest run of
/// leading whitespace shared by all of them. That prefix is then
/// removed from every non-empty line.
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("    1st line\n    2nd line\n"),
///            "1st line\n2nd line\n");
/// ```
///
/// Lines that are empty or consist only of whitespace do not take
/// part in finding the common prefix. They are written out as empty
/// lines:
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("  foo\n\n  bar\n"), "foo\n\nbar\n");
/// ```
///
/// Every line of the result is terminated by a newline (`\n`), even
/// if the last line of the input was not.
pub fn dedent(s: &str) -> String {
    // Longest whitespace prefix shared by all lines that have visible
    // content. `None` until the first such line has been seen.
    let mut prefix: Option<&str> = None;
    for line in s.lines() {
        let content_start = match line.find(|c: char| !c.is_whitespace()) {
            Some(idx) => idx,
            // Whitespace-only lines must not shorten the prefix.
            None => continue,
        };
        let indentation = &line[..content_start];
        prefix = Some(match prefix {
            None => indentation,
            Some(current) => {
                // Byte length of the part on which both agree.
                let shared = current.chars()
                    .zip(indentation.chars())
                    .take_while(|&(a, b)| a == b)
                    .fold(0, |len, (a, _)| len + a.len_utf8());
                &current[..shared]
            }
        });
    }
    let prefix = prefix.unwrap_or("");

    // Second pass: strip the prefix and build the result.
    let mut result = String::with_capacity(s.len() + 1);
    for line in s.lines() {
        if line.chars().any(|c| !c.is_whitespace()) {
            // Every line with content starts with `prefix` by
            // construction, so this slice is always valid.
            result.push_str(&line[prefix.len()..]);
        }
        result.push('\n');
    }
    result
}
910
#[cfg(test)]
mod tests {
    // NOTE: runs of spaces inside the string literals below are
    // significant, and characters that are easy to lose or confuse
    // (fullwidth forms, non-breaking space/hyphen) are written as
    // `\u{...}` escapes so they survive editors and text normalisation.

    #[cfg(feature = "hyphenation")]
    extern crate hyphenation;

    #[cfg(feature = "hyphenation")]
    use hyphenation::Language;
    use super::*;

    /// Joins `lines` with newlines and appends a final newline, so
    /// that the last line in the slice is terminated as well.
    fn add_nl(lines: &[&str]) -> String {
        lines.join("\n") + "\n"
    }

    #[test]
    fn no_wrap() {
        assert_eq!(wrap("foo", 10), vec!["foo"]);
    }

    #[test]
    fn simple() {
        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn multi_word_on_line() {
        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
    }

    #[test]
    fn long_word() {
        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
    }

    #[test]
    fn long_words() {
        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn max_width() {
        assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
    }

    #[test]
    fn leading_whitespace() {
        assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
    }

    #[test]
    fn trailing_whitespace() {
        assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]);
    }

    #[test]
    fn interior_whitespace() {
        assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]);
    }

    #[test]
    fn extra_whitespace_start_of_line() {
        // Whitespace is only significant inside a line. After a line
        // gets too long and is broken, the first word starts in
        // column zero and is not indented. The line before might end
        // up with trailing whitespace.
        assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]);
    }

    #[test]
    fn wide_character_handling() {
        // ASCII characters are one column wide, so this fits.
        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
        // The same text in fullwidth forms (U+FF21.. / U+FF41..):
        // every letter occupies two columns, so the line must break.
        assert_eq!(wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \
                         \u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!",
                        15),
                   vec!["\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f},",
                        "\u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"]);
    }

    #[test]
    fn indent_empty() {
        let wrapper = Wrapper::new(10).initial_indent("!!!");
        assert_eq!(wrapper.fill(""), "");
    }

    #[test]
    fn indent_single_line() {
        let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space
        assert_eq!(wrapper.fill("foo"), ">>>foo");
    }

    #[test]
    fn indent_multiple_lines() {
        // Two-column continuation indent lines up with the text
        // following the "* " bullet.
        let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn indent_break_words() {
        // Width 5 minus a two-column indent leaves exactly three
        // columns of text per line.
        let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn hyphens() {
        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
    }

    #[test]
    fn trailing_hyphen() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]);
    }

    #[test]
    fn multiple_hyphens() {
        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
    }

    #[test]
    fn hyphens_flag() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("The --foo-bar flag."),
                   vec!["The", "--foo-", "bar", "flag."]);
    }

    #[test]
    fn repeated_hyphens() {
        let wrapper = Wrapper::new(4).break_words(false);
        assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]);
    }

    #[test]
    fn hyphens_alphanumeric() {
        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
    }

    #[test]
    fn hyphens_non_alphanumeric() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]);
    }

    #[test]
    fn multiple_splits() {
        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
    }

    #[test]
    fn forced_split() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]);
    }

    #[test]
    fn no_hyphenation() {
        let wrapper = Wrapper::with_splitter(8, NoHyphenation);
        assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation() {
        let corpus = hyphenation::load(Language::English_US).unwrap();
        let wrapper = Wrapper::new(10);
        assert_eq!(wrapper.wrap("Internationalization"),
                   vec!["Internatio", "nalization"]);

        let wrapper = Wrapper::with_splitter(10, corpus);
        assert_eq!(wrapper.wrap("Internationalization"),
                   vec!["Interna-", "tionaliza-", "tion"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn borrowed_lines() {
        // Lines that end with an extra hyphen are owned, the final
        // line is borrowed.
        use std::borrow::Cow::{Borrowed, Owned};
        let corpus = hyphenation::load(Language::English_US).unwrap();
        let wrapper = Wrapper::with_splitter(10, corpus);
        let lines = wrapper.wrap("Internationalization");
        if let Borrowed(s) = lines[0] {
            panic!("should not have been borrowed: {:?}", s);
        }
        if let Borrowed(s) = lines[1] {
            panic!("should not have been borrowed: {:?}", s);
        }
        if let Owned(ref s) = lines[2] {
            panic!("should not have been owned: {:?}", s);
        }
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_with_hyphen() {
        let corpus = hyphenation::load(Language::English_US).unwrap();
        let wrapper = Wrapper::new(8).break_words(false);
        assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]);

        let wrapper = Wrapper::with_splitter(8, corpus).break_words(false);
        assert_eq!(wrapper.wrap("over-caffinated"),
                   vec!["over-", "caffi-", "nated"]);
    }

    #[test]
    fn break_words() {
        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn break_words_wide_characters() {
        // Fullwidth "Hello": each character is two columns wide, so
        // only two of them fit within a width of five.
        assert_eq!(wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}", 5),
                   vec!["\u{ff28}\u{ff45}", "\u{ff4c}\u{ff4c}", "\u{ff4f}"]);
    }

    #[test]
    fn break_words_zero_width() {
        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
    }

    #[test]
    fn test_non_breaking_space() {
        // U+00A0 NO-BREAK SPACE must not be used as a break point.
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.fill("foo\u{a0}bar\u{a0}baz"),
                   "foo\u{a0}bar\u{a0}baz");
    }

    #[test]
    fn test_non_breaking_hyphen() {
        // U+2011 NON-BREAKING HYPHEN must not be used as a break point.
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.fill("foo\u{2011}bar\u{2011}baz"),
                   "foo\u{2011}bar\u{2011}baz");
    }

    #[test]
    fn test_fill() {
        assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz");
    }

    #[test]
    fn test_indent_empty() {
        assert_eq!(indent("\n", " "), "\n");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_indent_nonempty() {
        let x = vec![" foo",
                     "bar",
                     " baz"];
        let y = vec!["// foo",
                     "//bar",
                     "// baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_indent_empty_line() {
        let x = vec![" foo",
                     "bar",
                     "",
                     " baz"];
        let y = vec!["// foo",
                     "//bar",
                     "",
                     "// baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    fn test_dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_multi_line() {
        // The shallowest line ("  bar") determines how much is removed.
        let x = vec!["    foo",
                     "  bar",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_empty_line() {
        // A whitespace-only line comes out as an empty line.
        let x = vec!["    foo",
                     "  bar",
                     "   ",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_mixed_whitespace() {
        // A tab and a space share no common prefix: nothing is removed.
        let x = vec!["\tfoo",
                     " bar"];
        let y = vec!["\tfoo",
                     " bar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }
}