]> git.proxmox.com Git - rustc.git/blame - vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
New upstream version 1.63.0+dfsg1
[rustc.git] / vendor / textwrap / src / wrap_algorithms / optimal_fit.rs
CommitLineData
04454e1e
FG
1use std::cell::RefCell;
2
923072b8 3use crate::core::Fragment;
04454e1e 4
923072b8
FG
5/// Penalties for
6/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
7/// and [`wrap_optimal_fit`].
04454e1e 8///
923072b8
FG
9/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
10/// entire paragraph to find optimal line breaks. When wrapping text,
11/// "penalties" are assigned to line breaks based on the gaps left at
12/// the end of lines. The penalties are given by this struct, with
13/// [`Penalties::default`] assigning penalties that work well for
14/// monospace text.
15///
16/// If you are wrapping proportional text, you are advised to assign
17/// your own penalties according to your font size. See the individual
18/// penalties below for details.
04454e1e
FG
19///
20/// **Note:** Only available when the `smawk` Cargo feature is
21/// enabled.
923072b8
FG
22#[derive(Clone, Copy, Debug)]
23pub struct Penalties {
24 /// Per-line penalty. This is added for every line, which makes it
25 /// expensive to output more lines than the minimum required.
26 pub nline_penalty: usize,
27
28 /// Per-character cost for lines that overflow the target line width.
29 ///
30 /// With a default value of 50², every single character costs as
31 /// much as leaving a gap of 50 characters behind. This is because
32 /// we assign as cost of `gap * gap` to a short line. When
33 /// wrapping monospace text, we can overflow the line by 1
34 /// character in extreme cases:
35 ///
36 /// ```
37 /// use textwrap::core::Word;
38 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
39 ///
40 /// let short = "foo ";
41 /// let long = "x".repeat(50);
42 /// let length = (short.len() + long.len()) as f64;
43 /// let fragments = vec![Word::from(short), Word::from(&long)];
44 /// let penalties = Penalties::new();
45 ///
46 /// // Perfect fit, both words are on a single line with no overflow.
47 /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
48 /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
49 ///
50 /// // The words no longer fit, yet we get a single line back. While
51 /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
52 /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
53 /// // which makes it cheaper to overflow than to use two lines.
54 /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
55 /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
56 ///
57 /// // The cost of overflow would be 2 * 2500, whereas the cost of
58 /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
59 /// // 3401`. We therefore get two lines.
60 /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
61 /// assert_eq!(wrapped, vec![&[Word::from(short)],
62 /// &[Word::from(&long)]]);
63 /// ```
64 ///
65 /// This only happens if the overflowing word is 50 characters
66 /// long _and_ if the word overflows the line by exactly one
67 /// character. If it overflows by more than one character, the
68 /// overflow penalty will quickly outgrow the cost of the gap, as
69 /// seen above.
70 pub overflow_penalty: usize,
71
72 /// When should the a single word on the last line be considered
73 /// "too short"?
74 ///
75 /// If the last line of the text consist of a single word and if
76 /// this word is shorter than `1 / short_last_line_fraction` of
77 /// the line width, then the final line will be considered "short"
78 /// and `short_last_line_penalty` is added as an extra penalty.
79 ///
80 /// The effect of this is to avoid a final line consisting of a
81 /// single small word. For example, with a
82 /// `short_last_line_penalty` of 25 (the default), a gap of up to
83 /// 5 columns will be seen as more desirable than having a final
84 /// short line.
85 ///
86 /// ## Examples
87 ///
88 /// ```
89 /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
90 ///
91 /// let text = "This is a demo of the short last line penalty.";
92 ///
93 /// // The first-fit algorithm leaves a single short word on the last line:
94 /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
95 /// vec!["This is a demo of the short last line",
96 /// "penalty."]);
97 ///
98 /// #[cfg(feature = "smawk")] {
99 /// let mut penalties = wrap_algorithms::Penalties::new();
100 ///
101 /// // Since "penalty." is shorter than 25% of the line width, the
102 /// // optimal-fit algorithm adds a penalty of 25. This is enough
103 /// // to move "line " down:
104 /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
105 /// vec!["This is a demo of the short last",
106 /// "line penalty."]);
107 ///
108 /// // We can change the meaning of "short" lines. Here, only words
109 /// // shorter than 1/10th of the line width will be considered short:
110 /// penalties.short_last_line_fraction = 10;
111 /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
112 /// vec!["This is a demo of the short last line",
113 /// "penalty."]);
114 ///
115 /// // If desired, the penalty can also be disabled:
116 /// penalties.short_last_line_fraction = 4;
117 /// penalties.short_last_line_penalty = 0;
118 /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
119 /// vec!["This is a demo of the short last line",
120 /// "penalty."]);
121 /// }
122 /// ```
123 pub short_last_line_fraction: usize,
124
125 /// Penalty for a last line with a single short word.
126 ///
127 /// Set this to zero if you do not want to penalize short last lines.
128 pub short_last_line_penalty: usize,
129
130 /// Penalty for lines ending with a hyphen.
131 pub hyphen_penalty: usize,
132}
04454e1e 133
923072b8
FG
134impl Penalties {
135 /// Default penalties for monospace text.
136 ///
137 /// The penalties here work well for monospace text. This is
138 /// because they expect the gaps at the end of lines to be roughly
139 /// in the range `0..100`. If the gaps are larger, the
140 /// `overflow_penalty` and `hyphen_penalty` become insignificant.
141 pub const fn new() -> Self {
142 Penalties {
143 nline_penalty: 1000,
144 overflow_penalty: 50 * 50,
145 short_last_line_fraction: 4,
146 short_last_line_penalty: 25,
147 hyphen_penalty: 25,
148 }
149 }
150}
151
152impl Default for Penalties {
153 fn default() -> Self {
154 Self::new()
04454e1e
FG
155 }
156}
157
158/// Cache for line numbers. This is necessary to avoid a O(n**2)
159/// behavior when computing line numbers in [`wrap_optimal_fit`].
160struct LineNumbers {
161 line_numbers: RefCell<Vec<usize>>,
162}
163
164impl LineNumbers {
165 fn new(size: usize) -> Self {
166 let mut line_numbers = Vec::with_capacity(size);
167 line_numbers.push(0);
168 LineNumbers {
169 line_numbers: RefCell::new(line_numbers),
170 }
171 }
172
173 fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
174 while self.line_numbers.borrow_mut().len() < i + 1 {
175 let pos = self.line_numbers.borrow().len();
923072b8 176 let line_number = 1 + self.get(minima[pos].0, minima);
04454e1e
FG
177 self.line_numbers.borrow_mut().push(line_number);
178 }
179
180 self.line_numbers.borrow()[i]
181 }
182}
183
923072b8
FG
184/// Overflow error during the [`wrap_optimal_fit`] computation.
185#[derive(Debug, PartialEq, Eq)]
186pub struct OverflowError;
04454e1e 187
923072b8
FG
188impl std::fmt::Display for OverflowError {
189 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190 write!(f, "wrap_optimal_fit cost computation overflowed")
191 }
192}
04454e1e 193
923072b8 194impl std::error::Error for OverflowError {}
04454e1e
FG
195
196/// Wrap abstract fragments into lines with an optimal-fit algorithm.
197///
198/// The `line_widths` slice gives the target line width for each line
199/// (the last slice element is repeated as necessary). This can be
200/// used to implement hanging indentation.
201///
202/// The fragments must already have been split into the desired
203/// widths, this function will not (and cannot) attempt to split them
204/// further when arranging them into lines.
205///
206/// # Optimal-Fit Algorithm
207///
208/// The algorithm considers all possible break points and picks the
209/// breaks which minimizes the gaps at the end of each line. More
210/// precisely, the algorithm assigns a cost or penalty to each break
211/// point, determined by `cost = gap * gap` where `gap = target_width -
212/// line_width`. Shorter lines are thus penalized more heavily since
213/// they leave behind a larger gap.
214///
215/// We can illustrate this with the text “To be, or not to be: that is
216/// the question”. We will be wrapping it in a narrow column with room
217/// for only 10 characters. The [greedy
218/// algorithm](super::wrap_first_fit) will produce these lines, each
219/// annotated with the corresponding penalty:
220///
221/// ```text
222/// "To be, or" 1² = 1
223/// "not to be:" 0² = 0
224/// "that is" 3² = 9
225/// "the" 7² = 49
226/// "question" 2² = 4
227/// ```
228///
229/// We see that line four with “the” leaves a gap of 7 columns, which
230/// gives it a penalty of 49. The sum of the penalties is 63.
231///
232/// There are 10 words, which means that there are `2_u32.pow(9)` or
233/// 512 different ways to typeset it. We can compute
234/// the sum of the penalties for each possible line break and search
235/// for the one with the lowest sum:
236///
237/// ```text
238/// "To be," 4² = 16
239/// "or not to" 1² = 1
240/// "be: that" 2² = 4
241/// "is the" 4² = 16
242/// "question" 2² = 4
243/// ```
244///
245/// The sum of the penalties is 41, which is better than what the
246/// greedy algorithm produced.
247///
248/// Searching through all possible combinations would normally be
249/// prohibitively slow. However, it turns out that the problem can be
250/// formulated as the task of finding column minima in a cost matrix.
251/// This matrix has a special form (totally monotone) which lets us
252/// use a [linear-time algorithm called
253/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
254/// points.
255///
256/// This means that the time complexity remains O(_n_) where _n_ is
257/// the number of words. Compared to
258/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
259/// 4 times slower.
260///
261/// The optimization of per-line costs over the entire paragraph is
262/// inspired by the line breaking algorithm used in TeX, as described
263/// in the 1981 article [_Breaking Paragraphs into
264/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
265/// by Knuth and Plass. The implementation here is based on [Python
266/// code by David
267/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
268///
923072b8
FG
269/// # Errors
270///
271/// In case of an overflow during the cost computation, an `Err` is
272/// returned. Overflows happens when fragments or lines have infinite
273/// widths (`f64::INFINITY`) or if the widths are so large that the
274/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
275/// (approximately 1e154):
276///
277/// ```
278/// use textwrap::core::Fragment;
279/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
280///
281/// #[derive(Debug, PartialEq)]
282/// struct Word(f64);
283///
284/// impl Fragment for Word {
285/// fn width(&self) -> f64 { self.0 }
286/// fn whitespace_width(&self) -> f64 { 1.0 }
287/// fn penalty_width(&self) -> f64 { 0.0 }
288/// }
289///
290/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
291/// // larger than f64::MAX:
292/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
293/// Err(OverflowError));
294/// ```
295///
296/// When using fragment widths and line widths which fit inside an
297/// `u64`, overflows cannot happen. This means that fragments derived
298/// from a `&str` cannot cause overflows.
299///
04454e1e
FG
300/// **Note:** Only available when the `smawk` Cargo feature is
301/// enabled.
302pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
303 fragments: &'a [T],
923072b8
FG
304 line_widths: &'b [f64],
305 penalties: &'b Penalties,
306) -> Result<Vec<&'a [T]>, OverflowError> {
04454e1e 307 // The final line width is used for all remaining lines.
923072b8 308 let default_line_width = line_widths.last().copied().unwrap_or(0.0);
04454e1e 309 let mut widths = Vec::with_capacity(fragments.len() + 1);
923072b8 310 let mut width = 0.0;
04454e1e
FG
311 widths.push(width);
312 for fragment in fragments {
313 width += fragment.width() + fragment.whitespace_width();
314 widths.push(width);
315 }
316
317 let line_numbers = LineNumbers::new(fragments.len());
318
923072b8 319 let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
04454e1e 320 // Line number for fragment `i`.
923072b8 321 let line_number = line_numbers.get(i, minima);
04454e1e
FG
322 let line_width = line_widths
323 .get(line_number)
324 .copied()
325 .unwrap_or(default_line_width);
923072b8 326 let target_width = line_width.max(1.0);
04454e1e
FG
327
328 // Compute the width of a line spanning fragments[i..j] in
329 // constant time. We need to adjust widths[j] by subtracting
923072b8 330 // the whitespace of fragment[j-1] and then add the penalty.
04454e1e
FG
331 let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
332 + fragments[j - 1].penalty_width();
333
334 // We compute cost of the line containing fragments[i..j]. We
335 // start with values[i].1, which is the optimal cost for
336 // breaking before fragments[i].
337 //
338 // First, every extra line cost NLINE_PENALTY.
923072b8 339 let mut cost = minima[i].1 + penalties.nline_penalty as f64;
04454e1e
FG
340
341 // Next, we add a penalty depending on the line length.
342 if line_width > target_width {
343 // Lines that overflow get a hefty penalty.
923072b8
FG
344 let overflow = line_width - target_width;
345 cost += overflow * penalties.overflow_penalty as f64;
04454e1e
FG
346 } else if j < fragments.len() {
347 // Other lines (except for the last line) get a milder
348 // penalty which depend on the size of the gap.
923072b8 349 let gap = target_width - line_width;
04454e1e 350 cost += gap * gap;
923072b8
FG
351 } else if i + 1 == j
352 && line_width < target_width / penalties.short_last_line_fraction as f64
353 {
04454e1e
FG
354 // The last line can have any size gap, but we do add a
355 // penalty if the line is very short (typically because it
356 // contains just a single word).
923072b8 357 cost += penalties.short_last_line_penalty as f64;
04454e1e
FG
358 }
359
360 // Finally, we discourage hyphens.
923072b8 361 if fragments[j - 1].penalty_width() > 0.0 {
04454e1e
FG
362 // TODO: this should use a penalty value from the fragment
363 // instead.
923072b8 364 cost += penalties.hyphen_penalty as f64;
04454e1e
FG
365 }
366
367 cost
368 });
369
923072b8
FG
370 for (_, cost) in &minima {
371 if cost.is_infinite() {
372 return Err(OverflowError);
373 }
374 }
375
04454e1e
FG
376 let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
377 let mut pos = fragments.len();
378 loop {
379 let prev = minima[pos].0;
380 lines.push(&fragments[prev..pos]);
381 pos = prev;
382 if pos == 0 {
383 break;
384 }
385 }
386
387 lines.reverse();
923072b8
FG
388 Ok(lines)
389}
390
391#[cfg(test)]
392mod tests {
393 use super::*;
394
395 #[derive(Debug, PartialEq)]
396 struct Word(f64);
397
398 #[rustfmt::skip]
399 impl Fragment for Word {
400 fn width(&self) -> f64 { self.0 }
401 fn whitespace_width(&self) -> f64 { 1.0 }
402 fn penalty_width(&self) -> f64 { 0.0 }
403 }
404
405 #[test]
406 fn wrap_fragments_with_infinite_widths() {
407 let words = vec![Word(f64::INFINITY)];
408 assert_eq!(
409 wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
410 Err(OverflowError)
411 );
412 }
413
414 #[test]
415 fn wrap_fragments_with_huge_widths() {
416 let words = vec![Word(1e200), Word(1e250), Word(1e300)];
417 assert_eq!(
418 wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
419 Err(OverflowError)
420 );
421 }
422
423 #[test]
424 fn wrap_fragments_with_large_widths() {
425 // The gaps will be of the sizes between 1e25 and 1e75. This
426 // makes the `gap * gap` cost fit comfortably in a f64.
427 let words = vec![Word(1e25), Word(1e50), Word(1e75)];
428 assert_eq!(
429 wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
430 Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
431 );
432 }
04454e1e 433}