]>
Commit | Line | Data |
---|---|---|
04454e1e FG |
1 | use std::cell::RefCell; |
2 | ||
923072b8 | 3 | use crate::core::Fragment; |
04454e1e | 4 | |
923072b8 FG |
5 | /// Penalties for |
6 | /// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit) | |
7 | /// and [`wrap_optimal_fit`]. | |
04454e1e | 8 | /// |
923072b8 FG |
9 | /// This wrapping algorithm in [`wrap_optimal_fit`] considers the |
10 | /// entire paragraph to find optimal line breaks. When wrapping text, | |
11 | /// "penalties" are assigned to line breaks based on the gaps left at | |
12 | /// the end of lines. The penalties are given by this struct, with | |
13 | /// [`Penalties::default`] assigning penalties that work well for | |
14 | /// monospace text. | |
15 | /// | |
16 | /// If you are wrapping proportional text, you are advised to assign | |
17 | /// your own penalties according to your font size. See the individual | |
18 | /// penalties below for details. | |
04454e1e FG |
19 | /// |
20 | /// **Note:** Only available when the `smawk` Cargo feature is | |
21 | /// enabled. | |
923072b8 FG |
22 | #[derive(Clone, Copy, Debug)] |
23 | pub struct Penalties { | |
24 | /// Per-line penalty. This is added for every line, which makes it | |
25 | /// expensive to output more lines than the minimum required. | |
26 | pub nline_penalty: usize, | |
27 | ||
28 | /// Per-character cost for lines that overflow the target line width. | |
29 | /// | |
30 | /// With a default value of 50², every single character costs as | |
31 | /// much as leaving a gap of 50 characters behind. This is because | |
32 | /// we assign as cost of `gap * gap` to a short line. When | |
33 | /// wrapping monospace text, we can overflow the line by 1 | |
34 | /// character in extreme cases: | |
35 | /// | |
36 | /// ``` | |
37 | /// use textwrap::core::Word; | |
38 | /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; | |
39 | /// | |
40 | /// let short = "foo "; | |
41 | /// let long = "x".repeat(50); | |
42 | /// let length = (short.len() + long.len()) as f64; | |
43 | /// let fragments = vec![Word::from(short), Word::from(&long)]; | |
44 | /// let penalties = Penalties::new(); | |
45 | /// | |
46 | /// // Perfect fit, both words are on a single line with no overflow. | |
47 | /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); | |
48 | /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); | |
49 | /// | |
50 | /// // The words no longer fit, yet we get a single line back. While | |
51 | /// // the cost of overflow (`1 * 2500`) is the same as the cost of the | |
52 | /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` | |
53 | /// // which makes it cheaper to overflow than to use two lines. | |
54 | /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); | |
55 | /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); | |
56 | /// | |
57 | /// // The cost of overflow would be 2 * 2500, whereas the cost of | |
58 | /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = | |
59 | /// // 3401`. We therefore get two lines. | |
60 | /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); | |
61 | /// assert_eq!(wrapped, vec![&[Word::from(short)], | |
62 | /// &[Word::from(&long)]]); | |
63 | /// ``` | |
64 | /// | |
65 | /// This only happens if the overflowing word is 50 characters | |
66 | /// long _and_ if the word overflows the line by exactly one | |
67 | /// character. If it overflows by more than one character, the | |
68 | /// overflow penalty will quickly outgrow the cost of the gap, as | |
69 | /// seen above. | |
70 | pub overflow_penalty: usize, | |
71 | ||
72 | /// When should the a single word on the last line be considered | |
73 | /// "too short"? | |
74 | /// | |
75 | /// If the last line of the text consist of a single word and if | |
76 | /// this word is shorter than `1 / short_last_line_fraction` of | |
77 | /// the line width, then the final line will be considered "short" | |
78 | /// and `short_last_line_penalty` is added as an extra penalty. | |
79 | /// | |
80 | /// The effect of this is to avoid a final line consisting of a | |
81 | /// single small word. For example, with a | |
82 | /// `short_last_line_penalty` of 25 (the default), a gap of up to | |
83 | /// 5 columns will be seen as more desirable than having a final | |
84 | /// short line. | |
85 | /// | |
86 | /// ## Examples | |
87 | /// | |
88 | /// ``` | |
89 | /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm}; | |
90 | /// | |
91 | /// let text = "This is a demo of the short last line penalty."; | |
92 | /// | |
93 | /// // The first-fit algorithm leaves a single short word on the last line: | |
94 | /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)), | |
95 | /// vec!["This is a demo of the short last line", | |
96 | /// "penalty."]); | |
97 | /// | |
98 | /// #[cfg(feature = "smawk")] { | |
99 | /// let mut penalties = wrap_algorithms::Penalties::new(); | |
100 | /// | |
101 | /// // Since "penalty." is shorter than 25% of the line width, the | |
102 | /// // optimal-fit algorithm adds a penalty of 25. This is enough | |
103 | /// // to move "line " down: | |
104 | /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), | |
105 | /// vec!["This is a demo of the short last", | |
106 | /// "line penalty."]); | |
107 | /// | |
108 | /// // We can change the meaning of "short" lines. Here, only words | |
109 | /// // shorter than 1/10th of the line width will be considered short: | |
110 | /// penalties.short_last_line_fraction = 10; | |
111 | /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), | |
112 | /// vec!["This is a demo of the short last line", | |
113 | /// "penalty."]); | |
114 | /// | |
115 | /// // If desired, the penalty can also be disabled: | |
116 | /// penalties.short_last_line_fraction = 4; | |
117 | /// penalties.short_last_line_penalty = 0; | |
118 | /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), | |
119 | /// vec!["This is a demo of the short last line", | |
120 | /// "penalty."]); | |
121 | /// } | |
122 | /// ``` | |
123 | pub short_last_line_fraction: usize, | |
124 | ||
125 | /// Penalty for a last line with a single short word. | |
126 | /// | |
127 | /// Set this to zero if you do not want to penalize short last lines. | |
128 | pub short_last_line_penalty: usize, | |
129 | ||
130 | /// Penalty for lines ending with a hyphen. | |
131 | pub hyphen_penalty: usize, | |
132 | } | |
04454e1e | 133 | |
923072b8 FG |
134 | impl Penalties { |
135 | /// Default penalties for monospace text. | |
136 | /// | |
137 | /// The penalties here work well for monospace text. This is | |
138 | /// because they expect the gaps at the end of lines to be roughly | |
139 | /// in the range `0..100`. If the gaps are larger, the | |
140 | /// `overflow_penalty` and `hyphen_penalty` become insignificant. | |
141 | pub const fn new() -> Self { | |
142 | Penalties { | |
143 | nline_penalty: 1000, | |
144 | overflow_penalty: 50 * 50, | |
145 | short_last_line_fraction: 4, | |
146 | short_last_line_penalty: 25, | |
147 | hyphen_penalty: 25, | |
148 | } | |
149 | } | |
150 | } | |
151 | ||
152 | impl Default for Penalties { | |
153 | fn default() -> Self { | |
154 | Self::new() | |
04454e1e FG |
155 | } |
156 | } | |
157 | ||
158 | /// Cache for line numbers. This is necessary to avoid a O(n**2) | |
159 | /// behavior when computing line numbers in [`wrap_optimal_fit`]. | |
160 | struct LineNumbers { | |
161 | line_numbers: RefCell<Vec<usize>>, | |
162 | } | |
163 | ||
164 | impl LineNumbers { | |
165 | fn new(size: usize) -> Self { | |
166 | let mut line_numbers = Vec::with_capacity(size); | |
167 | line_numbers.push(0); | |
168 | LineNumbers { | |
169 | line_numbers: RefCell::new(line_numbers), | |
170 | } | |
171 | } | |
172 | ||
173 | fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize { | |
174 | while self.line_numbers.borrow_mut().len() < i + 1 { | |
175 | let pos = self.line_numbers.borrow().len(); | |
923072b8 | 176 | let line_number = 1 + self.get(minima[pos].0, minima); |
04454e1e FG |
177 | self.line_numbers.borrow_mut().push(line_number); |
178 | } | |
179 | ||
180 | self.line_numbers.borrow()[i] | |
181 | } | |
182 | } | |
183 | ||
923072b8 FG |
184 | /// Overflow error during the [`wrap_optimal_fit`] computation. |
185 | #[derive(Debug, PartialEq, Eq)] | |
186 | pub struct OverflowError; | |
04454e1e | 187 | |
923072b8 FG |
188 | impl std::fmt::Display for OverflowError { |
189 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
190 | write!(f, "wrap_optimal_fit cost computation overflowed") | |
191 | } | |
192 | } | |
04454e1e | 193 | |
923072b8 | 194 | impl std::error::Error for OverflowError {} |
04454e1e FG |
195 | |
196 | /// Wrap abstract fragments into lines with an optimal-fit algorithm. | |
197 | /// | |
198 | /// The `line_widths` slice gives the target line width for each line | |
199 | /// (the last slice element is repeated as necessary). This can be | |
200 | /// used to implement hanging indentation. | |
201 | /// | |
202 | /// The fragments must already have been split into the desired | |
203 | /// widths, this function will not (and cannot) attempt to split them | |
204 | /// further when arranging them into lines. | |
205 | /// | |
206 | /// # Optimal-Fit Algorithm | |
207 | /// | |
208 | /// The algorithm considers all possible break points and picks the | |
209 | /// breaks which minimizes the gaps at the end of each line. More | |
210 | /// precisely, the algorithm assigns a cost or penalty to each break | |
211 | /// point, determined by `cost = gap * gap` where `gap = target_width - | |
212 | /// line_width`. Shorter lines are thus penalized more heavily since | |
213 | /// they leave behind a larger gap. | |
214 | /// | |
215 | /// We can illustrate this with the text “To be, or not to be: that is | |
216 | /// the question”. We will be wrapping it in a narrow column with room | |
217 | /// for only 10 characters. The [greedy | |
218 | /// algorithm](super::wrap_first_fit) will produce these lines, each | |
219 | /// annotated with the corresponding penalty: | |
220 | /// | |
221 | /// ```text | |
222 | /// "To be, or" 1² = 1 | |
223 | /// "not to be:" 0² = 0 | |
224 | /// "that is" 3² = 9 | |
225 | /// "the" 7² = 49 | |
226 | /// "question" 2² = 4 | |
227 | /// ``` | |
228 | /// | |
229 | /// We see that line four with “the” leaves a gap of 7 columns, which | |
230 | /// gives it a penalty of 49. The sum of the penalties is 63. | |
231 | /// | |
232 | /// There are 10 words, which means that there are `2_u32.pow(9)` or | |
233 | /// 512 different ways to typeset it. We can compute | |
234 | /// the sum of the penalties for each possible line break and search | |
235 | /// for the one with the lowest sum: | |
236 | /// | |
237 | /// ```text | |
238 | /// "To be," 4² = 16 | |
239 | /// "or not to" 1² = 1 | |
240 | /// "be: that" 2² = 4 | |
241 | /// "is the" 4² = 16 | |
242 | /// "question" 2² = 4 | |
243 | /// ``` | |
244 | /// | |
245 | /// The sum of the penalties is 41, which is better than what the | |
246 | /// greedy algorithm produced. | |
247 | /// | |
248 | /// Searching through all possible combinations would normally be | |
249 | /// prohibitively slow. However, it turns out that the problem can be | |
250 | /// formulated as the task of finding column minima in a cost matrix. | |
251 | /// This matrix has a special form (totally monotone) which lets us | |
252 | /// use a [linear-time algorithm called | |
253 | /// SMAWK](https://lib.rs/crates/smawk) to find the optimal break | |
254 | /// points. | |
255 | /// | |
256 | /// This means that the time complexity remains O(_n_) where _n_ is | |
257 | /// the number of words. Compared to | |
258 | /// [`wrap_first_fit`](super::wrap_first_fit), this function is about | |
259 | /// 4 times slower. | |
260 | /// | |
261 | /// The optimization of per-line costs over the entire paragraph is | |
262 | /// inspired by the line breaking algorithm used in TeX, as described | |
263 | /// in the 1981 article [_Breaking Paragraphs into | |
264 | /// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) | |
265 | /// by Knuth and Plass. The implementation here is based on [Python | |
266 | /// code by David | |
267 | /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). | |
268 | /// | |
923072b8 FG |
269 | /// # Errors |
270 | /// | |
271 | /// In case of an overflow during the cost computation, an `Err` is | |
272 | /// returned. Overflows happens when fragments or lines have infinite | |
273 | /// widths (`f64::INFINITY`) or if the widths are so large that the | |
274 | /// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` | |
275 | /// (approximately 1e154): | |
276 | /// | |
277 | /// ``` | |
278 | /// use textwrap::core::Fragment; | |
279 | /// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties}; | |
280 | /// | |
281 | /// #[derive(Debug, PartialEq)] | |
282 | /// struct Word(f64); | |
283 | /// | |
284 | /// impl Fragment for Word { | |
285 | /// fn width(&self) -> f64 { self.0 } | |
286 | /// fn whitespace_width(&self) -> f64 { 1.0 } | |
287 | /// fn penalty_width(&self) -> f64 { 0.0 } | |
288 | /// } | |
289 | /// | |
290 | /// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is | |
291 | /// // larger than f64::MAX: | |
292 | /// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), | |
293 | /// Err(OverflowError)); | |
294 | /// ``` | |
295 | /// | |
296 | /// When using fragment widths and line widths which fit inside an | |
297 | /// `u64`, overflows cannot happen. This means that fragments derived | |
298 | /// from a `&str` cannot cause overflows. | |
299 | /// | |
04454e1e FG |
300 | /// **Note:** Only available when the `smawk` Cargo feature is |
301 | /// enabled. | |
302 | pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( | |
303 | fragments: &'a [T], | |
923072b8 FG |
304 | line_widths: &'b [f64], |
305 | penalties: &'b Penalties, | |
306 | ) -> Result<Vec<&'a [T]>, OverflowError> { | |
04454e1e | 307 | // The final line width is used for all remaining lines. |
923072b8 | 308 | let default_line_width = line_widths.last().copied().unwrap_or(0.0); |
04454e1e | 309 | let mut widths = Vec::with_capacity(fragments.len() + 1); |
923072b8 | 310 | let mut width = 0.0; |
04454e1e FG |
311 | widths.push(width); |
312 | for fragment in fragments { | |
313 | width += fragment.width() + fragment.whitespace_width(); | |
314 | widths.push(width); | |
315 | } | |
316 | ||
317 | let line_numbers = LineNumbers::new(fragments.len()); | |
318 | ||
923072b8 | 319 | let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { |
04454e1e | 320 | // Line number for fragment `i`. |
923072b8 | 321 | let line_number = line_numbers.get(i, minima); |
04454e1e FG |
322 | let line_width = line_widths |
323 | .get(line_number) | |
324 | .copied() | |
325 | .unwrap_or(default_line_width); | |
923072b8 | 326 | let target_width = line_width.max(1.0); |
04454e1e FG |
327 | |
328 | // Compute the width of a line spanning fragments[i..j] in | |
329 | // constant time. We need to adjust widths[j] by subtracting | |
923072b8 | 330 | // the whitespace of fragment[j-1] and then add the penalty. |
04454e1e FG |
331 | let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() |
332 | + fragments[j - 1].penalty_width(); | |
333 | ||
334 | // We compute cost of the line containing fragments[i..j]. We | |
335 | // start with values[i].1, which is the optimal cost for | |
336 | // breaking before fragments[i]. | |
337 | // | |
338 | // First, every extra line cost NLINE_PENALTY. | |
923072b8 | 339 | let mut cost = minima[i].1 + penalties.nline_penalty as f64; |
04454e1e FG |
340 | |
341 | // Next, we add a penalty depending on the line length. | |
342 | if line_width > target_width { | |
343 | // Lines that overflow get a hefty penalty. | |
923072b8 FG |
344 | let overflow = line_width - target_width; |
345 | cost += overflow * penalties.overflow_penalty as f64; | |
04454e1e FG |
346 | } else if j < fragments.len() { |
347 | // Other lines (except for the last line) get a milder | |
348 | // penalty which depend on the size of the gap. | |
923072b8 | 349 | let gap = target_width - line_width; |
04454e1e | 350 | cost += gap * gap; |
923072b8 FG |
351 | } else if i + 1 == j |
352 | && line_width < target_width / penalties.short_last_line_fraction as f64 | |
353 | { | |
04454e1e FG |
354 | // The last line can have any size gap, but we do add a |
355 | // penalty if the line is very short (typically because it | |
356 | // contains just a single word). | |
923072b8 | 357 | cost += penalties.short_last_line_penalty as f64; |
04454e1e FG |
358 | } |
359 | ||
360 | // Finally, we discourage hyphens. | |
923072b8 | 361 | if fragments[j - 1].penalty_width() > 0.0 { |
04454e1e FG |
362 | // TODO: this should use a penalty value from the fragment |
363 | // instead. | |
923072b8 | 364 | cost += penalties.hyphen_penalty as f64; |
04454e1e FG |
365 | } |
366 | ||
367 | cost | |
368 | }); | |
369 | ||
923072b8 FG |
370 | for (_, cost) in &minima { |
371 | if cost.is_infinite() { | |
372 | return Err(OverflowError); | |
373 | } | |
374 | } | |
375 | ||
04454e1e FG |
376 | let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); |
377 | let mut pos = fragments.len(); | |
378 | loop { | |
379 | let prev = minima[pos].0; | |
380 | lines.push(&fragments[prev..pos]); | |
381 | pos = prev; | |
382 | if pos == 0 { | |
383 | break; | |
384 | } | |
385 | } | |
386 | ||
387 | lines.reverse(); | |
923072b8 FG |
388 | Ok(lines) |
389 | } | |
390 | ||
391 | #[cfg(test)] | |
392 | mod tests { | |
393 | use super::*; | |
394 | ||
395 | #[derive(Debug, PartialEq)] | |
396 | struct Word(f64); | |
397 | ||
398 | #[rustfmt::skip] | |
399 | impl Fragment for Word { | |
400 | fn width(&self) -> f64 { self.0 } | |
401 | fn whitespace_width(&self) -> f64 { 1.0 } | |
402 | fn penalty_width(&self) -> f64 { 0.0 } | |
403 | } | |
404 | ||
405 | #[test] | |
406 | fn wrap_fragments_with_infinite_widths() { | |
407 | let words = vec![Word(f64::INFINITY)]; | |
408 | assert_eq!( | |
409 | wrap_optimal_fit(&words, &[0.0], &Penalties::default()), | |
410 | Err(OverflowError) | |
411 | ); | |
412 | } | |
413 | ||
414 | #[test] | |
415 | fn wrap_fragments_with_huge_widths() { | |
416 | let words = vec![Word(1e200), Word(1e250), Word(1e300)]; | |
417 | assert_eq!( | |
418 | wrap_optimal_fit(&words, &[1e300], &Penalties::default()), | |
419 | Err(OverflowError) | |
420 | ); | |
421 | } | |
422 | ||
423 | #[test] | |
424 | fn wrap_fragments_with_large_widths() { | |
425 | // The gaps will be of the sizes between 1e25 and 1e75. This | |
426 | // makes the `gap * gap` cost fit comfortably in a f64. | |
427 | let words = vec![Word(1e25), Word(1e50), Word(1e75)]; | |
428 | assert_eq!( | |
429 | wrap_optimal_fit(&words, &[1e100], &Penalties::default()), | |
430 | Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) | |
431 | ); | |
432 | } | |
04454e1e | 433 | } |