[rustc.git] / vendor / textwrap / src / wrap_algorithms / optimal_fit.rs

use std::cell::RefCell;

use crate::core::Fragment;

/// Penalties for
/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
/// and [`wrap_optimal_fit`].
///
/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
/// entire paragraph to find optimal line breaks. When wrapping text,
/// "penalties" are assigned to line breaks based on the gaps left at
/// the end of lines. The penalties are given by this struct, with
/// [`Penalties::default`] assigning penalties that work well for
/// monospace text.
///
/// If you are wrapping proportional text, you are advised to assign
/// your own penalties according to your font size. See the individual
/// penalties below for details.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
#[derive(Clone, Copy, Debug)]
pub struct Penalties {
    /// Per-line penalty. This is added for every line, which makes it
    /// expensive to output more lines than the minimum required.
    pub nline_penalty: usize,

    /// Per-character cost for lines that overflow the target line width.
    ///
    /// With a default value of 50², every single character costs as
    /// much as leaving a gap of 50 characters behind. This is because
    /// we assign as cost of `gap * gap` to a short line. When
    /// wrapping monospace text, we can overflow the line by 1
    /// character in extreme cases:
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
    ///
    /// let short = "foo ";
    /// let long = "x".repeat(50);
    /// let length = (short.len() + long.len()) as f64;
    /// let fragments = vec![Word::from(short), Word::from(&long)];
    /// let penalties = Penalties::new();
    ///
    /// // Perfect fit, both words are on a single line with no overflow.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The words no longer fit, yet we get a single line back. While
    /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
    /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
    /// // which makes it cheaper to overflow than to use two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
    ///
    /// // The cost of overflow would be 2 * 2500, whereas the cost of
    /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
    /// // 3401`. We therefore get two lines.
    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
    /// assert_eq!(wrapped, vec![&[Word::from(short)],
    ///                          &[Word::from(&long)]]);
    /// ```
    ///
    /// This only happens if the overflowing word is 50 characters
    /// long _and_ if the word overflows the line by exactly one
    /// character. If it overflows by more than one character, the
    /// overflow penalty will quickly outgrow the cost of the gap, as
    /// seen above.
    pub overflow_penalty: usize,

    /// When should the a single word on the last line be considered
    /// "too short"?
    ///
    /// If the last line of the text consist of a single word and if
    /// this word is shorter than `1 / short_last_line_fraction` of
    /// the line width, then the final line will be considered "short"
    /// and `short_last_line_penalty` is added as an extra penalty.
    ///
    /// The effect of this is to avoid a final line consisting of a
    /// single small word. For example, with a
    /// `short_last_line_penalty` of 25 (the default), a gap of up to
    /// 5 columns will be seen as more desirable than having a final
    /// short line.
    ///
    /// ## Examples
    ///
    /// ```
    /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
    ///
    /// let text = "This is a demo of the short last line penalty.";
    ///
    /// // The first-fit algorithm leaves a single short word on the last line:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// #[cfg(feature = "smawk")] {
    /// let mut penalties = wrap_algorithms::Penalties::new();
    ///
    /// // Since "penalty." is shorter than 25% of the line width, the
    /// // optimal-fit algorithm adds a penalty of 25. This is enough
    /// // to move "line " down:
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last",
    ///                 "line penalty."]);
    ///
    /// // We can change the meaning of "short" lines. Here, only words
    /// // shorter than 1/10th of the line width will be considered short:
    /// penalties.short_last_line_fraction = 10;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    ///
    /// // If desired, the penalty can also be disabled:
    /// penalties.short_last_line_fraction = 4;
    /// penalties.short_last_line_penalty = 0;
    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
    ///            vec!["This is a demo of the short last line",
    ///                 "penalty."]);
    /// }
    /// ```
    pub short_last_line_fraction: usize,

    /// Penalty for a last line with a single short word.
    ///
    /// Set this to zero if you do not want to penalize short last lines.
    pub short_last_line_penalty: usize,

    /// Penalty for lines ending with a hyphen.
    pub hyphen_penalty: usize,
}

impl Penalties {
    /// Default penalties for monospace text.
    ///
    /// The penalties here work well for monospace text. This is
    /// because they expect the gaps at the end of lines to be roughly
    /// in the range `0..100`. If the gaps are larger, the
    /// `overflow_penalty` and `hyphen_penalty` become insignificant.
    pub const fn new() -> Self {
        Penalties {
            nline_penalty: 1000,
            overflow_penalty: 50 * 50,
            short_last_line_fraction: 4,
            short_last_line_penalty: 25,
            hyphen_penalty: 25,
        }
    }
}

impl Default for Penalties {
    fn default() -> Self {
        Self::new()
    }
}

/// Cache for line numbers. This is necessary to avoid a O(n**2)
/// behavior when computing line numbers in [`wrap_optimal_fit`].
struct LineNumbers {
    line_numbers: RefCell<Vec<usize>>,
}

impl LineNumbers {
    fn new(size: usize) -> Self {
        let mut line_numbers = Vec::with_capacity(size);
        line_numbers.push(0);
        LineNumbers {
            line_numbers: RefCell::new(line_numbers),
        }
    }

    fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
        while self.line_numbers.borrow_mut().len() < i + 1 {
            let pos = self.line_numbers.borrow().len();
            let line_number = 1 + self.get(minima[pos].0, minima);
            self.line_numbers.borrow_mut().push(line_number);
        }

        self.line_numbers.borrow()[i]
    }
}

/// Overflow error during the [`wrap_optimal_fit`] computation.
#[derive(Debug, PartialEq, Eq)]
pub struct OverflowError;

impl std::fmt::Display for OverflowError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "wrap_optimal_fit cost computation overflowed")
    }
}

impl std::error::Error for OverflowError {}

/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # Optimal-Fit Algorithm
///
/// The algorithm considers all possible break points and picks the
/// breaks which minimizes the gaps at the end of each line. More
/// precisely, the algorithm assigns a cost or penalty to each break
/// point, determined by `cost = gap * gap` where `gap = target_width -
/// line_width`. Shorter lines are thus penalized more heavily since
/// they leave behind a larger gap.
///
/// We can illustrate this with the text “To be, or not to be: that is
/// the question”. We will be wrapping it in a narrow column with room
/// for only 10 characters. The [greedy
/// algorithm](super::wrap_first_fit) will produce these lines, each
/// annotated with the corresponding penalty:
///
/// ```text
/// "To be, or"   1² =  1
/// "not to be:"  0² =  0
/// "that is"     3² =  9
/// "the"         7² = 49
/// "question"    2² =  4
/// ```
///
/// We see that line four with “the” leaves a gap of 7 columns, which
/// gives it a penalty of 49. The sum of the penalties is 63.
///
/// There are 10 words, which means that there are `2_u32.pow(9)` or
/// 512 different ways to typeset it. We can compute
/// the sum of the penalties for each possible line break and search
/// for the one with the lowest sum:
///
/// ```text
/// "To be,"     4² = 16
/// "or not to"  1² =  1
/// "be: that"   2² =  4
/// "is the"     4² = 16
/// "question"   2² =  4
/// ```
///
/// The sum of the penalties is 41, which is better than what the
/// greedy algorithm produced.
///
/// Searching through all possible combinations would normally be
/// prohibitively slow. However, it turns out that the problem can be
/// formulated as the task of finding column minima in a cost matrix.
/// This matrix has a special form (totally monotone) which lets us
/// use a [linear-time algorithm called
/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
/// points.
///
/// This means that the time complexity remains O(_n_) where _n_ is
/// the number of words. Compared to
/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
/// 4 times slower.
///
/// The optimization of per-line costs over the entire paragraph is
/// inspired by the line breaking algorithm used in TeX, as described
/// in the 1981 article [_Breaking Paragraphs into
/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
/// by Knuth and Plass. The implementation here is based on [Python
/// code by David
/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
///
/// # Errors
///
/// In case of an overflow during the cost computation, an `Err` is
/// returned. Overflows happens when fragments or lines have infinite
/// widths (`f64::INFINITY`) or if the widths are so large that the
/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
/// (approximately 1e154):
///
/// ```
/// use textwrap::core::Fragment;
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
///
/// #[derive(Debug, PartialEq)]
/// struct Word(f64);
///
/// impl Fragment for Word {
///     fn width(&self) -> f64 { self.0 }
///     fn whitespace_width(&self) -> f64 { 1.0 }
///     fn penalty_width(&self) -> f64 { 0.0 }
/// }
///
/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
/// // larger than f64::MAX:
/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
///            Err(OverflowError));
/// ```
///
/// When using fragment widths and line widths which fit inside an
/// `u64`, overflows cannot happen. This means that fragments derived
/// from a `&str` cannot cause overflows.
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
    fragments: &'a [T],
    line_widths: &'b [f64],
    penalties: &'b Penalties,
) -> Result<Vec<&'a [T]>, OverflowError> {
    // The final line width is used for all remaining lines.
    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
    let mut widths = Vec::with_capacity(fragments.len() + 1);
    let mut width = 0.0;
    widths.push(width);
    for fragment in fragments {
        width += fragment.width() + fragment.whitespace_width();
        widths.push(width);
    }

    let line_numbers = LineNumbers::new(fragments.len());

    let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
        // Line number for fragment `i`.
        let line_number = line_numbers.get(i, minima);
        let line_width = line_widths
            .get(line_number)
            .copied()
            .unwrap_or(default_line_width);
        let target_width = line_width.max(1.0);

        // Compute the width of a line spanning fragments[i..j] in
        // constant time. We need to adjust widths[j] by subtracting
        // the whitespace of fragment[j-1] and then add the penalty.
        let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
            + fragments[j - 1].penalty_width();

        // We compute cost of the line containing fragments[i..j]. We
        // start with values[i].1, which is the optimal cost for
        // breaking before fragments[i].
        //
        // First, every extra line cost NLINE_PENALTY.
        let mut cost = minima[i].1 + penalties.nline_penalty as f64;

        // Next, we add a penalty depending on the line length.
        if line_width > target_width {
            // Lines that overflow get a hefty penalty.
            let overflow = line_width - target_width;
            cost += overflow * penalties.overflow_penalty as f64;
        } else if j < fragments.len() {
            // Other lines (except for the last line) get a milder
            // penalty which depend on the size of the gap.
            let gap = target_width - line_width;
            cost += gap * gap;
        } else if i + 1 == j
            && line_width < target_width / penalties.short_last_line_fraction as f64
        {
            // The last line can have any size gap, but we do add a
            // penalty if the line is very short (typically because it
            // contains just a single word).
            cost += penalties.short_last_line_penalty as f64;
        }

        // Finally, we discourage hyphens.
        if fragments[j - 1].penalty_width() > 0.0 {
            // TODO: this should use a penalty value from the fragment
            // instead.
            cost += penalties.hyphen_penalty as f64;
        }

        cost
    });

    for (_, cost) in &minima {
        if cost.is_infinite() {
            return Err(OverflowError);
        }
    }

    let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
    let mut pos = fragments.len();
    loop {
        let prev = minima[pos].0;
        lines.push(&fragments[prev..pos]);
        pos = prev;
        if pos == 0 {
            break;
        }
    }

    lines.reverse();
    Ok(lines)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[derive(Debug, PartialEq)]
    struct Word(f64);

    #[rustfmt::skip]
    impl Fragment for Word {
        fn width(&self) -> f64 { self.0 }
        fn whitespace_width(&self) -> f64 { 1.0 }
        fn penalty_width(&self) -> f64 { 0.0 }
    }

    #[test]
    fn wrap_fragments_with_infinite_widths() {
        let words = vec![Word(f64::INFINITY)];
        assert_eq!(
            wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_huge_widths() {
        let words = vec![Word(1e200), Word(1e250), Word(1e300)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
            Err(OverflowError)
        );
    }

    #[test]
    fn wrap_fragments_with_large_widths() {
        // The gaps will be of the sizes between 1e25 and 1e75. This
        // makes the `gap * gap` cost fit comfortably in a f64.
        let words = vec![Word(1e25), Word(1e50), Word(1e75)];
        assert_eq!(
            wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
            Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
        );
    }
}
Commit	Line	Data
04454e1e FG	1	use std::cell::RefCell;
04454e1e FG	2
923072b8	3	use crate::core::Fragment;
04454e1e	4
923072b8 FG	5	/// Penalties for
	6	/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
	7	/// and [`wrap_optimal_fit`].
04454e1e	8	///
923072b8 FG	9	/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
	10	/// entire paragraph to find optimal line breaks. When wrapping text,
	11	/// "penalties" are assigned to line breaks based on the gaps left at
	12	/// the end of lines. The penalties are given by this struct, with
	13	/// [`Penalties::default`] assigning penalties that work well for
	14	/// monospace text.
	15	///
	16	/// If you are wrapping proportional text, you are advised to assign
	17	/// your own penalties according to your font size. See the individual
	18	/// penalties below for details.
04454e1e FG	19	///
	20	/// Note: Only available when the `smawk` Cargo feature is
	21	/// enabled.
923072b8 FG	22	#[derive(Clone, Copy, Debug)]
	23	pub struct Penalties {
	24	/// Per-line penalty. This is added for every line, which makes it
	25	/// expensive to output more lines than the minimum required.
	26	pub nline_penalty: usize,
	27
	28	/// Per-character cost for lines that overflow the target line width.
	29	///
	30	/// With a default value of 50², every single character costs as
	31	/// much as leaving a gap of 50 characters behind. This is because
	32	/// we assign as cost of `gap * gap` to a short line. When
	33	/// wrapping monospace text, we can overflow the line by 1
	34	/// character in extreme cases:
	35	///
	36	/// ```
	37	/// use textwrap::core::Word;
	38	/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
	39	///
	40	/// let short = "foo ";
	41	/// let long = "x".repeat(50);
	42	/// let length = (short.len() + long.len()) as f64;
	43	/// let fragments = vec![Word::from(short), Word::from(&long)];
	44	/// let penalties = Penalties::new();
	45	///
	46	/// // Perfect fit, both words are on a single line with no overflow.
	47	/// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
	48	/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
	49	///
	50	/// // The words no longer fit, yet we get a single line back. While
	51	/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
	52	/// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
	53	/// // which makes it cheaper to overflow than to use two lines.
	54	/// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
	55	/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
	56	///
	57	/// // The cost of overflow would be 2 * 2500, whereas the cost of
	58	/// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
	59	/// // 3401`. We therefore get two lines.
	60	/// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
	61	/// assert_eq!(wrapped, vec![&[Word::from(short)],
	62	/// &[Word::from(&long)]]);
	63	/// ```
	64	///
	65	/// This only happens if the overflowing word is 50 characters
	66	/// long _and_ if the word overflows the line by exactly one
	67	/// character. If it overflows by more than one character, the
	68	/// overflow penalty will quickly outgrow the cost of the gap, as
	69	/// seen above.
	70	pub overflow_penalty: usize,
	71
	72	/// When should the a single word on the last line be considered
	73	/// "too short"?
	74	///
	75	/// If the last line of the text consist of a single word and if
	76	/// this word is shorter than `1 / short_last_line_fraction` of
	77	/// the line width, then the final line will be considered "short"
	78	/// and `short_last_line_penalty` is added as an extra penalty.
	79	///
	80	/// The effect of this is to avoid a final line consisting of a
	81	/// single small word. For example, with a
	82	/// `short_last_line_penalty` of 25 (the default), a gap of up to
	83	/// 5 columns will be seen as more desirable than having a final
	84	/// short line.
	85	///
86	/// ## Examples
87	///
88	/// ```
89	/// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
90	///
91	/// let text = "This is a demo of the short last line penalty.";
92	///
93	/// // The first-fit algorithm leaves a single short word on the last line:
94	/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
95	/// vec!["This is a demo of the short last line",
96	/// "penalty."]);
97	///
98	/// #[cfg(feature = "smawk")] {
99	/// let mut penalties = wrap_algorithms::Penalties::new();
100	///
101	/// // Since "penalty." is shorter than 25% of the line width, the
102	/// // optimal-fit algorithm adds a penalty of 25. This is enough
103	/// // to move "line " down:
104	/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
105	/// vec!["This is a demo of the short last",
106	/// "line penalty."]);
107	///
108	/// // We can change the meaning of "short" lines. Here, only words
109	/// // shorter than 1/10th of the line width will be considered short:
110	/// penalties.short_last_line_fraction = 10;
111	/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
112	/// vec!["This is a demo of the short last line",
113	/// "penalty."]);
114	///
115	/// // If desired, the penalty can also be disabled:
116	/// penalties.short_last_line_fraction = 4;
117	/// penalties.short_last_line_penalty = 0;
118	/// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
119	/// vec!["This is a demo of the short last line",
120	/// "penalty."]);
121	/// }
122	/// ```
123	pub short_last_line_fraction: usize,
124
125	/// Penalty for a last line with a single short word.
126	///
127	/// Set this to zero if you do not want to penalize short last lines.
128	pub short_last_line_penalty: usize,
129
130	/// Penalty for lines ending with a hyphen.
131	pub hyphen_penalty: usize,
132	}
04454e1e	133
923072b8 FG	134	impl Penalties {
	135	/// Default penalties for monospace text.
	136	///
	137	/// The penalties here work well for monospace text. This is
	138	/// because they expect the gaps at the end of lines to be roughly
	139	/// in the range `0..100`. If the gaps are larger, the
	140	/// `overflow_penalty` and `hyphen_penalty` become insignificant.
	141	pub const fn new() -> Self {
	142	Penalties {
	143	nline_penalty: 1000,
	144	overflow_penalty: 50 * 50,
	145	short_last_line_fraction: 4,
	146	short_last_line_penalty: 25,
	147	hyphen_penalty: 25,
	148	}
	149	}
	150	}
	151
	152	impl Default for Penalties {
	153	fn default() -> Self {
	154	Self::new()
04454e1e FG	155	}
	156	}
	157
	158	/// Cache for line numbers. This is necessary to avoid a O(n**2)
	159	/// behavior when computing line numbers in [`wrap_optimal_fit`].
	160	struct LineNumbers {
	161	line_numbers: RefCell<Vec<usize>>,
	162	}
	163
	164	impl LineNumbers {
	165	fn new(size: usize) -> Self {
	166	let mut line_numbers = Vec::with_capacity(size);
	167	line_numbers.push(0);
	168	LineNumbers {
	169	line_numbers: RefCell::new(line_numbers),
	170	}
	171	}
	172
	173	fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
	174	while self.line_numbers.borrow_mut().len() < i + 1 {
	175	let pos = self.line_numbers.borrow().len();
923072b8	176	let line_number = 1 + self.get(minima[pos].0, minima);
04454e1e FG	177	self.line_numbers.borrow_mut().push(line_number);
	178	}
	179
	180	self.line_numbers.borrow()[i]
	181	}
	182	}
	183
923072b8 FG	184	/// Overflow error during the [`wrap_optimal_fit`] computation.
	185	#[derive(Debug, PartialEq, Eq)]
	186	pub struct OverflowError;
04454e1e	187
923072b8 FG	188	impl std::fmt::Display for OverflowError {
	189	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
	190	write!(f, "wrap_optimal_fit cost computation overflowed")
	191	}
	192	}
04454e1e	193
923072b8	194	impl std::error::Error for OverflowError {}
04454e1e FG	195
	196	/// Wrap abstract fragments into lines with an optimal-fit algorithm.
	197	///
	198	/// The `line_widths` slice gives the target line width for each line
	199	/// (the last slice element is repeated as necessary). This can be
	200	/// used to implement hanging indentation.
	201	///
	202	/// The fragments must already have been split into the desired
	203	/// widths, this function will not (and cannot) attempt to split them
	204	/// further when arranging them into lines.
	205	///
	206	/// # Optimal-Fit Algorithm
	207	///
	208	/// The algorithm considers all possible break points and picks the
	209	/// breaks which minimizes the gaps at the end of each line. More
	210	/// precisely, the algorithm assigns a cost or penalty to each break
	211	/// point, determined by `cost = gap * gap` where `gap = target_width -
	212	/// line_width`. Shorter lines are thus penalized more heavily since
	213	/// they leave behind a larger gap.
	214	///
	215	/// We can illustrate this with the text “To be, or not to be: that is
	216	/// the question”. We will be wrapping it in a narrow column with room
	217	/// for only 10 characters. The [greedy
	218	/// algorithm](super::wrap_first_fit) will produce these lines, each
	219	/// annotated with the corresponding penalty:
	220	///
	221	/// ```text
	222	/// "To be, or" 1² = 1
	223	/// "not to be:" 0² = 0
	224	/// "that is" 3² = 9
	225	/// "the" 7² = 49
	226	/// "question" 2² = 4
	227	/// ```
	228	///
	229	/// We see that line four with “the” leaves a gap of 7 columns, which
	230	/// gives it a penalty of 49. The sum of the penalties is 63.
	231	///
	232	/// There are 10 words, which means that there are `2_u32.pow(9)` or
	233	/// 512 different ways to typeset it. We can compute
	234	/// the sum of the penalties for each possible line break and search
	235	/// for the one with the lowest sum:
	236	///
	237	/// ```text
	238	/// "To be," 4² = 16
	239	/// "or not to" 1² = 1
	240	/// "be: that" 2² = 4
	241	/// "is the" 4² = 16
	242	/// "question" 2² = 4
	243	/// ```
	244	///
	245	/// The sum of the penalties is 41, which is better than what the
	246	/// greedy algorithm produced.
	247	///
	248	/// Searching through all possible combinations would normally be
	249	/// prohibitively slow. However, it turns out that the problem can be
	250	/// formulated as the task of finding column minima in a cost matrix.
	251	/// This matrix has a special form (totally monotone) which lets us
	252	/// use a [linear-time algorithm called
	253	/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
	254	/// points.
	255	///
	256	/// This means that the time complexity remains O(_n_) where _n_ is
	257	/// the number of words. Compared to
	258	/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
259	/// 4 times slower.
260	///
261	/// The optimization of per-line costs over the entire paragraph is
262	/// inspired by the line breaking algorithm used in TeX, as described
263	/// in the 1981 article [_Breaking Paragraphs into
264	/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
265	/// by Knuth and Plass. The implementation here is based on [Python
266	/// code by David
267	/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
268	///
923072b8 FG	269	/// # Errors
	270	///
	271	/// In case of an overflow during the cost computation, an `Err` is
	272	/// returned. Overflows happens when fragments or lines have infinite
	273	/// widths (`f64::INFINITY`) or if the widths are so large that the
	274	/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
	275	/// (approximately 1e154):
	276	///
	277	/// ```
	278	/// use textwrap::core::Fragment;
	279	/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
	280	///
	281	/// #[derive(Debug, PartialEq)]
	282	/// struct Word(f64);
	283	///
	284	/// impl Fragment for Word {
	285	/// fn width(&self) -> f64 { self.0 }
	286	/// fn whitespace_width(&self) -> f64 { 1.0 }
	287	/// fn penalty_width(&self) -> f64 { 0.0 }
	288	/// }
	289	///
	290	/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
	291	/// // larger than f64::MAX:
	292	/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
	293	/// Err(OverflowError));
	294	/// ```
	295	///
	296	/// When using fragment widths and line widths which fit inside an
	297	/// `u64`, overflows cannot happen. This means that fragments derived
	298	/// from a `&str` cannot cause overflows.
	299	///
04454e1e FG	300	/// Note: Only available when the `smawk` Cargo feature is
	301	/// enabled.
	302	pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
	303	fragments: &'a [T],
923072b8 FG	304	line_widths: &'b [f64],
	305	penalties: &'b Penalties,
	306	) -> Result<Vec<&'a [T]>, OverflowError> {
04454e1e	307	// The final line width is used for all remaining lines.
923072b8	308	let default_line_width = line_widths.last().copied().unwrap_or(0.0);
04454e1e	309	let mut widths = Vec::with_capacity(fragments.len() + 1);
923072b8	310	let mut width = 0.0;
04454e1e FG	311	widths.push(width);
	312	for fragment in fragments {
	313	width += fragment.width() + fragment.whitespace_width();
	314	widths.push(width);
	315	}
	316
	317	let line_numbers = LineNumbers::new(fragments.len());
	318
923072b8	319	let minima = smawk::online_column_minima(0.0, widths.len(), \|minima, i, j\| {
04454e1e	320	// Line number for fragment `i`.
923072b8	321	let line_number = line_numbers.get(i, minima);
04454e1e FG	322	let line_width = line_widths
	323	.get(line_number)
	324	.copied()
	325	.unwrap_or(default_line_width);
923072b8	326	let target_width = line_width.max(1.0);
04454e1e FG	327
	328	// Compute the width of a line spanning fragments[i..j] in
	329	// constant time. We need to adjust widths[j] by subtracting
923072b8	330	// the whitespace of fragment[j-1] and then add the penalty.
04454e1e FG	331	let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
	332	+ fragments[j - 1].penalty_width();
	333
	334	// We compute cost of the line containing fragments[i..j]. We
	335	// start with values[i].1, which is the optimal cost for
	336	// breaking before fragments[i].
	337	//
	338	// First, every extra line cost NLINE_PENALTY.
923072b8	339	let mut cost = minima[i].1 + penalties.nline_penalty as f64;
04454e1e FG	340
	341	// Next, we add a penalty depending on the line length.
	342	if line_width > target_width {
	343	// Lines that overflow get a hefty penalty.
923072b8 FG	344	let overflow = line_width - target_width;
923072b8 FG	345	cost += overflow * penalties.overflow_penalty as f64;
04454e1e FG	346	} else if j < fragments.len() {
	347	// Other lines (except for the last line) get a milder
	348	// penalty which depend on the size of the gap.
923072b8	349	let gap = target_width - line_width;
04454e1e	350	cost += gap * gap;
923072b8 FG	351	} else if i + 1 == j
	352	&& line_width < target_width / penalties.short_last_line_fraction as f64
	353	{
04454e1e FG	354	// The last line can have any size gap, but we do add a
	355	// penalty if the line is very short (typically because it
	356	// contains just a single word).
923072b8	357	cost += penalties.short_last_line_penalty as f64;
04454e1e FG	358	}
	359
	360	// Finally, we discourage hyphens.
923072b8	361	if fragments[j - 1].penalty_width() > 0.0 {
04454e1e FG	362	// TODO: this should use a penalty value from the fragment
04454e1e FG	363	// instead.
923072b8	364	cost += penalties.hyphen_penalty as f64;
04454e1e FG	365	}
	366
	367	cost
	368	});
	369
923072b8 FG	370	for (_, cost) in &minima {
	371	if cost.is_infinite() {
	372	return Err(OverflowError);
	373	}
	374	}
	375
04454e1e FG	376	let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
	377	let mut pos = fragments.len();
	378	loop {
	379	let prev = minima[pos].0;
	380	lines.push(&fragments[prev..pos]);
	381	pos = prev;
	382	if pos == 0 {
	383	break;
	384	}
	385	}
	386
	387	lines.reverse();
923072b8 FG	388	Ok(lines)
	389	}
	390
	391	#[cfg(test)]
	392	mod tests {
	393	use super::*;
	394
	395	#[derive(Debug, PartialEq)]
	396	struct Word(f64);
	397
	398	#[rustfmt::skip]
	399	impl Fragment for Word {
	400	fn width(&self) -> f64 { self.0 }
	401	fn whitespace_width(&self) -> f64 { 1.0 }
	402	fn penalty_width(&self) -> f64 { 0.0 }
	403	}
	404
	405	#[test]
	406	fn wrap_fragments_with_infinite_widths() {
	407	let words = vec![Word(f64::INFINITY)];
	408	assert_eq!(
	409	wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
	410	Err(OverflowError)
	411	);
	412	}
	413
	414	#[test]
	415	fn wrap_fragments_with_huge_widths() {
	416	let words = vec![Word(1e200), Word(1e250), Word(1e300)];
	417	assert_eq!(
	418	wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
	419	Err(OverflowError)
	420	);
	421	}
	422
	423	#[test]
	424	fn wrap_fragments_with_large_widths() {
	425	// The gaps will be of the sizes between 1e25 and 1e75. This
	426	// makes the `gap * gap` cost fit comfortably in a f64.
	427	let words = vec![Word(1e25), Word(1e50), Word(1e75)];
	428	assert_eq!(
	429	wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
	430	Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
	431	);
	432	}
04454e1e	433	}