]>
Commit | Line | Data |
---|---|---|
1a4d82fc | 1 | //! This pretty-printer is a direct reimplementation of Philip Karlton's |
f035d41b XL |
2 | //! Mesa pretty-printer, as described in the appendix to |
3 | //! Derek C. Oppen, "Pretty Printing" (1979), | |
4 | //! Stanford Computer Science Department STAN-CS-79-770, | |
5 | //! <http://i.stanford.edu/pub/cstr/reports/cs/tr/79/770/CS-TR-79-770.pdf>. | |
1a4d82fc JJ |
6 | //! |
7 | //! The algorithm's aim is to break a stream into as few lines as possible | |
8 | //! while respecting the indentation-consistency requirements of the enclosing | |
9 | //! block, and avoiding breaking at silly places on block boundaries, for | |
10 | //! example, between "x" and ")" in "x)". | |
11 | //! | |
12 | //! I am implementing this algorithm because it comes with 20 pages of | |
13 | //! documentation explaining its theory, and because it addresses the set of | |
14 | //! concerns I've seen other pretty-printers fall down on. Weirdly. Even though | |
15 | //! it's 32 years old. What can I say? | |
16 | //! | |
17 | //! Despite some redundancies and quirks in the way it's implemented in that | |
18 | //! paper, I've opted to keep the implementation here as similar as I can, | |
19 | //! changing only what was blatantly wrong, a typo, or sufficiently | |
20 | //! non-idiomatic rust that it really stuck out. | |
21 | //! | |
22 | //! In particular you'll see a certain amount of churn related to INTEGER vs. | |
23 | //! CARDINAL in the Mesa implementation. Mesa apparently interconverts the two | |
85aaf69f | 24 | //! somewhat readily? In any case, I've used usize for indices-in-buffers and |
1a4d82fc JJ |
25 | //! ints for character-sizes-and-indentation-offsets. This respects the need |
26 | //! for ints to "go negative" while carrying a pending-calculation balance, and | |
27 | //! helps differentiate all the numbers flying around internally (slightly). | |
28 | //! | |
29 | //! I also inverted the indentation arithmetic used in the print stack, since | |
30 | //! the Mesa implementation (somewhat randomly) stores the offset on the print | |
31 | //! stack in terms of margin-col rather than col itself. I store col. | |
32 | //! | |
33 | //! I also implemented a small change in the String token, in that I store an | |
34 | //! explicit length for the string. For most tokens this is just the length of | |
35 | //! the accompanying string. But it's necessary to permit it to differ, for | |
36 | //! encoding things that are supposed to "go on their own line" -- certain | |
37 | //! classes of comment and blank-line -- where relying on adjacent | |
38 | //! hardbreak-like Break tokens with long blankness indication doesn't actually | |
39 | //! work. To see why, consider when there is a "thing that should be on its own | |
40 | //! line" between two long blocks, say functions. If you put a hardbreak after | |
41 | //! each function (or before each) and the breaking algorithm decides to break | |
42 | //! there anyways (because the functions themselves are long) you wind up with | |
43 | //! extra blank lines. If you don't put hardbreaks you can wind up with the | |
44 | //! "thing which should be on its own line" not getting its own line in the | |
45 | //! rare case of "really small functions" or such. This re-occurs with comments | |
46 | //! and explicit blank lines. So in those cases we use a string with a payload | |
47 | //! we want isolated to a line and an explicit length that's huge, surrounded | |
48 | //! by two zero-length breaks. The algorithm will try its best to fit it on a | |
49 | //! line (which it can't) and so naturally place the content on its own line to | |
50 | //! avoid combining it with other lines and making matters even worse. | |
8bb4bdeb XL |
51 | //! |
52 | //! # Explanation | |
53 | //! | |
54 | //! In case you do not have the paper, here is an explanation of what's going | |
55 | //! on. | |
56 | //! | |
57 | //! There is a stream of input tokens flowing through this printer. | |
58 | //! | |
59 | //! The printer buffers up to 3N tokens inside itself, where N is linewidth. | |
60 | //! Yes, linewidth is chars and tokens are multi-char, but in the worst | |
61 | //! case every token worth buffering is 1 char long, so it's ok. | |
62 | //! | |
63 | //! Tokens are String, Break, and Begin/End to delimit blocks. | |
64 | //! | |
65 | //! Begin tokens can carry an offset, saying "how far to indent when you break | |
66 | //! inside here", as well as a flag indicating "consistent" or "inconsistent" | |
67 | //! breaking. Consistent breaking means that after the first break, no attempt | |
68 | //! will be made to flow subsequent breaks together onto lines. Inconsistent | |
69 | //! is the opposite. Inconsistent breaking example would be, say: | |
70 | //! | |
71 | //! ``` | |
72 | //! foo(hello, there, good, friends) | |
73 | //! ``` | |
74 | //! | |
75 | //! breaking inconsistently to become | |
76 | //! | |
77 | //! ``` | |
78 | //! foo(hello, there | |
79 | //! good, friends); | |
80 | //! ``` | |
81 | //! | |
82 | //! whereas a consistent breaking would yield: | |
83 | //! | |
84 | //! ``` | |
85 | //! foo(hello, | |
86 | //! there | |
87 | //! good, | |
88 | //! friends); | |
89 | //! ``` | |
90 | //! | |
91 | //! That is, in the consistent-break blocks we value vertical alignment | |
92 | //! more than the ability to cram stuff onto a line. But in all cases if it | |
93 | //! can make a block a one-liner, it'll do so. | |
94 | //! | |
95 | //! Carrying on with high-level logic: | |
96 | //! | |
97 | //! The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and | |
98 | //! 'right' indices denote the active portion of the ring buffer as well as | |
99 | //! describing hypothetical points-in-the-infinite-stream at most 3N tokens | |
0731742a | 100 | //! apart (i.e., "not wrapped to ring-buffer boundaries"). The paper will switch |
8bb4bdeb XL |
101 | //! between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer |
102 | //! and point-in-infinite-stream senses freely. | |
103 | //! | |
7cac9316 | 104 | //! There is a parallel ring buffer, `size`, that holds the calculated size of |
8bb4bdeb XL |
105 | //! each token. Why calculated? Because for Begin/End pairs, the "size" |
106 | //! includes everything between the pair. That is, the "size" of Begin is | |
107 | //! actually the sum of the sizes of everything between Begin and the paired | |
7cac9316 | 108 | //! End that follows. Since that is arbitrarily far in the future, `size` is |
8bb4bdeb | 109 | //! being rewritten regularly while the printer runs; in fact most of the |
7cac9316 | 110 | //! machinery is here to work out `size` entries on the fly (and give up when |
8bb4bdeb XL |
111 | //! they're so obviously over-long that "infinity" is a good enough |
112 | //! approximation for purposes of line breaking). | |
113 | //! | |
114 | //! The "input side" of the printer is managed as an abstract process called | |
7cac9316 | 115 | //! SCAN, which uses `scan_stack`, to manage calculating `size`. SCAN is, in |
8bb4bdeb XL |
116 | //! other words, the process of calculating 'size' entries. |
117 | //! | |
118 | //! The "output side" of the printer is managed by an abstract process called | |
7cac9316 | 119 | //! PRINT, which uses `print_stack`, `margin` and `space` to figure out what to |
8bb4bdeb XL |
120 | //! do with each token/size pair it consumes as it goes. It's trying to consume |
121 | //! the entire buffered window, but can't output anything until the size is >= | |
122 | //! 0 (sizes are set to negative while they're pending calculation). | |
123 | //! | |
124 | //! So SCAN takes input and buffers tokens and pending calculations, while | |
125 | //! PRINT gobbles up completed calculations and tokens from the buffer. The | |
126 | //! theory is that the two can never get more than 3N tokens apart, because | |
127 | //! once there's "obviously" too much data to fit on a line, in a size | |
128 | //! calculation, SCAN will write "infinity" to the size and let PRINT consume | |
129 | //! it. | |
130 | //! | |
a1dfa0c6 | 131 | //! In this implementation (following the paper, again) the SCAN process is the |
416331ca | 132 | //! methods called `Printer::scan_*`, and the 'PRINT' process is the |
a1dfa0c6 | 133 | //! method called `Printer::print`. |
1a4d82fc | 134 | |
dfeec247 | 135 | use std::borrow::Cow; |
a7813a04 XL |
136 | use std::collections::VecDeque; |
137 | use std::fmt; | |
3dfed10e | 138 | use tracing::debug; |
970d7e83 | 139 | |
8bb4bdeb | 140 | /// How to break. Described in more detail in the module docs. |
1a4d82fc JJ |
141 | #[derive(Clone, Copy, PartialEq)] |
142 | pub enum Breaks { | |
143 | Consistent, | |
144 | Inconsistent, | |
145 | } | |
223e47cc | 146 | |
1a4d82fc JJ |
147 | #[derive(Clone, Copy)] |
148 | pub struct BreakToken { | |
85aaf69f | 149 | offset: isize, |
dfeec247 | 150 | blank_space: isize, |
223e47cc LB |
151 | } |
152 | ||
1a4d82fc JJ |
153 | #[derive(Clone, Copy)] |
154 | pub struct BeginToken { | |
85aaf69f | 155 | offset: isize, |
dfeec247 | 156 | breaks: Breaks, |
223e47cc LB |
157 | } |
158 | ||
1a4d82fc JJ |
159 | #[derive(Clone)] |
160 | pub enum Token { | |
a1dfa0c6 XL |
161 | // In practice a string token contains either a `&'static str` or a |
162 | // `String`. `Cow` is overkill for this because we never modify the data, | |
163 | // but it's more convenient than rolling our own more specialized type. | |
416331ca | 164 | String(Cow<'static, str>), |
1a4d82fc JJ |
165 | Break(BreakToken), |
166 | Begin(BeginToken), | |
167 | End, | |
168 | Eof, | |
223e47cc LB |
169 | } |
170 | ||
1a4d82fc | 171 | impl Token { |
416331ca | 172 | crate fn is_eof(&self) -> bool { |
85aaf69f SL |
173 | match *self { |
174 | Token::Eof => true, | |
175 | _ => false, | |
176 | } | |
223e47cc | 177 | } |
970d7e83 LB |
178 | |
179 | pub fn is_hardbreak_tok(&self) -> bool { | |
223e47cc | 180 | match *self { |
dfeec247 XL |
181 | Token::Break(BreakToken { offset: 0, blank_space: bs }) if bs == SIZE_INFINITY => true, |
182 | _ => false, | |
223e47cc LB |
183 | } |
184 | } | |
185 | } | |
186 | ||
a7813a04 | 187 | impl fmt::Display for Token { |
9fa01778 | 188 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
a7813a04 | 189 | match *self { |
416331ca | 190 | Token::String(ref s) => write!(f, "STR({},{})", s, s.len()), |
a7813a04 XL |
191 | Token::Break(_) => f.write_str("BREAK"), |
192 | Token::Begin(_) => f.write_str("BEGIN"), | |
193 | Token::End => f.write_str("END"), | |
194 | Token::Eof => f.write_str("EOF"), | |
195 | } | |
223e47cc LB |
196 | } |
197 | } | |
198 | ||
c30ab7b3 SL |
199 | fn buf_str(buf: &[BufEntry], left: usize, right: usize, lim: usize) -> String { |
200 | let n = buf.len(); | |
223e47cc | 201 | let mut i = left; |
1a4d82fc | 202 | let mut l = lim; |
a7813a04 | 203 | let mut s = String::from("["); |
85aaf69f SL |
204 | while i != right && l != 0 { |
205 | l -= 1; | |
970d7e83 LB |
206 | if i != left { |
207 | s.push_str(", "); | |
208 | } | |
c30ab7b3 | 209 | s.push_str(&format!("{}={}", buf[i].size, &buf[i].token)); |
85aaf69f | 210 | i += 1; |
223e47cc LB |
211 | i %= n; |
212 | } | |
1a4d82fc JJ |
213 | s.push(']'); |
214 | s | |
223e47cc LB |
215 | } |
216 | ||
c34b1796 | 217 | #[derive(Copy, Clone)] |
416331ca | 218 | enum PrintStackBreak { |
1a4d82fc JJ |
219 | Fits, |
220 | Broken(Breaks), | |
221 | } | |
223e47cc | 222 | |
c34b1796 | 223 | #[derive(Copy, Clone)] |
416331ca | 224 | struct PrintStackElem { |
85aaf69f | 225 | offset: isize, |
dfeec247 | 226 | pbreak: PrintStackBreak, |
223e47cc LB |
227 | } |
228 | ||
c34b1796 | 229 | const SIZE_INFINITY: isize = 0xffff; |
223e47cc | 230 | |
416331ca XL |
231 | pub fn mk_printer() -> Printer { |
232 | let linewidth = 78; | |
c30ab7b3 | 233 | // Yes 55, it makes the ring buffers big enough to never fall behind. |
3157f602 | 234 | let n: usize = 55 * linewidth; |
1a4d82fc | 235 | debug!("mk_printer {}", linewidth); |
1a4d82fc | 236 | Printer { |
416331ca | 237 | out: String::new(), |
83c7162d | 238 | buf_max_len: n, |
85aaf69f SL |
239 | margin: linewidth as isize, |
240 | space: linewidth as isize, | |
223e47cc LB |
241 | left: 0, |
242 | right: 0, | |
83c7162d XL |
243 | // Initialize a single entry; advance_right() will extend it on demand |
244 | // up to `buf_max_len` elements. | |
245 | buf: vec![BufEntry::default()], | |
223e47cc LB |
246 | left_total: 0, |
247 | right_total: 0, | |
c30ab7b3 | 248 | scan_stack: VecDeque::new(), |
1a4d82fc | 249 | print_stack: Vec::new(), |
dfeec247 | 250 | pending_indentation: 0, |
223e47cc LB |
251 | } |
252 | } | |
253 | ||
416331ca XL |
254 | pub struct Printer { |
255 | out: String, | |
83c7162d | 256 | buf_max_len: usize, |
1a4d82fc | 257 | /// Width of lines we're constrained to |
85aaf69f | 258 | margin: isize, |
1a4d82fc | 259 | /// Number of spaces left on line |
85aaf69f | 260 | space: isize, |
1a4d82fc | 261 | /// Index of left side of input stream |
85aaf69f | 262 | left: usize, |
1a4d82fc | 263 | /// Index of right side of input stream |
85aaf69f | 264 | right: usize, |
c30ab7b3 SL |
265 | /// Ring-buffer of tokens and calculated sizes |
266 | buf: Vec<BufEntry>, | |
1a4d82fc | 267 | /// Running size of stream "...left" |
85aaf69f | 268 | left_total: isize, |
1a4d82fc | 269 | /// Running size of stream "...right" |
85aaf69f | 270 | right_total: isize, |
1a4d82fc JJ |
271 | /// Pseudo-stack, really a ring too. Holds the |
272 | /// primary-ring-buffers index of the Begin that started the | |
273 | /// current block, possibly with the most recent Break after that | |
274 | /// Begin (if there is any) on top of it. Stuff is flushed off the | |
275 | /// bottom as it becomes irrelevant due to the primary ring-buffer | |
276 | /// advancing. | |
c30ab7b3 | 277 | scan_stack: VecDeque<usize>, |
1a4d82fc | 278 | /// Stack of blocks-in-progress being flushed by print |
dfeec247 | 279 | print_stack: Vec<PrintStackElem>, |
1a4d82fc | 280 | /// Buffered indentation to avoid writing trailing whitespace |
85aaf69f | 281 | pending_indentation: isize, |
223e47cc LB |
282 | } |
283 | ||
c30ab7b3 SL |
284 | #[derive(Clone)] |
285 | struct BufEntry { | |
286 | token: Token, | |
287 | size: isize, | |
288 | } | |
289 | ||
83c7162d XL |
290 | impl Default for BufEntry { |
291 | fn default() -> Self { | |
292 | BufEntry { token: Token::Eof, size: 0 } | |
293 | } | |
294 | } | |
295 | ||
416331ca XL |
296 | impl Printer { |
297 | pub fn last_token(&self) -> Token { | |
c30ab7b3 | 298 | self.buf[self.right].token.clone() |
1a4d82fc | 299 | } |
a1dfa0c6 XL |
300 | |
301 | /// Be very careful with this! | |
1a4d82fc | 302 | pub fn replace_last_token(&mut self, t: Token) { |
c30ab7b3 | 303 | self.buf[self.right].token = t; |
970d7e83 | 304 | } |
a1dfa0c6 | 305 | |
416331ca | 306 | fn scan_eof(&mut self) { |
a1dfa0c6 | 307 | if !self.scan_stack.is_empty() { |
223e47cc | 308 | self.check_stack(0); |
416331ca | 309 | self.advance_left(); |
a1dfa0c6 | 310 | } |
a1dfa0c6 XL |
311 | } |
312 | ||
416331ca | 313 | fn scan_begin(&mut self, b: BeginToken) { |
a1dfa0c6 XL |
314 | if self.scan_stack.is_empty() { |
315 | self.left_total = 1; | |
316 | self.right_total = 1; | |
317 | self.left = 0; | |
318 | self.right = 0; | |
319 | } else { | |
320 | self.advance_right(); | |
321 | } | |
dfeec247 | 322 | debug!("pp Begin({})/buffer Vec<{},{}>", b.offset, self.left, self.right); |
416331ca | 323 | self.scan_push(BufEntry { token: Token::Begin(b), size: -self.right_total }); |
a1dfa0c6 XL |
324 | } |
325 | ||
416331ca | 326 | fn scan_end(&mut self) { |
a1dfa0c6 XL |
327 | if self.scan_stack.is_empty() { |
328 | debug!("pp End/print Vec<{},{}>", self.left, self.right); | |
416331ca | 329 | self.print_end(); |
a1dfa0c6 XL |
330 | } else { |
331 | debug!("pp End/buffer Vec<{},{}>", self.left, self.right); | |
332 | self.advance_right(); | |
416331ca | 333 | self.scan_push(BufEntry { token: Token::End, size: -1 }); |
223e47cc LB |
334 | } |
335 | } | |
a1dfa0c6 | 336 | |
416331ca | 337 | fn scan_break(&mut self, b: BreakToken) { |
a1dfa0c6 XL |
338 | if self.scan_stack.is_empty() { |
339 | self.left_total = 1; | |
340 | self.right_total = 1; | |
341 | self.left = 0; | |
342 | self.right = 0; | |
343 | } else { | |
344 | self.advance_right(); | |
345 | } | |
dfeec247 | 346 | debug!("pp Break({})/buffer Vec<{},{}>", b.offset, self.left, self.right); |
a1dfa0c6 | 347 | self.check_stack(0); |
416331ca | 348 | self.scan_push(BufEntry { token: Token::Break(b), size: -self.right_total }); |
a1dfa0c6 | 349 | self.right_total += b.blank_space; |
a1dfa0c6 XL |
350 | } |
351 | ||
416331ca | 352 | fn scan_string(&mut self, s: Cow<'static, str>) { |
a1dfa0c6 | 353 | if self.scan_stack.is_empty() { |
dfeec247 | 354 | debug!("pp String('{}')/print Vec<{},{}>", s, self.left, self.right); |
416331ca | 355 | self.print_string(s); |
a1dfa0c6 | 356 | } else { |
dfeec247 | 357 | debug!("pp String('{}')/buffer Vec<{},{}>", s, self.left, self.right); |
a1dfa0c6 | 358 | self.advance_right(); |
416331ca XL |
359 | let len = s.len() as isize; |
360 | self.buf[self.right] = BufEntry { token: Token::String(s), size: len }; | |
a1dfa0c6 | 361 | self.right_total += len; |
416331ca | 362 | self.check_stream(); |
a1dfa0c6 XL |
363 | } |
364 | } | |
365 | ||
416331ca | 366 | fn check_stream(&mut self) { |
dfeec247 XL |
367 | debug!( |
368 | "check_stream Vec<{}, {}> with left_total={}, right_total={}", | |
369 | self.left, self.right, self.left_total, self.right_total | |
370 | ); | |
223e47cc | 371 | if self.right_total - self.left_total > self.space { |
dfeec247 XL |
372 | debug!( |
373 | "scan window is {}, longer than space on line ({})", | |
374 | self.right_total - self.left_total, | |
375 | self.space | |
376 | ); | |
a7813a04 XL |
377 | if Some(&self.left) == self.scan_stack.back() { |
378 | debug!("setting {} to infinity and popping", self.left); | |
379 | let scanned = self.scan_pop_bottom(); | |
c30ab7b3 | 380 | self.buf[scanned].size = SIZE_INFINITY; |
223e47cc | 381 | } |
416331ca | 382 | self.advance_left(); |
1a4d82fc | 383 | if self.left != self.right { |
416331ca | 384 | self.check_stream(); |
1a4d82fc | 385 | } |
223e47cc LB |
386 | } |
387 | } | |
a1dfa0c6 | 388 | |
416331ca XL |
389 | fn scan_push(&mut self, entry: BufEntry) { |
390 | debug!("scan_push {}", self.right); | |
391 | self.buf[self.right] = entry; | |
392 | self.scan_stack.push_front(self.right); | |
223e47cc | 393 | } |
a1dfa0c6 | 394 | |
416331ca | 395 | fn scan_pop(&mut self) -> usize { |
a7813a04 | 396 | self.scan_stack.pop_front().unwrap() |
223e47cc | 397 | } |
a1dfa0c6 | 398 | |
416331ca | 399 | fn scan_top(&mut self) -> usize { |
a7813a04 | 400 | *self.scan_stack.front().unwrap() |
223e47cc | 401 | } |
a1dfa0c6 | 402 | |
416331ca | 403 | fn scan_pop_bottom(&mut self) -> usize { |
a7813a04 | 404 | self.scan_stack.pop_back().unwrap() |
223e47cc | 405 | } |
a1dfa0c6 | 406 | |
416331ca | 407 | fn advance_right(&mut self) { |
85aaf69f | 408 | self.right += 1; |
83c7162d XL |
409 | self.right %= self.buf_max_len; |
410 | // Extend the buf if necessary. | |
411 | if self.right == self.buf.len() { | |
412 | self.buf.push(BufEntry::default()); | |
413 | } | |
7cac9316 | 414 | assert_ne!(self.right, self.left); |
223e47cc | 415 | } |
a1dfa0c6 | 416 | |
416331ca | 417 | fn advance_left(&mut self) { |
dfeec247 XL |
418 | debug!( |
419 | "advance_left Vec<{},{}>, sizeof({})={}", | |
420 | self.left, self.right, self.left, self.buf[self.left].size | |
421 | ); | |
85aaf69f | 422 | |
c30ab7b3 | 423 | let mut left_size = self.buf[self.left].size; |
85aaf69f SL |
424 | |
425 | while left_size >= 0 { | |
c30ab7b3 | 426 | let left = self.buf[self.left].token.clone(); |
85aaf69f SL |
427 | |
428 | let len = match left { | |
429 | Token::Break(b) => b.blank_space, | |
416331ca XL |
430 | Token::String(ref s) => { |
431 | let len = s.len() as isize; | |
85aaf69f SL |
432 | assert_eq!(len, left_size); |
433 | len | |
434 | } | |
dfeec247 | 435 | _ => 0, |
85aaf69f SL |
436 | }; |
437 | ||
416331ca | 438 | self.print(left, left_size); |
85aaf69f SL |
439 | |
440 | self.left_total += len; | |
441 | ||
442 | if self.left == self.right { | |
443 | break; | |
223e47cc | 444 | } |
85aaf69f SL |
445 | |
446 | self.left += 1; | |
83c7162d | 447 | self.left %= self.buf_max_len; |
85aaf69f | 448 | |
c30ab7b3 | 449 | left_size = self.buf[self.left].size; |
223e47cc LB |
450 | } |
451 | } | |
a1dfa0c6 | 452 | |
416331ca | 453 | fn check_stack(&mut self, k: usize) { |
a7813a04 | 454 | if !self.scan_stack.is_empty() { |
223e47cc | 455 | let x = self.scan_top(); |
c30ab7b3 | 456 | match self.buf[x].token { |
85aaf69f | 457 | Token::Begin(_) => { |
1a4d82fc | 458 | if k > 0 { |
416331ca XL |
459 | self.scan_pop(); |
460 | self.buf[x].size += self.right_total; | |
1a4d82fc JJ |
461 | self.check_stack(k - 1); |
462 | } | |
463 | } | |
85aaf69f | 464 | Token::End => { |
1a4d82fc | 465 | // paper says + not =, but that makes no sense. |
416331ca XL |
466 | self.scan_pop(); |
467 | self.buf[x].size = 1; | |
1a4d82fc JJ |
468 | self.check_stack(k + 1); |
469 | } | |
470 | _ => { | |
416331ca XL |
471 | self.scan_pop(); |
472 | self.buf[x].size += self.right_total; | |
1a4d82fc JJ |
473 | if k > 0 { |
474 | self.check_stack(k); | |
475 | } | |
223e47cc | 476 | } |
223e47cc LB |
477 | } |
478 | } | |
479 | } | |
a1dfa0c6 | 480 | |
416331ca | 481 | fn print_newline(&mut self, amount: isize) { |
1a4d82fc | 482 | debug!("NEWLINE {}", amount); |
416331ca | 483 | self.out.push('\n'); |
223e47cc LB |
484 | self.pending_indentation = 0; |
485 | self.indent(amount); | |
486 | } | |
a1dfa0c6 | 487 | |
416331ca | 488 | fn indent(&mut self, amount: isize) { |
1a4d82fc | 489 | debug!("INDENT {}", amount); |
223e47cc LB |
490 | self.pending_indentation += amount; |
491 | } | |
a1dfa0c6 | 492 | |
416331ca | 493 | fn get_top(&mut self) -> PrintStackElem { |
a7813a04 XL |
494 | match self.print_stack.last() { |
495 | Some(el) => *el, | |
dfeec247 XL |
496 | None => { |
497 | PrintStackElem { offset: 0, pbreak: PrintStackBreak::Broken(Breaks::Inconsistent) } | |
223e47cc LB |
498 | } |
499 | } | |
500 | } | |
a1dfa0c6 | 501 | |
416331ca | 502 | fn print_begin(&mut self, b: BeginToken, l: isize) { |
a1dfa0c6 XL |
503 | if l > self.space { |
504 | let col = self.margin - self.space + b.offset; | |
505 | debug!("print Begin -> push broken block at col {}", col); | |
dfeec247 XL |
506 | self.print_stack |
507 | .push(PrintStackElem { offset: col, pbreak: PrintStackBreak::Broken(b.breaks) }); | |
a1dfa0c6 XL |
508 | } else { |
509 | debug!("print Begin -> push fitting block"); | |
dfeec247 | 510 | self.print_stack.push(PrintStackElem { offset: 0, pbreak: PrintStackBreak::Fits }); |
223e47cc | 511 | } |
223e47cc | 512 | } |
a1dfa0c6 | 513 | |
416331ca | 514 | fn print_end(&mut self) { |
a1dfa0c6 | 515 | debug!("print End -> pop End"); |
416331ca | 516 | self.print_stack.pop().unwrap(); |
a1dfa0c6 XL |
517 | } |
518 | ||
416331ca | 519 | fn print_break(&mut self, b: BreakToken, l: isize) { |
a1dfa0c6 XL |
520 | let top = self.get_top(); |
521 | match top.pbreak { | |
522 | PrintStackBreak::Fits => { | |
1a4d82fc | 523 | debug!("print Break({}) in fitting block", b.blank_space); |
223e47cc LB |
524 | self.space -= b.blank_space; |
525 | self.indent(b.blank_space); | |
a1dfa0c6 XL |
526 | } |
527 | PrintStackBreak::Broken(Breaks::Consistent) => { | |
dfeec247 | 528 | debug!("print Break({}+{}) in consistent block", top.offset, b.offset); |
416331ca | 529 | self.print_newline(top.offset + b.offset); |
223e47cc | 530 | self.space = self.margin - (top.offset + b.offset); |
a1dfa0c6 XL |
531 | } |
532 | PrintStackBreak::Broken(Breaks::Inconsistent) => { | |
1a4d82fc | 533 | if l > self.space { |
dfeec247 | 534 | debug!("print Break({}+{}) w/ newline in inconsistent", top.offset, b.offset); |
416331ca | 535 | self.print_newline(top.offset + b.offset); |
223e47cc LB |
536 | self.space = self.margin - (top.offset + b.offset); |
537 | } else { | |
dfeec247 | 538 | debug!("print Break({}) w/o newline in inconsistent", b.blank_space); |
223e47cc LB |
539 | self.indent(b.blank_space); |
540 | self.space -= b.blank_space; | |
541 | } | |
223e47cc | 542 | } |
a1dfa0c6 XL |
543 | } |
544 | } | |
545 | ||
416331ca XL |
546 | fn print_string(&mut self, s: Cow<'static, str>) { |
547 | let len = s.len() as isize; | |
a1dfa0c6 XL |
548 | debug!("print String({})", s); |
549 | // assert!(len <= space); | |
550 | self.space -= len; | |
532ac7d7 XL |
551 | |
552 | // Write the pending indent. A more concise way of doing this would be: | |
553 | // | |
554 | // write!(self.out, "{: >n$}", "", n = self.pending_indentation as usize)?; | |
555 | // | |
416331ca XL |
556 | // But that is significantly slower. This code is sufficiently hot, and indents can get |
557 | // sufficiently large, that the difference is significant on some workloads. | |
558 | self.out.reserve(self.pending_indentation as usize); | |
559 | self.out.extend(std::iter::repeat(' ').take(self.pending_indentation as usize)); | |
560 | self.pending_indentation = 0; | |
561 | self.out.push_str(&s); | |
a1dfa0c6 XL |
562 | } |
563 | ||
416331ca | 564 | fn print(&mut self, token: Token, l: isize) { |
dfeec247 XL |
565 | debug!("print {} {} (remaining line space={})", token, l, self.space); |
566 | debug!("{}", buf_str(&self.buf, self.left, self.right, 6)); | |
a1dfa0c6 XL |
567 | match token { |
568 | Token::Begin(b) => self.print_begin(b, l), | |
569 | Token::End => self.print_end(), | |
570 | Token::Break(b) => self.print_break(b, l), | |
416331ca XL |
571 | Token::String(s) => { |
572 | let len = s.len() as isize; | |
a1dfa0c6 | 573 | assert_eq!(len, l); |
416331ca | 574 | self.print_string(s); |
a1dfa0c6 XL |
575 | } |
576 | Token::Eof => panic!(), // Eof should never get here. | |
223e47cc LB |
577 | } |
578 | } | |
223e47cc | 579 | |
041b39d2 | 580 | // Convenience functions to talk to the printer. |
8bb4bdeb | 581 | |
041b39d2 | 582 | /// "raw box" |
416331ca | 583 | pub fn rbox(&mut self, indent: usize, b: Breaks) { |
dfeec247 | 584 | self.scan_begin(BeginToken { offset: indent as isize, breaks: b }) |
041b39d2 | 585 | } |
223e47cc | 586 | |
041b39d2 | 587 | /// Inconsistent breaking box |
416331ca | 588 | pub fn ibox(&mut self, indent: usize) { |
041b39d2 XL |
589 | self.rbox(indent, Breaks::Inconsistent) |
590 | } | |
223e47cc | 591 | |
041b39d2 | 592 | /// Consistent breaking box |
416331ca | 593 | pub fn cbox(&mut self, indent: usize) { |
041b39d2 XL |
594 | self.rbox(indent, Breaks::Consistent) |
595 | } | |
223e47cc | 596 | |
416331ca | 597 | pub fn break_offset(&mut self, n: usize, off: isize) { |
dfeec247 | 598 | self.scan_break(BreakToken { offset: off, blank_space: n as isize }) |
041b39d2 | 599 | } |
223e47cc | 600 | |
416331ca XL |
601 | pub fn end(&mut self) { |
602 | self.scan_end() | |
041b39d2 | 603 | } |
223e47cc | 604 | |
416331ca XL |
605 | pub fn eof(mut self) -> String { |
606 | self.scan_eof(); | |
607 | self.out | |
041b39d2 | 608 | } |
223e47cc | 609 | |
416331ca | 610 | pub fn word<S: Into<Cow<'static, str>>>(&mut self, wrd: S) { |
a1dfa0c6 | 611 | let s = wrd.into(); |
416331ca | 612 | self.scan_string(s) |
041b39d2 | 613 | } |
223e47cc | 614 | |
416331ca | 615 | fn spaces(&mut self, n: usize) { |
041b39d2 XL |
616 | self.break_offset(n, 0) |
617 | } | |
223e47cc | 618 | |
416331ca | 619 | crate fn zerobreak(&mut self) { |
041b39d2 XL |
620 | self.spaces(0) |
621 | } | |
223e47cc | 622 | |
416331ca | 623 | pub fn space(&mut self) { |
041b39d2 XL |
624 | self.spaces(1) |
625 | } | |
223e47cc | 626 | |
416331ca | 627 | pub fn hardbreak(&mut self) { |
041b39d2 XL |
628 | self.spaces(SIZE_INFINITY as usize) |
629 | } | |
223e47cc | 630 | |
416331ca XL |
631 | pub fn is_beginning_of_line(&self) -> bool { |
632 | self.last_token().is_eof() || self.last_token().is_hardbreak_tok() | |
041b39d2 | 633 | } |
223e47cc | 634 | |
416331ca | 635 | pub fn hardbreak_tok_offset(off: isize) -> Token { |
dfeec247 | 636 | Token::Break(BreakToken { offset: off, blank_space: SIZE_INFINITY }) |
041b39d2 | 637 | } |
85aaf69f | 638 | } |