|
60 | 60 | //! by two zero-length breaks. The algorithm will try its best to fit it on a
|
61 | 61 | //! line (which it can't) and so naturally place the content on its own line to
|
62 | 62 | //! avoid combining it with other lines and making matters even worse.
|
| 63 | +//! |
| 64 | +//! # Explanation |
| 65 | +//! |
| 66 | +//! In case you do not have the paper, here is an explanation of what's going |
| 67 | +//! on. |
| 68 | +//! |
| 69 | +//! There is a stream of input tokens flowing through this printer. |
| 70 | +//! |
| 71 | +//! The printer buffers up to 3N tokens inside itself, where N is linewidth. |
| 72 | +//! Yes, linewidth is chars and tokens are multi-char, but in the worst |
| 73 | +//! case every token worth buffering is 1 char long, so it's ok. |
| 74 | +//! |
| 75 | +//! Tokens are String, Break, and Begin/End to delimit blocks. |
| 76 | +//! |
| 77 | +//! Begin tokens can carry an offset, saying "how far to indent when you break |
| 78 | +//! inside here", as well as a flag indicating "consistent" or "inconsistent" |
| 79 | +//! breaking. Consistent breaking means that after the first break, no attempt |
| 80 | +//! will be made to flow subsequent breaks together onto lines. Inconsistent |
| 81 | +//! is the opposite. An example of inconsistent breaking would be, say: |
| 82 | +//! |
| 83 | +//! ```text |
| 84 | +//! foo(hello, there, good, friends) |
| 85 | +//! ``` |
| 86 | +//! |
| 87 | +//! breaking inconsistently to become |
| 88 | +//! |
| 89 | +//! ```text |
| 90 | +//! foo(hello, there, |
| 91 | +//! good, friends); |
| 92 | +//! ``` |
| 93 | +//! |
| 94 | +//! whereas a consistent breaking would yield: |
| 95 | +//! |
| 96 | +//! ```text |
| 97 | +//! foo(hello, |
| 98 | +//! there, |
| 99 | +//! good, |
| 100 | +//! friends); |
| 101 | +//! ``` |
| 102 | +//! |
| 103 | +//! That is, in the consistent-break blocks we value vertical alignment |
| 104 | +//! more than the ability to cram stuff onto a line. But in all cases if it |
| 105 | +//! can make a block a one-liner, it'll do so. |
| 106 | +//! |
| 107 | +//! Carrying on with high-level logic: |
| 108 | +//! |
| 109 | +//! The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and |
| 110 | +//! 'right' indices denote the active portion of the ring buffer as well as |
| 111 | +//! describing hypothetical points-in-the-infinite-stream at most 3N tokens |
| 112 | +//! apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch |
| 113 | +//! between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer |
| 114 | +//! and point-in-infinite-stream senses freely. |
| 115 | +//! |
| 116 | +//! There is a parallel ring buffer, 'size', that holds the calculated size of |
| 117 | +//! each token. Why calculated? Because for Begin/End pairs, the "size" |
| 118 | +//! includes everything between the pair. That is, the "size" of Begin is |
| 119 | +//! actually the sum of the sizes of everything between Begin and the paired |
| 120 | +//! End that follows. Since that is arbitrarily far in the future, 'size' is |
| 121 | +//! being rewritten regularly while the printer runs; in fact most of the |
| 122 | +//! machinery is here to work out 'size' entries on the fly (and give up when |
| 123 | +//! they're so obviously over-long that "infinity" is a good enough |
| 124 | +//! approximation for purposes of line breaking). |
| 125 | +//! |
| 126 | +//! The "input side" of the printer is managed as an abstract process called |
| 127 | +//! SCAN, which uses 'scan_stack' to manage calculating 'size'. SCAN is, in |
| 128 | +//! other words, the process of calculating 'size' entries. |
| 129 | +//! |
| 130 | +//! The "output side" of the printer is managed by an abstract process called |
| 131 | +//! PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to |
| 132 | +//! do with each token/size pair it consumes as it goes. It's trying to consume |
| 133 | +//! the entire buffered window, but can't output anything until the size is >= |
| 134 | +//! 0 (sizes are set to negative while they're pending calculation). |
| 135 | +//! |
| 136 | +//! So SCAN takes input and buffers tokens and pending calculations, while |
| 137 | +//! PRINT gobbles up completed calculations and tokens from the buffer. The |
| 138 | +//! theory is that the two can never get more than 3N tokens apart, because |
| 139 | +//! once there's "obviously" too much data to fit on a line, in a size |
| 140 | +//! calculation, SCAN will write "infinity" to the size and let PRINT consume |
| 141 | +//! it. |
| 142 | +//! |
| 143 | +//! In this implementation (following the paper, again) the SCAN process is |
| 144 | +//! the method called `Printer::pretty_print`, and the PRINT process is the method |
| 145 | +//! called `Printer::print`. |
63 | 146 |
|
64 | 147 | use std::collections::VecDeque;
|
65 | 148 | use std::fmt;
|
66 | 149 | use std::io;
|
67 | 150 |
|
| 151 | +/// How to break. Described in more detail in the module docs. |
68 | 152 | #[derive(Clone, Copy, PartialEq)]
|
69 | 153 | pub enum Breaks {
|
70 | 154 | Consistent,
|
@@ -177,81 +261,6 @@ pub fn mk_printer<'a>(out: Box<io::Write+'a>, linewidth: usize) -> Printer<'a> {
|
177 | 261 | }
|
178 | 262 | }
|
179 | 263 |
|
180 |
| - |
181 |
| -/// In case you do not have the paper, here is an explanation of what's going |
182 |
| -/// on. |
183 |
| -/// |
184 |
| -/// There is a stream of input tokens flowing through this printer. |
185 |
| -/// |
186 |
| -/// The printer buffers up to 3N tokens inside itself, where N is linewidth. |
187 |
| -/// Yes, linewidth is chars and tokens are multi-char, but in the worst |
188 |
| -/// case every token worth buffering is 1 char long, so it's ok. |
189 |
| -/// |
190 |
| -/// Tokens are String, Break, and Begin/End to delimit blocks. |
191 |
| -/// |
192 |
| -/// Begin tokens can carry an offset, saying "how far to indent when you break |
193 |
| -/// inside here", as well as a flag indicating "consistent" or "inconsistent" |
194 |
| -/// breaking. Consistent breaking means that after the first break, no attempt |
195 |
| -/// will be made to flow subsequent breaks together onto lines. Inconsistent |
196 |
| -/// is the opposite. Inconsistent breaking example would be, say: |
197 |
| -/// |
198 |
| -/// foo(hello, there, good, friends) |
199 |
| -/// |
200 |
| -/// breaking inconsistently to become |
201 |
| -/// |
202 |
| -/// foo(hello, there |
203 |
| -/// good, friends); |
204 |
| -/// |
205 |
| -/// whereas a consistent breaking would yield: |
206 |
| -/// |
207 |
| -/// foo(hello, |
208 |
| -/// there |
209 |
| -/// good, |
210 |
| -/// friends); |
211 |
| -/// |
212 |
| -/// That is, in the consistent-break blocks we value vertical alignment |
213 |
| -/// more than the ability to cram stuff onto a line. But in all cases if it |
214 |
| -/// can make a block a one-liner, it'll do so. |
215 |
| -/// |
216 |
| -/// Carrying on with high-level logic: |
217 |
| -/// |
218 |
| -/// The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and |
219 |
| -/// 'right' indices denote the active portion of the ring buffer as well as |
220 |
| -/// describing hypothetical points-in-the-infinite-stream at most 3N tokens |
221 |
| -/// apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch |
222 |
| -/// between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer |
223 |
| -/// and point-in-infinite-stream senses freely. |
224 |
| -/// |
225 |
| -/// There is a parallel ring buffer, 'size', that holds the calculated size of |
226 |
| -/// each token. Why calculated? Because for Begin/End pairs, the "size" |
227 |
| -/// includes everything between the pair. That is, the "size" of Begin is |
228 |
| -/// actually the sum of the sizes of everything between Begin and the paired |
229 |
| -/// End that follows. Since that is arbitrarily far in the future, 'size' is |
230 |
| -/// being rewritten regularly while the printer runs; in fact most of the |
231 |
| -/// machinery is here to work out 'size' entries on the fly (and give up when |
232 |
| -/// they're so obviously over-long that "infinity" is a good enough |
233 |
| -/// approximation for purposes of line breaking). |
234 |
| -/// |
235 |
| -/// The "input side" of the printer is managed as an abstract process called |
236 |
| -/// SCAN, which uses 'scan_stack', to manage calculating 'size'. SCAN is, in |
237 |
| -/// other words, the process of calculating 'size' entries. |
238 |
| -/// |
239 |
| -/// The "output side" of the printer is managed by an abstract process called |
240 |
| -/// PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to |
241 |
| -/// do with each token/size pair it consumes as it goes. It's trying to consume |
242 |
| -/// the entire buffered window, but can't output anything until the size is >= |
243 |
| -/// 0 (sizes are set to negative while they're pending calculation). |
244 |
| -/// |
245 |
| -/// So SCAN takes input and buffers tokens and pending calculations, while |
246 |
| -/// PRINT gobbles up completed calculations and tokens from the buffer. The |
247 |
| -/// theory is that the two can never get more than 3N tokens apart, because |
248 |
| -/// once there's "obviously" too much data to fit on a line, in a size |
249 |
| -/// calculation, SCAN will write "infinity" to the size and let PRINT consume |
250 |
| -/// it. |
251 |
| -/// |
252 |
| -/// In this implementation (following the paper, again) the SCAN process is |
253 |
| -/// the method called 'pretty_print', and the 'PRINT' process is the method |
254 |
| -/// called 'print'. |
255 | 264 | pub struct Printer<'a> {
|
256 | 265 | pub out: Box<io::Write+'a>,
|
257 | 266 | buf_len: usize,
|
@@ -292,7 +301,7 @@ impl<'a> Printer<'a> {
|
292 | 301 | pub fn last_token(&mut self) -> Token {
|
293 | 302 | self.buf[self.right].token.clone()
|
294 | 303 | }
|
295 |
| - // be very careful with this! |
| 304 | + /// Overwrites the token stored at `self.buf[self.right]`. Be very careful with this!
296 | 305 | pub fn replace_last_token(&mut self, t: Token) {
|
297 | 306 | self.buf[self.right].token = t;
|
298 | 307 | }
|
@@ -571,19 +580,21 @@ impl<'a> Printer<'a> {
|
571 | 580 | }
|
572 | 581 |
|
573 | 582 | // Convenience functions to talk to the printer.
|
574 |
| -// |
575 |
| -// "raw box" |
| 583 | + |
| 584 | +/// "raw box": emits a `Begin` token with the given indent offset and breaking mode
576 | 585 | pub fn rbox(p: &mut Printer, indent: usize, b: Breaks) -> io::Result<()> {
|
577 | 586 | p.pretty_print(Token::Begin(BeginToken {
|
578 | 587 | offset: indent as isize,
|
579 | 588 | breaks: b
|
580 | 589 | }))
|
581 | 590 | }
|
582 | 591 |
|
| 592 | +/// Inconsistent breaking box |
583 | 593 | pub fn ibox(p: &mut Printer, indent: usize) -> io::Result<()> {
|
584 | 594 | rbox(p, indent, Breaks::Inconsistent)
|
585 | 595 | }
|
586 | 596 |
|
| 597 | +/// Consistent breaking box |
587 | 598 | pub fn cbox(p: &mut Printer, indent: usize) -> io::Result<()> {
|
588 | 599 | rbox(p, indent, Breaks::Consistent)
|
589 | 600 | }
|
|
0 commit comments