Skip to content

Commit fe93f08

Browse files
authored
Rollup merge of #93065 - dtolnay:ringbuffer, r=lcnr
Pretty printer algorithm revamp step 2 This PR follows #92923 as a second chunk of modernizations backported from https://github.com/dtolnay/prettyplease into rustc_ast_pretty. I've broken this up into atomic commits that hopefully are sensible in isolation. At every commit, the pretty printer is compilable and has runtime behavior that is identical to before and after the PR. None of the refactoring so far changes behavior. The general theme of this chunk of commits is: the logic in the old pretty printer is doing some very basic things (pushing and popping tokens on a ring buffer) but expressed in a too-low-level way that I found makes it quite complicated/subtle to reason about. There are a number of obvious invariants that are "almost true" -- things like `self.left == self.buf.offset` and `self.right == self.buf.offset + self.buf.data.len()` and `self.right_total == self.left_total + self.buf.data.sum()`. The reason these things are "almost true" is the implementation tends to put updating one side of the invariant unreasonably far apart from updating the other side, leaving the invariant broken while unrelated stuff happens in between. The following code from master is an example of this: https://github.com/rust-lang/rust/blob/e5e2b0be26ea177527b60d355bd8f56cd473bd00/compiler/rustc_ast_pretty/src/pp.rs#L314-L317 In this code the `advance_right` is reserving an entry into which to write a next token on the right side of the ring buffer, the `check_stack` is doing something totally unrelated to the right boundary of the ring buffer, and the `scan_push` is actually writing the token we previously reserved space for. Much of what this PR is doing is rearranging code to shrink the amount of stuff in between when an invariant is broken to when it is restored, until the whole thing can be factored out into one indivisible method call on the RingBuffer type. The end state of the PR is that we can entirely eliminate `self.left` (because it's now just equal to `self.buf.offset` always) and `self.right` (because it's equal to `self.buf.offset + self.buf.data.len()` always) and the whole `Token::Eof` state which used to be the value of tokens that have been reserved space for but not yet written. I found without these changes the pretty printer implementation to be hard to reason about and I wasn't able to confidently introduce improvements like trailing commas in `prettyplease` until after this refactor. The logic here is 43 years old at this point (Graydon translated it as directly as possible from the 1979 pretty printing paper) and while there are advantages to following the paper as closely as possible, in `prettyplease` I decided if we're going to adapt the algorithm to work better for Rust syntax, it was worthwhile making it easier to follow than the original.
2 parents 623791d + 4d3faae commit fe93f08

File tree

3 files changed

+103
-112
lines changed

3 files changed

+103
-112
lines changed

Diff for: compiler/rustc_ast_pretty/src/pp.rs

+61-97
Original file line numberDiff line numberDiff line change
@@ -167,14 +167,9 @@ pub enum Token {
167167
Break(BreakToken),
168168
Begin(BeginToken),
169169
End,
170-
Eof,
171170
}
172171

173172
impl Token {
174-
crate fn is_eof(&self) -> bool {
175-
matches!(self, Token::Eof)
176-
}
177-
178173
pub fn is_hardbreak_tok(&self) -> bool {
179174
matches!(self, Token::Break(BreakToken { offset: 0, blank_space: SIZE_INFINITY }))
180175
}
@@ -187,7 +182,6 @@ impl fmt::Display for Token {
187182
Token::Break(_) => f.write_str("BREAK"),
188183
Token::Begin(_) => f.write_str("BEGIN"),
189184
Token::End => f.write_str("END"),
190-
Token::Eof => f.write_str("EOF"),
191185
}
192186
}
193187
}
@@ -212,10 +206,6 @@ pub struct Printer {
212206
margin: isize,
213207
/// Number of spaces left on line
214208
space: isize,
215-
/// Index of left side of input stream
216-
left: usize,
217-
/// Index of right side of input stream
218-
right: usize,
219209
/// Ring-buffer of tokens and calculated sizes
220210
buf: RingBuffer<BufEntry>,
221211
/// Running size of stream "...left"
@@ -233,6 +223,9 @@ pub struct Printer {
233223
print_stack: Vec<PrintStackElem>,
234224
/// Buffered indentation to avoid writing trailing whitespace
235225
pending_indentation: isize,
226+
/// The token most recently popped from the left boundary of the
227+
/// ring-buffer for printing
228+
last_printed: Option<Token>,
236229
}
237230

238231
#[derive(Clone)]
@@ -241,39 +234,34 @@ struct BufEntry {
241234
size: isize,
242235
}
243236

244-
impl Default for BufEntry {
245-
fn default() -> Self {
246-
BufEntry { token: Token::Eof, size: 0 }
247-
}
248-
}
249-
250237
impl Printer {
251238
pub fn new() -> Self {
252239
let linewidth = 78;
253-
let mut buf = RingBuffer::new();
254-
buf.advance_right();
255240
Printer {
256241
out: String::new(),
257242
margin: linewidth as isize,
258243
space: linewidth as isize,
259-
left: 0,
260-
right: 0,
261-
buf,
244+
buf: RingBuffer::new(),
262245
left_total: 0,
263246
right_total: 0,
264247
scan_stack: VecDeque::new(),
265248
print_stack: Vec::new(),
266249
pending_indentation: 0,
250+
last_printed: None,
267251
}
268252
}
269253

270-
pub fn last_token(&self) -> Token {
271-
self.buf[self.right].token.clone()
254+
pub fn last_token(&self) -> Option<&Token> {
255+
self.last_token_still_buffered().or_else(|| self.last_printed.as_ref())
256+
}
257+
258+
pub fn last_token_still_buffered(&self) -> Option<&Token> {
259+
self.buf.last().map(|last| &last.token)
272260
}
273261

274262
/// Be very careful with this!
275-
pub fn replace_last_token(&mut self, t: Token) {
276-
self.buf[self.right].token = t;
263+
pub fn replace_last_token_still_buffered(&mut self, t: Token) {
264+
self.buf.last_mut().unwrap().token = t;
277265
}
278266

279267
fn scan_eof(&mut self) {
@@ -287,89 +275,63 @@ impl Printer {
287275
if self.scan_stack.is_empty() {
288276
self.left_total = 1;
289277
self.right_total = 1;
290-
self.right = self.left;
291-
self.buf.truncate(1);
292-
} else {
293-
self.advance_right();
278+
self.buf.clear();
294279
}
295-
self.scan_push(BufEntry { token: Token::Begin(b), size: -self.right_total });
280+
let right = self.buf.push(BufEntry { token: Token::Begin(b), size: -self.right_total });
281+
self.scan_stack.push_front(right);
296282
}
297283

298284
fn scan_end(&mut self) {
299285
if self.scan_stack.is_empty() {
300286
self.print_end();
301287
} else {
302-
self.advance_right();
303-
self.scan_push(BufEntry { token: Token::End, size: -1 });
288+
let right = self.buf.push(BufEntry { token: Token::End, size: -1 });
289+
self.scan_stack.push_front(right);
304290
}
305291
}
306292

307293
fn scan_break(&mut self, b: BreakToken) {
308294
if self.scan_stack.is_empty() {
309295
self.left_total = 1;
310296
self.right_total = 1;
311-
self.right = self.left;
312-
self.buf.truncate(1);
297+
self.buf.clear();
313298
} else {
314-
self.advance_right();
299+
self.check_stack(0);
315300
}
316-
self.check_stack(0);
317-
self.scan_push(BufEntry { token: Token::Break(b), size: -self.right_total });
301+
let right = self.buf.push(BufEntry { token: Token::Break(b), size: -self.right_total });
302+
self.scan_stack.push_front(right);
318303
self.right_total += b.blank_space;
319304
}
320305

321306
fn scan_string(&mut self, s: Cow<'static, str>) {
322307
if self.scan_stack.is_empty() {
323-
self.print_string(s);
308+
self.print_string(&s);
324309
} else {
325-
self.advance_right();
326310
let len = s.len() as isize;
327-
self.buf[self.right] = BufEntry { token: Token::String(s), size: len };
311+
self.buf.push(BufEntry { token: Token::String(s), size: len });
328312
self.right_total += len;
329313
self.check_stream();
330314
}
331315
}
332316

333317
fn check_stream(&mut self) {
334-
if self.right_total - self.left_total > self.space {
335-
if Some(&self.left) == self.scan_stack.back() {
336-
let scanned = self.scan_pop_bottom();
337-
self.buf[scanned].size = SIZE_INFINITY;
318+
while self.right_total - self.left_total > self.space {
319+
if *self.scan_stack.back().unwrap() == self.buf.index_of_first() {
320+
self.scan_stack.pop_back().unwrap();
321+
self.buf.first_mut().unwrap().size = SIZE_INFINITY;
338322
}
339323
self.advance_left();
340-
if self.left != self.right {
341-
self.check_stream();
324+
if self.buf.is_empty() {
325+
break;
342326
}
343327
}
344328
}
345329

346-
fn scan_push(&mut self, entry: BufEntry) {
347-
self.buf[self.right] = entry;
348-
self.scan_stack.push_front(self.right);
349-
}
350-
351-
fn scan_pop(&mut self) -> usize {
352-
self.scan_stack.pop_front().unwrap()
353-
}
354-
355-
fn scan_top(&self) -> usize {
356-
*self.scan_stack.front().unwrap()
357-
}
358-
359-
fn scan_pop_bottom(&mut self) -> usize {
360-
self.scan_stack.pop_back().unwrap()
361-
}
362-
363-
fn advance_right(&mut self) {
364-
self.right += 1;
365-
self.buf.advance_right();
366-
}
367-
368330
fn advance_left(&mut self) {
369-
let mut left_size = self.buf[self.left].size;
331+
let mut left_size = self.buf.first().unwrap().size;
370332

371333
while left_size >= 0 {
372-
let left = self.buf[self.left].token.clone();
334+
let left = self.buf.first().unwrap().token.clone();
373335

374336
let len = match left {
375337
Token::Break(b) => b.blank_space,
@@ -385,39 +347,38 @@ impl Printer {
385347

386348
self.left_total += len;
387349

388-
if self.left == self.right {
350+
self.buf.advance_left();
351+
if self.buf.is_empty() {
389352
break;
390353
}
391354

392-
self.buf.advance_left();
393-
self.left += 1;
394-
395-
left_size = self.buf[self.left].size;
355+
left_size = self.buf.first().unwrap().size;
396356
}
397357
}
398358

399-
fn check_stack(&mut self, k: usize) {
400-
if !self.scan_stack.is_empty() {
401-
let x = self.scan_top();
402-
match self.buf[x].token {
359+
fn check_stack(&mut self, mut k: usize) {
360+
while let Some(&x) = self.scan_stack.front() {
361+
let mut entry = &mut self.buf[x];
362+
match entry.token {
403363
Token::Begin(_) => {
404-
if k > 0 {
405-
self.scan_pop();
406-
self.buf[x].size += self.right_total;
407-
self.check_stack(k - 1);
364+
if k == 0 {
365+
break;
408366
}
367+
self.scan_stack.pop_front().unwrap();
368+
entry.size += self.right_total;
369+
k -= 1;
409370
}
410371
Token::End => {
411372
// paper says + not =, but that makes no sense.
412-
self.scan_pop();
413-
self.buf[x].size = 1;
414-
self.check_stack(k + 1);
373+
self.scan_stack.pop_front().unwrap();
374+
entry.size = 1;
375+
k += 1;
415376
}
416377
_ => {
417-
self.scan_pop();
418-
self.buf[x].size += self.right_total;
419-
if k > 0 {
420-
self.check_stack(k);
378+
self.scan_stack.pop_front().unwrap();
379+
entry.size += self.right_total;
380+
if k == 0 {
381+
break;
421382
}
422383
}
423384
}
@@ -477,7 +438,7 @@ impl Printer {
477438
}
478439
}
479440

480-
fn print_string(&mut self, s: Cow<'static, str>) {
441+
fn print_string(&mut self, s: &str) {
481442
let len = s.len() as isize;
482443
// assert!(len <= space);
483444
self.space -= len;
@@ -491,21 +452,21 @@ impl Printer {
491452
self.out.reserve(self.pending_indentation as usize);
492453
self.out.extend(std::iter::repeat(' ').take(self.pending_indentation as usize));
493454
self.pending_indentation = 0;
494-
self.out.push_str(&s);
455+
self.out.push_str(s);
495456
}
496457

497458
fn print(&mut self, token: Token, l: isize) {
498-
match token {
499-
Token::Begin(b) => self.print_begin(b, l),
459+
match &token {
460+
Token::Begin(b) => self.print_begin(*b, l),
500461
Token::End => self.print_end(),
501-
Token::Break(b) => self.print_break(b, l),
462+
Token::Break(b) => self.print_break(*b, l),
502463
Token::String(s) => {
503464
let len = s.len() as isize;
504465
assert_eq!(len, l);
505466
self.print_string(s);
506467
}
507-
Token::Eof => panic!(), // Eof should never get here.
508468
}
469+
self.last_printed = Some(token);
509470
}
510471

511472
// Convenience functions to talk to the printer.
@@ -560,7 +521,10 @@ impl Printer {
560521
}
561522

562523
pub fn is_beginning_of_line(&self) -> bool {
563-
self.last_token().is_eof() || self.last_token().is_hardbreak_tok()
524+
match self.last_token() {
525+
Some(last_token) => last_token.is_hardbreak_tok(),
526+
None => true,
527+
}
564528
}
565529

566530
pub fn hardbreak_tok_offset(off: isize) -> Token {

Diff for: compiler/rustc_ast_pretty/src/pp/ring.rs

+30-7
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,43 @@ impl<T> RingBuffer<T> {
2222
RingBuffer { data: VecDeque::new(), offset: 0 }
2323
}
2424

25-
pub fn advance_right(&mut self)
26-
where
27-
T: Default,
28-
{
29-
self.data.push_back(T::default());
25+
pub fn is_empty(&self) -> bool {
26+
self.data.is_empty()
27+
}
28+
29+
pub fn push(&mut self, value: T) -> usize {
30+
let index = self.offset + self.data.len();
31+
self.data.push_back(value);
32+
index
3033
}
3134

3235
pub fn advance_left(&mut self) {
3336
self.data.pop_front().unwrap();
3437
self.offset += 1;
3538
}
3639

37-
pub fn truncate(&mut self, len: usize) {
38-
self.data.truncate(len);
40+
pub fn clear(&mut self) {
41+
self.data.clear();
42+
}
43+
44+
pub fn index_of_first(&self) -> usize {
45+
self.offset
46+
}
47+
48+
pub fn first(&self) -> Option<&T> {
49+
self.data.front()
50+
}
51+
52+
pub fn first_mut(&mut self) -> Option<&mut T> {
53+
self.data.front_mut()
54+
}
55+
56+
pub fn last(&self) -> Option<&T> {
57+
self.data.back()
58+
}
59+
60+
pub fn last_mut(&mut self) -> Option<&mut T> {
61+
self.data.back_mut()
3962
}
4063
}
4164

Diff for: compiler/rustc_ast_pretty/src/pprust/state.rs

+12-8
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
328328
CommentStyle::BlankLine => {
329329
// We need to do at least one, possibly two hardbreaks.
330330
let twice = match self.last_token() {
331-
pp::Token::String(s) => ";" == s,
332-
pp::Token::Begin(_) => true,
333-
pp::Token::End => true,
331+
Some(pp::Token::String(s)) => ";" == s,
332+
Some(pp::Token::Begin(_)) => true,
333+
Some(pp::Token::End) => true,
334334
_ => false,
335335
};
336336
if twice {
@@ -686,11 +686,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
686686
fn break_offset_if_not_bol(&mut self, n: usize, off: isize) {
687687
if !self.is_beginning_of_line() {
688688
self.break_offset(n, off)
689-
} else if off != 0 && self.last_token().is_hardbreak_tok() {
690-
// We do something pretty sketchy here: tuck the nonzero
691-
// offset-adjustment we were going to deposit along with the
692-
// break into the previous hardbreak.
693-
self.replace_last_token(pp::Printer::hardbreak_tok_offset(off));
689+
} else if off != 0 {
690+
if let Some(last_token) = self.last_token_still_buffered() {
691+
if last_token.is_hardbreak_tok() {
692+
// We do something pretty sketchy here: tuck the nonzero
693+
// offset-adjustment we were going to deposit along with the
694+
// break into the previous hardbreak.
695+
self.replace_last_token_still_buffered(pp::Printer::hardbreak_tok_offset(off));
696+
}
697+
}
694698
}
695699
}
696700

0 commit comments

Comments
 (0)