Skip to content

Commit fd85ca0

Browse files
committedJan 2, 2021
Auto merge of #80550 - bugadani:markdown-refactor, r=jyn514
Cleanup markdown span handling, take 2 This PR includes the cleanups made in #80244 except for the removal of `locate()`. While the biggest conceptual part in #80244 was the removal of `locate()`, it introduced a diagnostic regression. Additional cleanup: - Use `RefCell` to avoid building two separate vectors for the links Work to do: - [ ] Decide if `locate()` can be simplified by assuming `s` is always in `md` - [ ] Should probably add some tests that still provide the undesired diagnostics causing #80381 cc `@jyn514` This is the best I can do without patching Pulldown to provide multiple ranges for reference-style links. Also, since `locate` is probably more efficient than `rfind` (at least it's constant time), I decided to not check the link type and just cover every &str as it was before.
2 parents 90ccf4f + f073543 commit fd85ca0

File tree

2 files changed

+111
-103
lines changed

2 files changed

+111
-103
lines changed
 

‎src/librustdoc/html/markdown.rs

+84-71
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use rustc_session::lint;
2525
use rustc_span::edition::Edition;
2626
use rustc_span::Span;
2727
use std::borrow::Cow;
28+
use std::cell::RefCell;
2829
use std::collections::VecDeque;
2930
use std::default::Default;
3031
use std::fmt::Write;
@@ -414,11 +415,13 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
414415
}
415416
}
416417

418+
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
419+
417420
/// Make headings links with anchor IDs and build up TOC.
418421
struct HeadingLinks<'a, 'b, 'ids, I> {
419422
inner: I,
420423
toc: Option<&'b mut TocBuilder>,
421-
buf: VecDeque<Event<'a>>,
424+
buf: VecDeque<SpannedEvent<'a>>,
422425
id_map: &'ids mut IdMap,
423426
}
424427

@@ -428,48 +431,48 @@ impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
428431
}
429432
}
430433

431-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> {
432-
type Item = Event<'a>;
434+
impl<'a, 'b, 'ids, I: Iterator<Item = SpannedEvent<'a>>> Iterator
435+
for HeadingLinks<'a, 'b, 'ids, I>
436+
{
437+
type Item = SpannedEvent<'a>;
433438

434439
fn next(&mut self) -> Option<Self::Item> {
435440
if let Some(e) = self.buf.pop_front() {
436441
return Some(e);
437442
}
438443

439444
let event = self.inner.next();
440-
if let Some(Event::Start(Tag::Heading(level))) = event {
445+
if let Some((Event::Start(Tag::Heading(level)), _)) = event {
441446
let mut id = String::new();
442447
for event in &mut self.inner {
443-
match &event {
448+
match &event.0 {
444449
Event::End(Tag::Heading(..)) => break,
450+
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
445451
Event::Text(text) | Event::Code(text) => {
446452
id.extend(text.chars().filter_map(slugify));
453+
self.buf.push_back(event);
447454
}
448-
_ => {}
449-
}
450-
match event {
451-
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
452-
event => self.buf.push_back(event),
455+
_ => self.buf.push_back(event),
453456
}
454457
}
455458
let id = self.id_map.derive(id);
456459

457460
if let Some(ref mut builder) = self.toc {
458461
let mut html_header = String::new();
459-
html::push_html(&mut html_header, self.buf.iter().cloned());
462+
html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
460463
let sec = builder.push(level as u32, html_header, id.clone());
461-
self.buf.push_front(Event::Html(format!("{} ", sec).into()));
464+
self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
462465
}
463466

464-
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));
467+
self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
465468

466469
let start_tags = format!(
467470
"<h{level} id=\"{id}\" class=\"section-header\">\
468471
<a href=\"#{id}\">",
469472
id = id,
470473
level = level
471474
);
472-
return Some(Event::Html(start_tags.into()));
475+
return Some((Event::Html(start_tags.into()), 0..0));
473476
}
474477
event
475478
}
@@ -555,23 +558,23 @@ impl<'a, I> Footnotes<'a, I> {
555558
}
556559
}
557560

558-
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
559-
type Item = Event<'a>;
561+
impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
562+
type Item = SpannedEvent<'a>;
560563

561564
fn next(&mut self) -> Option<Self::Item> {
562565
loop {
563566
match self.inner.next() {
564-
Some(Event::FootnoteReference(ref reference)) => {
567+
Some((Event::FootnoteReference(ref reference), range)) => {
565568
let entry = self.get_entry(&reference);
566569
let reference = format!(
567570
"<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
568571
(*entry).1
569572
);
570-
return Some(Event::Html(reference.into()));
573+
return Some((Event::Html(reference.into()), range));
571574
}
572-
Some(Event::Start(Tag::FootnoteDefinition(def))) => {
575+
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
573576
let mut content = Vec::new();
574-
for event in &mut self.inner {
577+
for (event, _) in &mut self.inner {
575578
if let Event::End(Tag::FootnoteDefinition(..)) = event {
576579
break;
577580
}
@@ -602,7 +605,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
602605
ret.push_str("</li>");
603606
}
604607
ret.push_str("</ol></div>");
605-
return Some(Event::Html(ret.into()));
608+
return Some((Event::Html(ret.into()), 0..0));
606609
} else {
607610
return None;
608611
}
@@ -912,13 +915,14 @@ impl Markdown<'_> {
912915
};
913916

914917
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
918+
let p = p.into_offset_iter();
915919

916920
let mut s = String::with_capacity(md.len() * 3 / 2);
917921

918922
let p = HeadingLinks::new(p, None, &mut ids);
919-
let p = LinkReplacer::new(p, links);
920-
let p = CodeBlocks::new(p, codes, edition, playground);
921923
let p = Footnotes::new(p);
924+
let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
925+
let p = CodeBlocks::new(p, codes, edition, playground);
922926
html::push_html(&mut s, p);
923927

924928
s
@@ -929,16 +933,16 @@ impl MarkdownWithToc<'_> {
929933
crate fn into_string(self) -> String {
930934
let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
931935

932-
let p = Parser::new_ext(md, opts());
936+
let p = Parser::new_ext(md, opts()).into_offset_iter();
933937

934938
let mut s = String::with_capacity(md.len() * 3 / 2);
935939

936940
let mut toc = TocBuilder::new();
937941

938942
{
939943
let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
940-
let p = CodeBlocks::new(p, codes, edition, playground);
941944
let p = Footnotes::new(p);
945+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
942946
html::push_html(&mut s, p);
943947
}
944948

@@ -954,19 +958,19 @@ impl MarkdownHtml<'_> {
954958
if md.is_empty() {
955959
return String::new();
956960
}
957-
let p = Parser::new_ext(md, opts());
961+
let p = Parser::new_ext(md, opts()).into_offset_iter();
958962

959963
// Treat inline HTML as plain text.
960-
let p = p.map(|event| match event {
961-
Event::Html(text) => Event::Text(text),
964+
let p = p.map(|event| match event.0 {
965+
Event::Html(text) => (Event::Text(text), event.1),
962966
_ => event,
963967
});
964968

965969
let mut s = String::with_capacity(md.len() * 3 / 2);
966970

967971
let p = HeadingLinks::new(p, None, &mut ids);
968-
let p = CodeBlocks::new(p, codes, edition, playground);
969972
let p = Footnotes::new(p);
973+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
970974
html::push_html(&mut s, p);
971975

972976
s
@@ -1119,56 +1123,65 @@ crate fn plain_text_summary(md: &str) -> String {
11191123
s
11201124
}
11211125

1122-
crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
1126+
crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
11231127
if md.is_empty() {
11241128
return vec![];
11251129
}
11261130

1127-
let mut links = vec![];
1128-
let mut shortcut_links = vec![];
1129-
1130-
{
1131-
let locate = |s: &str| unsafe {
1132-
let s_start = s.as_ptr();
1133-
let s_end = s_start.add(s.len());
1134-
let md_start = md.as_ptr();
1135-
let md_end = md_start.add(md.len());
1136-
if md_start <= s_start && s_end <= md_end {
1137-
let start = s_start.offset_from(md_start) as usize;
1138-
let end = s_end.offset_from(md_start) as usize;
1139-
Some(start..end)
1140-
} else {
1141-
None
1142-
}
1143-
};
1131+
let links = RefCell::new(vec![]);
1132+
1133+
// FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
1134+
let locate = |s: &str, fallback: Range<usize>| unsafe {
1135+
let s_start = s.as_ptr();
1136+
let s_end = s_start.add(s.len());
1137+
let md_start = md.as_ptr();
1138+
let md_end = md_start.add(md.len());
1139+
if md_start <= s_start && s_end <= md_end {
1140+
let start = s_start.offset_from(md_start) as usize;
1141+
let end = s_end.offset_from(md_start) as usize;
1142+
start..end
1143+
} else {
1144+
fallback
1145+
}
1146+
};
1147+
1148+
let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1149+
// For diagnostics, we want to underline the link's definition but `span` will point at
1150+
// where the link is used. This is a problem for reference-style links, where the definition
1151+
// is separate from the usage.
1152+
match link {
1153+
// `Borrowed` variant means the string (the link's destination) may come directly from
1154+
// the markdown text and we can locate the original link destination.
1155+
// NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
1156+
// so `locate()` can fall back to use `span`.
1157+
CowStr::Borrowed(s) => locate(s, span),
1158+
1159+
// For anything else, we can only use the provided range.
1160+
CowStr::Boxed(_) | CowStr::Inlined(_) => span,
1161+
}
1162+
};
11441163

1145-
let mut push = |link: BrokenLink<'_>| {
1146-
// FIXME: use `link.span` instead of `locate`
1147-
// (doing it now includes the `[]` as well as the text)
1148-
shortcut_links.push((link.reference.to_owned(), locate(link.reference)));
1149-
None
1150-
};
1151-
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1152-
1153-
// There's no need to thread an IdMap through to here because
1154-
// the IDs generated aren't going to be emitted anywhere.
1155-
let mut ids = IdMap::new();
1156-
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1157-
1158-
for ev in iter {
1159-
if let Event::Start(Tag::Link(_, dest, _)) = ev {
1160-
debug!("found link: {}", dest);
1161-
links.push(match dest {
1162-
CowStr::Borrowed(s) => (s.to_owned(), locate(s)),
1163-
s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None),
1164-
});
1165-
}
1164+
let mut push = |link: BrokenLink<'_>| {
1165+
let span = span_for_link(&CowStr::Borrowed(link.reference), link.span);
1166+
links.borrow_mut().push((link.reference.to_owned(), span));
1167+
None
1168+
};
1169+
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter();
1170+
1171+
// There's no need to thread an IdMap through to here because
1172+
// the IDs generated aren't going to be emitted anywhere.
1173+
let mut ids = IdMap::new();
1174+
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1175+
1176+
for ev in iter {
1177+
if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
1178+
debug!("found link: {}", dest);
1179+
let span = span_for_link(&dest, ev.1);
1180+
links.borrow_mut().push((dest.into_string(), span));
11661181
}
11671182
}
11681183

1169-
links.append(&mut shortcut_links);
1170-
1171-
links
1184+
links.into_inner()
11721185
}
11731186

11741187
#[derive(Debug)]

‎src/librustdoc/passes/collect_intra_doc_links.rs

+27-32
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ struct DiagnosticInfo<'a> {
245245
item: &'a Item,
246246
dox: &'a str,
247247
ori_link: &'a str,
248-
link_range: Option<Range<usize>>,
248+
link_range: Range<usize>,
249249
}
250250

251251
#[derive(Clone, Debug, Hash)]
@@ -960,7 +960,7 @@ impl LinkCollector<'_, '_> {
960960
parent_node: Option<DefId>,
961961
krate: CrateNum,
962962
ori_link: String,
963-
link_range: Option<Range<usize>>,
963+
link_range: Range<usize>,
964964
) -> Option<ItemLink> {
965965
trace!("considering link '{}'", ori_link);
966966

@@ -1606,7 +1606,7 @@ fn report_diagnostic(
16061606
msg: &str,
16071607
item: &Item,
16081608
dox: &str,
1609-
link_range: &Option<Range<usize>>,
1609+
link_range: &Range<usize>,
16101610
decorate: impl FnOnce(&mut DiagnosticBuilder<'_>, Option<rustc_span::Span>),
16111611
) {
16121612
let hir_id = match cx.as_local_hir_id(item.def_id) {
@@ -1624,31 +1624,27 @@ fn report_diagnostic(
16241624
cx.tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| {
16251625
let mut diag = lint.build(msg);
16261626

1627-
let span = link_range
1628-
.as_ref()
1629-
.and_then(|range| super::source_span_for_markdown_range(cx, dox, range, attrs));
1627+
let span = super::source_span_for_markdown_range(cx, dox, link_range, attrs);
16301628

1631-
if let Some(link_range) = link_range {
1632-
if let Some(sp) = span {
1633-
diag.set_span(sp);
1634-
} else {
1635-
// blah blah blah\nblah\nblah [blah] blah blah\nblah blah
1636-
// ^ ~~~~
1637-
// | link_range
1638-
// last_new_line_offset
1639-
let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1);
1640-
let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
1641-
1642-
// Print the line containing the `link_range` and manually mark it with '^'s.
1643-
diag.note(&format!(
1644-
"the link appears in this line:\n\n{line}\n\
1629+
if let Some(sp) = span {
1630+
diag.set_span(sp);
1631+
} else {
1632+
// blah blah blah\nblah\nblah [blah] blah blah\nblah blah
1633+
// ^ ~~~~
1634+
// | link_range
1635+
// last_new_line_offset
1636+
let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1);
1637+
let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
1638+
1639+
// Print the line containing the `link_range` and manually mark it with '^'s.
1640+
diag.note(&format!(
1641+
"the link appears in this line:\n\n{line}\n\
16451642
{indicator: <before$}{indicator:^<found$}",
1646-
line = line,
1647-
indicator = "",
1648-
before = link_range.start - last_new_line_offset,
1649-
found = link_range.len(),
1650-
));
1651-
}
1643+
line = line,
1644+
indicator = "",
1645+
before = link_range.start - last_new_line_offset,
1646+
found = link_range.len(),
1647+
));
16521648
}
16531649

16541650
decorate(&mut diag, span);
@@ -1668,7 +1664,7 @@ fn resolution_failure(
16681664
path_str: &str,
16691665
disambiguator: Option<Disambiguator>,
16701666
dox: &str,
1671-
link_range: Option<Range<usize>>,
1667+
link_range: Range<usize>,
16721668
kinds: SmallVec<[ResolutionFailure<'_>; 3]>,
16731669
) {
16741670
let tcx = collector.cx.tcx;
@@ -1892,7 +1888,7 @@ fn anchor_failure(
18921888
item: &Item,
18931889
path_str: &str,
18941890
dox: &str,
1895-
link_range: Option<Range<usize>>,
1891+
link_range: Range<usize>,
18961892
failure: AnchorFailure,
18971893
) {
18981894
let msg = match failure {
@@ -1917,7 +1913,7 @@ fn ambiguity_error(
19171913
item: &Item,
19181914
path_str: &str,
19191915
dox: &str,
1920-
link_range: Option<Range<usize>>,
1916+
link_range: Range<usize>,
19211917
candidates: Vec<Res>,
19221918
) {
19231919
let mut msg = format!("`{}` is ", path_str);
@@ -1966,13 +1962,12 @@ fn suggest_disambiguator(
19661962
path_str: &str,
19671963
dox: &str,
19681964
sp: Option<rustc_span::Span>,
1969-
link_range: &Option<Range<usize>>,
1965+
link_range: &Range<usize>,
19701966
) {
19711967
let suggestion = disambiguator.suggestion();
19721968
let help = format!("to link to the {}, {}", disambiguator.descr(), suggestion.descr());
19731969

19741970
if let Some(sp) = sp {
1975-
let link_range = link_range.as_ref().expect("must have a link range if we have a span");
19761971
let msg = if dox.bytes().nth(link_range.start) == Some(b'`') {
19771972
format!("`{}`", suggestion.as_help(path_str))
19781973
} else {
@@ -1991,7 +1986,7 @@ fn privacy_error(
19911986
item: &Item,
19921987
path_str: &str,
19931988
dox: &str,
1994-
link_range: Option<Range<usize>>,
1989+
link_range: Range<usize>,
19951990
) {
19961991
let sym;
19971992
let item_name = match item.name {

0 commit comments

Comments
 (0)
Please sign in to comment.