Skip to content

Commit 6f2fbc1

Browse files
authored
Rollup merge of #77686 - camelid:rustdoc-render-search-results, r=GuillaumeGomez
Render Markdown in search results

Fixes #32040.

Previously Markdown documentation was not rendered to HTML for search results, which led to the output not being very readable, particularly for inline code. This PR fixes that by rendering Markdown to HTML with the help of pulldown-cmark (the library rustdoc uses to parse Markdown for the main text of documentation). However, the text for the title attribute (the text shown when you hover over an element) still uses the plain-text rendering since it is displayed in browsers as plain-text.

Only these styles will be rendered; everything else is stripped away:

* *italics*
* **bold**
* `inline code`
2 parents 5be3f9f + 376507f commit 6f2fbc1

File tree

12 files changed

+203
-51
lines changed

12 files changed

+203
-51
lines changed

src/librustdoc/clean/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1000,7 +1000,7 @@ impl<'tcx> Clean<FnDecl> for (DefId, ty::PolyFnSig<'tcx>) {
10001000
.iter()
10011001
.map(|t| Argument {
10021002
type_: t.clean(cx),
1003-
name: names.next().map_or(String::new(), |name| name.to_string()),
1003+
name: names.next().map_or_else(|| String::new(), |name| name.to_string()),
10041004
})
10051005
.collect(),
10061006
},

src/librustdoc/formats/cache.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ use crate::config::RenderInfo;
1414
use crate::fold::DocFolder;
1515
use crate::formats::item_type::ItemType;
1616
use crate::formats::Impl;
17+
use crate::html::markdown::short_markdown_summary;
1718
use crate::html::render::cache::{extern_location, get_index_search_type, ExternalLocation};
1819
use crate::html::render::IndexItem;
19-
use crate::html::render::{plain_text_summary, shorten};
2020

2121
thread_local!(crate static CACHE_KEY: RefCell<Arc<Cache>> = Default::default());
2222

23-
/// This cache is used to store information about the `clean::Crate` being
23+
/// This cache is used to store information about the [`clean::Crate`] being
2424
/// rendered in order to provide more useful documentation. This contains
2525
/// information like all implementors of a trait, all traits a type implements,
2626
/// documentation for all known traits, etc.
@@ -313,7 +313,9 @@ impl DocFolder for Cache {
313313
ty: item.type_(),
314314
name: s.to_string(),
315315
path: path.join("::"),
316-
desc: shorten(plain_text_summary(item.doc_value())),
316+
desc: item
317+
.doc_value()
318+
.map_or_else(|| String::new(), short_markdown_summary),
317319
parent,
318320
parent_idx: None,
319321
search_type: get_index_search_type(&item),

src/librustdoc/html/markdown.rs

+88-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
//! // ... something using html
1818
//! ```
1919
20-
#![allow(non_camel_case_types)]
21-
2220
use rustc_data_structures::fx::FxHashMap;
2321
use rustc_hir::def_id::DefId;
2422
use rustc_hir::HirId;
@@ -1037,7 +1035,95 @@ impl MarkdownSummaryLine<'_> {
10371035
}
10381036
}
10391037

1038+
/// Renders a subset of Markdown in the first paragraph of the provided Markdown.
1039+
///
1040+
/// - *Italics*, **bold**, and `inline code` styles **are** rendered.
1041+
/// - Headings and links are stripped (though the text *is* rendered).
1042+
/// - HTML, code blocks, and everything else are ignored.
1043+
///
1044+
/// Returns a tuple of the rendered HTML string and whether the output was shortened
1045+
/// due to the provided `length_limit`.
1046+
fn markdown_summary_with_limit(md: &str, length_limit: usize) -> (String, bool) {
1047+
if md.is_empty() {
1048+
return (String::new(), false);
1049+
}
1050+
1051+
let mut s = String::with_capacity(md.len() * 3 / 2);
1052+
let mut text_length = 0;
1053+
let mut stopped_early = false;
1054+
1055+
fn push(s: &mut String, text_length: &mut usize, text: &str) {
1056+
s.push_str(text);
1057+
*text_length += text.len();
1058+
};
1059+
1060+
'outer: for event in Parser::new_ext(md, Options::ENABLE_STRIKETHROUGH) {
1061+
match &event {
1062+
Event::Text(text) => {
1063+
for word in text.split_inclusive(char::is_whitespace) {
1064+
if text_length + word.len() >= length_limit {
1065+
stopped_early = true;
1066+
break 'outer;
1067+
}
1068+
1069+
push(&mut s, &mut text_length, word);
1070+
}
1071+
}
1072+
Event::Code(code) => {
1073+
if text_length + code.len() >= length_limit {
1074+
stopped_early = true;
1075+
break;
1076+
}
1077+
1078+
s.push_str("<code>");
1079+
push(&mut s, &mut text_length, code);
1080+
s.push_str("</code>");
1081+
}
1082+
Event::Start(tag) => match tag {
1083+
Tag::Emphasis => s.push_str("<em>"),
1084+
Tag::Strong => s.push_str("<strong>"),
1085+
Tag::CodeBlock(..) => break,
1086+
_ => {}
1087+
},
1088+
Event::End(tag) => match tag {
1089+
Tag::Emphasis => s.push_str("</em>"),
1090+
Tag::Strong => s.push_str("</strong>"),
1091+
Tag::Paragraph => break,
1092+
_ => {}
1093+
},
1094+
Event::HardBreak | Event::SoftBreak => {
1095+
if text_length + 1 >= length_limit {
1096+
stopped_early = true;
1097+
break;
1098+
}
1099+
1100+
push(&mut s, &mut text_length, " ");
1101+
}
1102+
_ => {}
1103+
}
1104+
}
1105+
1106+
(s, stopped_early)
1107+
}
1108+
1109+
/// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,
1110+
/// making it suitable for contexts like the search index.
1111+
///
1112+
/// Will shorten to 59 or 60 characters, including an ellipsis (…) if it was shortened.
1113+
///
1114+
/// See [`markdown_summary_with_limit`] for details about what is rendered and what is not.
1115+
crate fn short_markdown_summary(markdown: &str) -> String {
1116+
let (mut s, was_shortened) = markdown_summary_with_limit(markdown, 59);
1117+
1118+
if was_shortened {
1119+
s.push('…');
1120+
}
1121+
1122+
s
1123+
}
1124+
10401125
/// Renders the first paragraph of the provided markdown as plain text.
1126+
/// Useful for alt-text.
10411127
///
10421128
/// - Headings, links, and formatting are stripped.
10431129
/// - Inline code is rendered as-is, surrounded by backticks.

src/librustdoc/html/markdown/tests.rs

+32-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::plain_text_summary;
1+
use super::{plain_text_summary, short_markdown_summary};
22
use super::{ErrorCodes, IdMap, Ignore, LangString, Markdown, MarkdownHtml};
33
use rustc_span::edition::{Edition, DEFAULT_EDITION};
44
use std::cell::RefCell;
@@ -204,6 +204,33 @@ fn test_header_ids_multiple_blocks() {
204204
);
205205
}
206206

207+
#[test]
208+
fn test_short_markdown_summary() {
209+
fn t(input: &str, expect: &str) {
210+
let output = short_markdown_summary(input);
211+
assert_eq!(output, expect, "original: {}", input);
212+
}
213+
214+
t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
215+
t("*italic*", "<em>italic</em>");
216+
t("**bold**", "<strong>bold</strong>");
217+
t("Multi-line\nsummary", "Multi-line summary");
218+
t("Hard-break \nsummary", "Hard-break summary");
219+
t("hello [Rust] :)\n\n[Rust]: https://www.rust-lang.org", "hello Rust :)");
220+
t("hello [Rust](https://www.rust-lang.org \"Rust\") :)", "hello Rust :)");
221+
t("code `let x = i32;` ...", "code <code>let x = i32;</code> ...");
222+
t("type `Type<'static>` ...", "type <code>Type<'static></code> ...");
223+
t("# top header", "top header");
224+
t("## header", "header");
225+
t("first paragraph\n\nsecond paragraph", "first paragraph");
226+
t("```\nfn main() {}\n```", "");
227+
t("<div>hello</div>", "");
228+
t(
229+
"a *very*, **very** long first paragraph. it has lots of `inline code: Vec<T>`. and it has a [link](https://www.rust-lang.org).\nthat was a soft line break! \nthat was a hard one\n\nsecond paragraph.",
230+
"a <em>very</em>, <strong>very</strong> long first paragraph. it has lots of …",
231+
);
232+
}
233+
207234
#[test]
208235
fn test_plain_text_summary() {
209236
fn t(input: &str, expect: &str) {
@@ -224,6 +251,10 @@ fn test_plain_text_summary() {
224251
t("first paragraph\n\nsecond paragraph", "first paragraph");
225252
t("```\nfn main() {}\n```", "");
226253
t("<div>hello</div>", "");
254+
t(
255+
"a *very*, **very** long first paragraph. it has lots of `inline code: Vec<T>`. and it has a [link](https://www.rust-lang.org).\nthat was a soft line break! \nthat was a hard one\n\nsecond paragraph.",
256+
"a very, very long first paragraph. it has lots of `inline code: Vec<T>`. and it has a link. that was a soft line break! that was a hard one",
257+
);
227258
}
228259

229260
#[test]

src/librustdoc/html/render/cache.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::clean::types::GetDefId;
99
use crate::clean::{self, AttributesExt};
1010
use crate::formats::cache::Cache;
1111
use crate::formats::item_type::ItemType;
12-
use crate::html::render::{plain_text_summary, shorten};
12+
use crate::html::markdown::short_markdown_summary;
1313
use crate::html::render::{Generic, IndexItem, IndexItemFunctionType, RenderType, TypeWithKind};
1414

1515
/// Indicates where an external crate can be found.
@@ -78,7 +78,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
7878
ty: item.type_(),
7979
name: item.name.clone().unwrap(),
8080
path: fqp[..fqp.len() - 1].join("::"),
81-
desc: shorten(plain_text_summary(item.doc_value())),
81+
desc: item.doc_value().map_or_else(|| String::new(), short_markdown_summary),
8282
parent: Some(did),
8383
parent_idx: None,
8484
search_type: get_index_search_type(&item),
@@ -127,7 +127,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
127127
let crate_doc = krate
128128
.module
129129
.as_ref()
130-
.map(|module| shorten(plain_text_summary(module.doc_value())))
130+
.map(|module| module.doc_value().map_or_else(|| String::new(), short_markdown_summary))
131131
.unwrap_or_default();
132132

133133
#[derive(Serialize)]

src/librustdoc/html/render/mod.rs

+7-34
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ use crate::html::format::fmt_impl_for_trait_page;
7676
use crate::html::format::Function;
7777
use crate::html::format::{href, print_default_space, print_generic_bounds, WhereClause};
7878
use crate::html::format::{print_abi_with_space, Buffer, PrintWithSpace};
79-
use crate::html::markdown::{self, ErrorCodes, IdMap, Markdown, MarkdownHtml, MarkdownSummaryLine};
79+
use crate::html::markdown::{
80+
self, plain_text_summary, ErrorCodes, IdMap, Markdown, MarkdownHtml, MarkdownSummaryLine,
81+
};
8082
use crate::html::sources;
8183
use crate::html::{highlight, layout, static_files};
8284
use cache::{build_index, ExternalLocation};
@@ -1604,9 +1606,10 @@ impl Context {
16041606
Some(ref s) => s.to_string(),
16051607
};
16061608
let short = short.to_string();
1607-
map.entry(short)
1608-
.or_default()
1609-
.push((myname, Some(plain_text_summary(item.doc_value()))));
1609+
map.entry(short).or_default().push((
1610+
myname,
1611+
Some(item.doc_value().map_or_else(|| String::new(), plain_text_summary)),
1612+
));
16101613
}
16111614

16121615
if self.shared.sort_modules_alphabetically {
@@ -1810,36 +1813,6 @@ fn full_path(cx: &Context, item: &clean::Item) -> String {
18101813
s
18111814
}
18121815

1813-
/// Renders the first paragraph of the given markdown as plain text, making it suitable for
1814-
/// contexts like alt-text or the search index.
1815-
///
1816-
/// If no markdown is supplied, the empty string is returned.
1817-
///
1818-
/// See [`markdown::plain_text_summary`] for further details.
1819-
#[inline]
1820-
crate fn plain_text_summary(s: Option<&str>) -> String {
1821-
s.map(markdown::plain_text_summary).unwrap_or_default()
1822-
}
1823-
1824-
crate fn shorten(s: String) -> String {
1825-
if s.chars().count() > 60 {
1826-
let mut len = 0;
1827-
let mut ret = s
1828-
.split_whitespace()
1829-
.take_while(|p| {
1830-
// + 1 for the added character after the word.
1831-
len += p.chars().count() + 1;
1832-
len < 60
1833-
})
1834-
.collect::<Vec<_>>()
1835-
.join(" ");
1836-
ret.push('…');
1837-
ret
1838-
} else {
1839-
s
1840-
}
1841-
}
1842-
18431816
fn document(w: &mut Buffer, cx: &Context, item: &clean::Item, parent: Option<&clean::Item>) {
18441817
if let Some(ref name) = item.name {
18451818
info!("Documenting {}", name);

src/librustdoc/html/static/main.js

+21-2
Original file line numberDiff line numberDiff line change
@@ -1611,7 +1611,7 @@ function defocusSearchBar() {
16111611
item.displayPath + "<span class=\"" + type + "\">" +
16121612
name + "</span></a></td><td>" +
16131613
"<a href=\"" + item.href + "\">" +
1614-
"<span class=\"desc\">" + escape(item.desc) +
1614+
"<span class=\"desc\">" + item.desc +
16151615
"&nbsp;</span></a></td></tr>";
16161616
});
16171617
output += "</table>";
@@ -2013,7 +2013,9 @@ function defocusSearchBar() {
20132013
}
20142014
var link = document.createElement("a");
20152015
link.href = rootPath + crates[i] + "/index.html";
2016-
link.title = rawSearchIndex[crates[i]].doc;
2016+
// The summary in the search index has HTML, so we need to
2017+
// dynamically render it as plaintext.
2018+
link.title = convertHTMLToPlaintext(rawSearchIndex[crates[i]].doc);
20172019
link.className = klass;
20182020
link.textContent = crates[i];
20192021

@@ -2026,6 +2028,23 @@ function defocusSearchBar() {
20262028
}
20272029
};
20282030

2031+
/**
2032+
* Convert HTML to plaintext:
2033+
*
2034+
* * Replace "<code>foo</code>" with "`foo`"
2035+
* * Strip all other HTML tags
2036+
*
2037+
* Used by the dynamic sidebar crate list renderer.
2038+
*
2039+
* @param {[string]} html [The HTML to convert]
2040+
* @return {[string]} [The resulting plaintext]
2041+
*/
2042+
function convertHTMLToPlaintext(html) {
2043+
var x = document.createElement("div");
2044+
x.innerHTML = html.replace('<code>', '`').replace('</code>', '`');
2045+
return x.innerText;
2046+
}
2047+
20292048

20302049
// delayed sidebar rendering.
20312050
window.initSidebarItems = function(items) {

src/librustdoc/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#![feature(never_type)]
1616
#![feature(once_cell)]
1717
#![feature(type_ascription)]
18+
#![feature(split_inclusive)]
1819
#![recursion_limit = "256"]
1920

2021
#[macro_use]

src/test/rustdoc-js/basic.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
/// Foo
1+
/// Docs for Foo
22
pub struct Foo;

src/test/rustdoc-js/summaries.js

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// ignore-tidy-linelength
2+
3+
const QUERY = ['summaries', 'summaries::Sidebar', 'summaries::Sidebar2'];
4+
5+
const EXPECTED = [
6+
{
7+
'others': [
8+
{ 'path': '', 'name': 'summaries', 'desc': 'This <em>summary</em> has a link and <code>code</code>.' },
9+
],
10+
},
11+
{
12+
'others': [
13+
{ 'path': 'summaries', 'name': 'Sidebar', 'desc': 'This <code>code</code> will be rendered in a code tag.' },
14+
],
15+
},
16+
{
17+
'others': [
18+
{ 'path': 'summaries', 'name': 'Sidebar2', 'desc': '' },
19+
],
20+
},
21+
];

src/test/rustdoc-js/summaries.rs

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#![crate_type = "lib"]
2+
#![crate_name = "summaries"]
3+
4+
//! This *summary* has a [link] and `code`.
5+
//!
6+
//! This is the second paragraph.
7+
//!
8+
//! [link]: https://example.com
9+
10+
/// This `code` will be rendered in a code tag.
11+
///
12+
/// This text should not be rendered.
13+
pub struct Sidebar;
14+
15+
/// ```text
16+
/// this block should not be rendered
17+
/// ```
18+
pub struct Sidebar2;

0 commit comments

Comments
 (0)