@@ -17,12 +17,25 @@ use crate::html::format::join_with_double_colon;
17
17
use crate :: html:: markdown:: short_markdown_summary;
18
18
use crate :: html:: render:: { self , IndexItem , IndexItemFunctionType , RenderType , RenderTypeId } ;
19
19
20
/// Search index output, with item descriptions split out into shards.
///
/// `index` holds the JSON-encoded list of names and other per-item search
/// data. The descriptions themselves are not part of that JSON; they are
/// carried separately in `desc`.
///
/// Each entry of `desc` is one shard: the `String` contains descriptions
/// joined with `\n`, and the `usize` is how many descriptions that shard
/// holds. A shard is closed once its text has reached roughly 1MiB
/// (`DESC_INDEX_SHARD_LEN`). For example, a shard holding four descriptions
/// looks like `(4, "foo\nbar\nbaz\nquux")`.
pub(crate) struct SerializedSearchIndex {
    /// JSON-encoded search index.
    pub(crate) index: String,
    /// Description shards as `(number of descriptions, newline-joined text)`.
    pub(crate) desc: Vec<(usize, String)>,
}
30
+
31
/// Byte length (1MiB) a description shard must reach before `build_index`
/// closes it and starts a new one.
const DESC_INDEX_SHARD_LEN: usize = 1 << 20;
32
+
20
33
/// Builds the search index from the collected metadata
21
34
pub ( crate ) fn build_index < ' tcx > (
22
35
krate : & clean:: Crate ,
23
36
cache : & mut Cache ,
24
37
tcx : TyCtxt < ' tcx > ,
25
- ) -> String {
38
+ ) -> SerializedSearchIndex {
26
39
let mut itemid_to_pathid = FxHashMap :: default ( ) ;
27
40
let mut primitives = FxHashMap :: default ( ) ;
28
41
let mut associated_types = FxHashMap :: default ( ) ;
@@ -318,7 +331,6 @@ pub(crate) fn build_index<'tcx>(
318
331
. collect :: < Vec < _ > > ( ) ;
319
332
320
333
struct CrateData < ' a > {
321
- doc : String ,
322
334
items : Vec < & ' a IndexItem > ,
323
335
paths : Vec < ( ItemType , Vec < Symbol > ) > ,
324
336
// The String is alias name and the vec is the list of the elements with this alias.
@@ -327,6 +339,9 @@ pub(crate) fn build_index<'tcx>(
327
339
aliases : & ' a BTreeMap < String , Vec < usize > > ,
328
340
// Used when a type has more than one impl with an associated item with the same name.
329
341
associated_item_disambiguators : & ' a Vec < ( usize , String ) > ,
342
+ // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
343
+ // for information on the format.
344
+ descindex : String ,
330
345
}
331
346
332
347
struct Paths {
@@ -408,7 +423,6 @@ pub(crate) fn build_index<'tcx>(
408
423
let mut names = Vec :: with_capacity ( self . items . len ( ) ) ;
409
424
let mut types = String :: with_capacity ( self . items . len ( ) ) ;
410
425
let mut full_paths = Vec :: with_capacity ( self . items . len ( ) ) ;
411
- let mut descriptions = Vec :: with_capacity ( self . items . len ( ) ) ;
412
426
let mut parents = Vec :: with_capacity ( self . items . len ( ) ) ;
413
427
let mut functions = String :: with_capacity ( self . items . len ( ) ) ;
414
428
let mut deprecated = Vec :: with_capacity ( self . items . len ( ) ) ;
@@ -431,7 +445,6 @@ pub(crate) fn build_index<'tcx>(
431
445
parents. push ( item. parent_idx . map ( |x| x + 1 ) . unwrap_or ( 0 ) ) ;
432
446
433
447
names. push ( item. name . as_str ( ) ) ;
434
- descriptions. push ( & item. desc ) ;
435
448
436
449
if !item. path . is_empty ( ) {
437
450
full_paths. push ( ( index, & item. path ) ) ;
@@ -454,14 +467,12 @@ pub(crate) fn build_index<'tcx>(
454
467
let has_aliases = !self . aliases . is_empty ( ) ;
455
468
let mut crate_data =
456
469
serializer. serialize_struct ( "CrateData" , if has_aliases { 9 } else { 8 } ) ?;
457
- crate_data. serialize_field ( "doc" , & self . doc ) ?;
458
470
crate_data. serialize_field ( "t" , & types) ?;
459
471
crate_data. serialize_field ( "n" , & names) ?;
460
- // Serialize as an array of item indices and full paths
461
472
crate_data. serialize_field ( "q" , & full_paths) ?;
462
- crate_data. serialize_field ( "d" , & descriptions) ?;
463
473
crate_data. serialize_field ( "i" , & parents) ?;
464
474
crate_data. serialize_field ( "f" , & functions) ?;
475
+ crate_data. serialize_field ( "D" , & self . descindex ) ?;
465
476
crate_data. serialize_field ( "c" , & deprecated) ?;
466
477
crate_data. serialize_field ( "p" , & paths) ?;
467
478
crate_data. serialize_field ( "b" , & self . associated_item_disambiguators ) ?;
@@ -472,24 +483,92 @@ pub(crate) fn build_index<'tcx>(
472
483
}
473
484
}
474
485
475
- // Collect the index into a string
476
- format ! (
486
+ let desc = {
487
+ let mut result = Vec :: new ( ) ;
488
+ let mut set = String :: new ( ) ;
489
+ let mut len: usize = 0 ;
490
+ for desc in std:: iter:: once ( & crate_doc) . chain ( crate_items. iter ( ) . map ( |item| & item. desc ) ) {
491
+ if set. len ( ) >= DESC_INDEX_SHARD_LEN {
492
+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
493
+ len = 0 ;
494
+ } else if len != 0 {
495
+ set. push ( '\n' ) ;
496
+ }
497
+ set. push_str ( & desc) ;
498
+ len += 1 ;
499
+ }
500
+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
501
+ result
502
+ } ;
503
+
504
+ let descindex = {
505
+ let mut descindex = String :: with_capacity ( desc. len ( ) * 4 ) ;
506
+ for & ( len, _) in desc. iter ( ) {
507
+ write_vlqhex_to_string ( len. try_into ( ) . unwrap ( ) , & mut descindex) ;
508
+ }
509
+ descindex
510
+ } ;
511
+
512
+ assert_eq ! ( crate_items. len( ) + 1 , desc. iter( ) . map( |( len, _) | * len) . sum:: <usize >( ) ) ;
513
+
514
+ // The index, which is actually used to search, is JSON
515
+ // It uses `JSON.parse(..)` to actually load, since JSON
516
+ // parses faster than the full JavaScript syntax.
517
+ let index = format ! (
477
518
r#"["{}",{}]"# ,
478
519
krate. name( tcx) ,
479
520
serde_json:: to_string( & CrateData {
480
- doc: crate_doc,
481
521
items: crate_items,
482
522
paths: crate_paths,
483
523
aliases: & aliases,
484
524
associated_item_disambiguators: & associated_item_disambiguators,
525
+ descindex,
485
526
} )
486
527
. expect( "failed serde conversion" )
487
528
// All these `replace` calls are because we have to go through JS string for JSON content.
488
529
. replace( '\\' , r"\\" )
489
530
. replace( '\'' , r"\'" )
490
531
// We need to escape double quotes for the JSON.
491
532
. replace( "\\ \" " , "\\ \\ \" " )
492
- )
533
+ ) ;
534
+ SerializedSearchIndex { index, desc }
535
+ }
536
+
537
/// Appends `n` to `string` using the self-terminating "vlqhex" encoding.
///
/// The number is first zig-zag encoded: the magnitude is shifted left by one
/// bit and the sign is stored in the lowest bit (1 for negative). The result
/// is then written as hexits, most significant first, with leading zero
/// hexits dropped (at least one hexit is always written):
///
/// * every hexit except the last is written as `'@' + hexit`, i.e. `@` for 0
///   and `A`..=`O` for 1..=15;
/// * the last (least significant) hexit is written as `` '`' + hexit ``,
///   i.e. `` ` `` for 0 and `a`..=`o` for 1..=15. The switch to lowercase is
///   what terminates the number.
///
/// Because of the zig-zag shift, 0 is `` ` ``, 1 is `b`, -1 is `c` (`a` would
/// be the imaginary -0), 8 is `` A` `` and -8 is `Aa`.
///
/// <https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html>
/// describes the encoding in more detail.
///
/// # Panics
///
/// Panics if `n` is `i32::MIN`, whose magnitude cannot be zig-zag encoded in
/// 32 bits.
pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) {
    let is_negative = n < 0;
    let magnitude: u32 =
        if is_negative { u32::try_from(-n).unwrap() } else { u32::try_from(n).unwrap() };
    // zig-zag encoding: sign bit goes last
    let zigzag: u32 = (magnitude << 1) | u32::from(is_negative);

    // Walk the eight hexits from most to least significant. Leading zeroes
    // are skipped, but the final hexit is always emitted so that zero still
    // produces output.
    let mut emitting = false;
    for shift in (0..=28u32).rev().step_by(4) {
        let hexit = (zigzag >> shift) & 0xF;
        let is_last = shift == 0;
        if !emitting && hexit == 0 && !is_last {
            continue;
        }
        emitting = true;
        // Lowercase range marks the terminating hexit, uppercase the rest.
        let base = if is_last { '`' } else { '@' };
        string.push(char::from_u32(base as u32 + hexit).unwrap());
    }
}
494
573
495
574
pub ( crate ) fn get_function_type_for_search < ' tcx > (
0 commit comments