Skip to content

Commit 71ecf5d

Browse files
committed
Auto merge of #98851 - klensy:encode_symbols, r=cjgillot
rustc_metadata: dedupe strings to prevent multiple copies in rmeta/query cache from blowing up file size r? `@cjgillot` Encodes strings in rmeta/query cache so that duplicated ones are encoded as offsets to the first occurrence of the string, reducing file size.
2 parents 0b79f75 + adba469 commit 71ecf5d

File tree

5 files changed

+113
-3
lines changed

5 files changed

+113
-3
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+29
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,35 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
637637
}
638638
}
639639

640+
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
641+
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
642+
let tag = d.read_u8();
643+
644+
match tag {
645+
SYMBOL_STR => {
646+
let s = d.read_str();
647+
Symbol::intern(s)
648+
}
649+
SYMBOL_OFFSET => {
650+
// read str offset
651+
let pos = d.read_usize();
652+
let old_pos = d.opaque.position();
653+
654+
// move to str offset and read
655+
d.opaque.set_position(pos);
656+
let s = d.read_str();
657+
let sym = Symbol::intern(s);
658+
659+
// restore position
660+
d.opaque.set_position(old_pos);
661+
662+
sym
663+
}
664+
_ => unreachable!(),
665+
}
666+
}
667+
}
668+
640669
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for &'tcx [ty::abstract_const::Node<'tcx>] {
641670
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
642671
ty::codec::RefDecodable::decode(d)

compiler/rustc_metadata/src/rmeta/encoder.rs

+21
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use rustc_span::{
3939
};
4040
use rustc_target::abi::VariantIdx;
4141
use std::borrow::Borrow;
42+
use std::collections::hash_map::Entry;
4243
use std::hash::Hash;
4344
use std::io::{Read, Seek, Write};
4445
use std::iter;
@@ -75,6 +76,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
7576
required_source_files: Option<GrowableBitSet<usize>>,
7677
is_proc_macro: bool,
7778
hygiene_ctxt: &'a HygieneEncodeContext,
79+
symbol_table: FxHashMap<Symbol, usize>,
7880
}
7981

8082
/// If the current crate is a proc-macro, returns early with `Lazy:empty()`.
@@ -307,6 +309,24 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
307309
}
308310
}
309311

312+
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol {
313+
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
314+
match s.symbol_table.entry(*self) {
315+
Entry::Vacant(o) => {
316+
s.opaque.emit_u8(SYMBOL_STR);
317+
let pos = s.opaque.position();
318+
o.insert(pos);
319+
s.emit_str(self.as_str());
320+
}
321+
Entry::Occupied(o) => {
322+
let x = o.get().clone();
323+
s.emit_u8(SYMBOL_OFFSET);
324+
s.emit_usize(x);
325+
}
326+
}
327+
}
328+
}
329+
310330
impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> {
311331
const CLEAR_CROSS_CRATE: bool = true;
312332

@@ -2259,6 +2279,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
22592279
required_source_files,
22602280
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
22612281
hygiene_ctxt: &hygiene_ctxt,
2282+
symbol_table: Default::default(),
22622283
};
22632284

22642285
// Encode the rustc version string in a predictable location.

compiler/rustc_metadata/src/rmeta/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,10 @@ const TAG_VALID_SPAN_LOCAL: u8 = 0;
445445
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
446446
const TAG_PARTIAL_SPAN: u8 = 2;
447447

448+
// Tags for encoding `Symbol`s
449+
const SYMBOL_STR: u8 = 0;
450+
const SYMBOL_OFFSET: u8 = 1;
451+
448452
pub fn provide(providers: &mut Providers) {
449453
encoder::provide(providers);
450454
decoder::provide(providers);

compiler/rustc_query_impl/src/on_disk_cache.rs

+57-1
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ use rustc_span::hygiene::{
2222
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
2323
};
2424
use rustc_span::source_map::{SourceMap, StableSourceFileId};
25-
use rustc_span::CachingSourceMapView;
2625
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
26+
use rustc_span::{CachingSourceMapView, Symbol};
27+
use std::collections::hash_map::Entry;
2728
use std::io;
2829
use std::mem;
2930

@@ -38,6 +39,10 @@ const TAG_RELATIVE_SPAN: u8 = 2;
3839
const TAG_SYNTAX_CONTEXT: u8 = 0;
3940
const TAG_EXPN_DATA: u8 = 1;
4041

42+
// Tags for encoding `Symbol`s
43+
const SYMBOL_STR: u8 = 0;
44+
const SYMBOL_OFFSET: u8 = 1;
45+
4146
/// Provides an interface to incremental compilation data cached from the
4247
/// previous compilation session. This data will eventually include the results
4348
/// of a few selected queries (like `typeck` and `mir_optimized`) and
@@ -254,6 +259,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
254259
source_map: CachingSourceMapView::new(tcx.sess.source_map()),
255260
file_to_file_index,
256261
hygiene_context: &hygiene_encode_context,
262+
symbol_table: Default::default(),
257263
};
258264

259265
// Encode query results.
@@ -714,6 +720,36 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Span {
714720
}
715721
}
716722

723+
// copy&paste impl from rustc_metadata
724+
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
725+
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
726+
let tag = d.read_u8();
727+
728+
match tag {
729+
SYMBOL_STR => {
730+
let s = d.read_str();
731+
Symbol::intern(s)
732+
}
733+
SYMBOL_OFFSET => {
734+
// read str offset
735+
let pos = d.read_usize();
736+
let old_pos = d.opaque.position();
737+
738+
// move to str offset and read
739+
d.opaque.set_position(pos);
740+
let s = d.read_str();
741+
let sym = Symbol::intern(s);
742+
743+
// restore position
744+
d.opaque.set_position(old_pos);
745+
746+
sym
747+
}
748+
_ => unreachable!(),
749+
}
750+
}
751+
}
752+
717753
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for CrateNum {
718754
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
719755
let stable_id = StableCrateId::decode(d);
@@ -815,6 +851,7 @@ pub struct CacheEncoder<'a, 'tcx> {
815851
source_map: CachingSourceMapView<'tcx>,
816852
file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
817853
hygiene_context: &'a HygieneEncodeContext,
854+
symbol_table: FxHashMap<Symbol, usize>,
818855
}
819856

820857
impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
@@ -899,6 +936,25 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
899936
}
900937
}
901938

939+
// copy&paste impl from rustc_metadata
940+
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
941+
fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
942+
match s.symbol_table.entry(*self) {
943+
Entry::Vacant(o) => {
944+
s.encoder.emit_u8(SYMBOL_STR);
945+
let pos = s.encoder.position();
946+
o.insert(pos);
947+
s.emit_str(self.as_str());
948+
}
949+
Entry::Occupied(o) => {
950+
let x = o.get().clone();
951+
s.emit_u8(SYMBOL_OFFSET);
952+
s.emit_usize(x);
953+
}
954+
}
955+
}
956+
}
957+
902958
impl<'a, 'tcx> TyEncoder for CacheEncoder<'a, 'tcx> {
903959
type I = TyCtxt<'tcx>;
904960
const CLEAR_CROSS_CRATE: bool = false;

compiler/rustc_span/src/symbol.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1852,14 +1852,14 @@ impl fmt::Display for Symbol {
18521852
}
18531853

18541854
impl<S: Encoder> Encodable<S> for Symbol {
1855-
fn encode(&self, s: &mut S) {
1855+
default fn encode(&self, s: &mut S) {
18561856
s.emit_str(self.as_str());
18571857
}
18581858
}
18591859

18601860
impl<D: Decoder> Decodable<D> for Symbol {
18611861
#[inline]
1862-
fn decode(d: &mut D) -> Symbol {
1862+
default fn decode(d: &mut D) -> Symbol {
18631863
Symbol::intern(&d.read_str())
18641864
}
18651865
}

0 commit comments

Comments
 (0)