From ac20ded1f8af6a35edf27059a6ee215d61666fef Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Sun, 1 Mar 2015 15:09:58 +0900 Subject: [PATCH 01/12] metadata: Avoid the use of raw `wr_str` or `write_all`. They are, in conjunction with `start_tag` and `end_tag`, commonly used to write a document containing binary data of known size. However, the use of `start_tag` makes the encoded length always 4 bytes long, which is almost never optimal (requiring the relaxation step to remedy). Directly using `wr_tagged_*` methods is better for both readability and resulting metadata size. --- src/librustc/metadata/encoder.rs | 221 ++++++++++--------------------- 1 file changed, 70 insertions(+), 151 deletions(-) diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index ee2745ca66bc5..7c28f0e17b5e4 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -121,9 +121,7 @@ fn encode_trait_ref<'a, 'tcx>(rbml_w: &mut Encoder, // Item info table encoding fn encode_family(rbml_w: &mut Encoder, c: char) { - rbml_w.start_tag(tag_items_data_item_family); - rbml_w.writer.write_all(&[c as u8]); - rbml_w.end_tag(); + rbml_w.wr_tagged_u8(tag_items_data_item_family, c as u8); } pub fn def_to_string(did: DefId) -> String { @@ -157,14 +155,9 @@ fn encode_bounds_and_type<'a, 'tcx>(rbml_w: &mut Encoder, } fn encode_variant_id(rbml_w: &mut Encoder, vid: DefId) { - rbml_w.start_tag(tag_items_data_item_variant); let s = def_to_string(vid); - rbml_w.writer.write_all(s.as_bytes()); - rbml_w.end_tag(); - - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&s[..]); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_items_data_item_variant, &s[..]); + rbml_w.wr_tagged_str(tag_mod_child, &s[..]); } pub fn write_closure_type<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, @@ -265,34 +258,26 @@ fn encode_method_fty<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, fn encode_symbol(ecx: &EncodeContext, rbml_w: &mut Encoder, id: NodeId) { - 
rbml_w.start_tag(tag_items_data_item_symbol); match ecx.item_symbols.borrow().get(&id) { Some(x) => { debug!("encode_symbol(id={}, str={})", id, *x); - rbml_w.writer.write_all(x.as_bytes()); + rbml_w.wr_tagged_str(tag_items_data_item_symbol, x); } None => { ecx.diag.handler().bug( &format!("encode_symbol: id not found {}", id)); } } - rbml_w.end_tag(); } fn encode_disr_val(_: &EncodeContext, rbml_w: &mut Encoder, disr_val: ty::Disr) { - rbml_w.start_tag(tag_disr_val); - let s = disr_val.to_string(); - rbml_w.writer.write_all(s.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_disr_val, &disr_val.to_string()); } fn encode_parent_item(rbml_w: &mut Encoder, id: DefId) { - rbml_w.start_tag(tag_items_data_parent_item); - let s = def_to_string(id); - rbml_w.writer.write_all(s.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_items_data_parent_item, &def_to_string(id)); } fn encode_struct_fields(rbml_w: &mut Encoder, @@ -307,10 +292,7 @@ fn encode_struct_fields(rbml_w: &mut Encoder, } encode_struct_field_family(rbml_w, f.vis); encode_def_id(rbml_w, f.id); - rbml_w.start_tag(tag_item_field_origin); - let s = def_to_string(origin); - rbml_w.writer.write_all(s.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_item_field_origin, &def_to_string(origin)); rbml_w.end_tag(); } } @@ -393,14 +375,11 @@ fn encode_reexported_static_method(rbml_w: &mut Encoder, debug!("(encode reexported static method) {}::{}", exp.name, token::get_name(method_name)); rbml_w.start_tag(tag_items_data_item_reexport); - rbml_w.start_tag(tag_items_data_item_reexport_def_id); - rbml_w.wr_str(&def_to_string(method_def_id)); - rbml_w.end_tag(); - rbml_w.start_tag(tag_items_data_item_reexport_name); - rbml_w.wr_str(&format!("{}::{}", - exp.name, - token::get_name(method_name))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_items_data_item_reexport_def_id, + &def_to_string(method_def_id)); + rbml_w.wr_tagged_str(tag_items_data_item_reexport_name, + &format!("{}::{}", exp.name, + 
token::get_name(method_name))); rbml_w.end_tag(); } @@ -536,12 +515,10 @@ fn encode_reexports(ecx: &EncodeContext, exp.def_id.node, id); rbml_w.start_tag(tag_items_data_item_reexport); - rbml_w.start_tag(tag_items_data_item_reexport_def_id); - rbml_w.wr_str(&def_to_string(exp.def_id)); - rbml_w.end_tag(); - rbml_w.start_tag(tag_items_data_item_reexport_name); - rbml_w.wr_str(exp.name.as_str()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_items_data_item_reexport_def_id, + &def_to_string(exp.def_id)); + rbml_w.wr_tagged_str(tag_items_data_item_reexport_name, + exp.name.as_str()); rbml_w.end_tag(); encode_reexported_static_methods(ecx, rbml_w, path.clone(), exp); } @@ -569,15 +546,12 @@ fn encode_info_for_mod(ecx: &EncodeContext, // Encode info about all the module children. for item in &md.items { - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(local_def(item.id))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(local_def(item.id))); each_auxiliary_node_id(&**item, |auxiliary_node_id| { - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(local_def( - auxiliary_node_id))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(local_def(auxiliary_node_id))); true }); @@ -587,9 +561,8 @@ fn encode_info_for_mod(ecx: &EncodeContext, token::get_ident(ident), did, ecx.tcx.map.node_to_string(did)); - rbml_w.start_tag(tag_mod_impl); - rbml_w.wr_str(&def_to_string(local_def(did))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_impl, + &def_to_string(local_def(did))); } } @@ -618,67 +591,56 @@ fn encode_struct_field_family(rbml_w: &mut Encoder, } fn encode_visibility(rbml_w: &mut Encoder, visibility: ast::Visibility) { - rbml_w.start_tag(tag_items_data_item_visibility); let ch = match visibility { ast::Public => 'y', ast::Inherited => 'i', }; - rbml_w.wr_str(&ch.to_string()); - rbml_w.end_tag(); + rbml_w.wr_tagged_u8(tag_items_data_item_visibility, ch as u8); } fn 
encode_explicit_self(rbml_w: &mut Encoder, explicit_self: &ty::ExplicitSelfCategory) { - rbml_w.start_tag(tag_item_trait_method_explicit_self); + let tag = tag_item_trait_method_explicit_self; // Encode the base self type. match *explicit_self { ty::StaticExplicitSelfCategory => { - rbml_w.writer.write_all(&[ 's' as u8 ]); + rbml_w.wr_tagged_bytes(tag, &['s' as u8]); } ty::ByValueExplicitSelfCategory => { - rbml_w.writer.write_all(&[ 'v' as u8 ]); + rbml_w.wr_tagged_bytes(tag, &['v' as u8]); } ty::ByBoxExplicitSelfCategory => { - rbml_w.writer.write_all(&[ '~' as u8 ]); + rbml_w.wr_tagged_bytes(tag, &['~' as u8]); } ty::ByReferenceExplicitSelfCategory(_, m) => { // FIXME(#4846) encode custom lifetime - rbml_w.writer.write_all(&['&' as u8]); - encode_mutability(rbml_w, m); + let ch = encode_mutability(m); + rbml_w.wr_tagged_bytes(tag, &['&' as u8, ch]); } } - rbml_w.end_tag(); - - fn encode_mutability(rbml_w: &mut Encoder, - m: ast::Mutability) { + fn encode_mutability(m: ast::Mutability) -> u8 { match m { - ast::MutImmutable => { rbml_w.writer.write_all(&[ 'i' as u8 ]); } - ast::MutMutable => { rbml_w.writer.write_all(&[ 'm' as u8 ]); } + ast::MutImmutable => 'i' as u8, + ast::MutMutable => 'm' as u8, } } } fn encode_item_sort(rbml_w: &mut Encoder, sort: char) { - rbml_w.start_tag(tag_item_trait_item_sort); - rbml_w.writer.write_all(&[ sort as u8 ]); - rbml_w.end_tag(); + rbml_w.wr_tagged_u8(tag_item_trait_item_sort, sort as u8); } fn encode_parent_sort(rbml_w: &mut Encoder, sort: char) { - rbml_w.start_tag(tag_item_trait_parent_sort); - rbml_w.writer.write_all(&[ sort as u8 ]); - rbml_w.end_tag(); + rbml_w.wr_tagged_u8(tag_item_trait_parent_sort, sort as u8); } fn encode_provided_source(rbml_w: &mut Encoder, source_opt: Option) { if let Some(source) = source_opt { - rbml_w.start_tag(tag_item_method_provided_source); - let s = def_to_string(source); - rbml_w.writer.write_all(s.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_item_method_provided_source, 
+ &def_to_string(source)); } } @@ -747,8 +709,7 @@ fn encode_info_for_struct_ctor(ecx: &EncodeContext, // indicate that this is a tuple struct ctor, because downstream users will normally want // the tuple struct definition, but without this there is no way for them to tell that // they actually have a ctor rather than a normal function - rbml_w.start_tag(tag_items_data_item_is_tuple_struct_ctor); - rbml_w.end_tag(); + rbml_w.wr_tagged_bytes(tag_items_data_item_is_tuple_struct_ctor, &[]); rbml_w.end_tag(); } @@ -919,12 +880,13 @@ fn encode_method_argument_names(rbml_w: &mut Encoder, decl: &ast::FnDecl) { rbml_w.start_tag(tag_method_argument_names); for arg in &decl.inputs { - rbml_w.start_tag(tag_method_argument_name); + let tag = tag_method_argument_name; if let ast::PatIdent(_, ref path1, _) = arg.pat.node { let name = token::get_ident(path1.node); - rbml_w.writer.write_all(name.as_bytes()); + rbml_w.wr_tagged_bytes(tag, name.as_bytes()); + } else { + rbml_w.wr_tagged_bytes(tag, &[]); } - rbml_w.end_tag(); } rbml_w.end_tag(); } @@ -1091,9 +1053,8 @@ fn encode_info_for_item(ecx: &EncodeContext, // Encode all the items in this module. 
for foreign_item in &fm.items { - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(local_def(foreign_item.id))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(local_def(foreign_item.id))); } encode_visibility(rbml_w, vis); encode_stability(rbml_w, stab); @@ -1339,9 +1300,8 @@ fn encode_info_for_item(ecx: &EncodeContext, } rbml_w.end_tag(); - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(method_def_id.def_id())); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(method_def_id.def_id())); } encode_path(rbml_w, path.clone()); @@ -1642,21 +1602,15 @@ fn encode_meta_item(rbml_w: &mut Encoder, mi: &ast::MetaItem) { match mi.node { ast::MetaWord(ref name) => { rbml_w.start_tag(tag_meta_item_word); - rbml_w.start_tag(tag_meta_item_name); - rbml_w.writer.write_all(name.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_meta_item_name, name); rbml_w.end_tag(); } ast::MetaNameValue(ref name, ref value) => { match value.node { ast::LitStr(ref value, _) => { rbml_w.start_tag(tag_meta_item_name_value); - rbml_w.start_tag(tag_meta_item_name); - rbml_w.writer.write_all(name.as_bytes()); - rbml_w.end_tag(); - rbml_w.start_tag(tag_meta_item_value); - rbml_w.writer.write_all(value.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_meta_item_name, name); + rbml_w.wr_tagged_str(tag_meta_item_value, value); rbml_w.end_tag(); } _ => {/* FIXME (#623): encode other variants */ } @@ -1664,9 +1618,7 @@ fn encode_meta_item(rbml_w: &mut Encoder, mi: &ast::MetaItem) { } ast::MetaList(ref name, ref items) => { rbml_w.start_tag(tag_meta_item_list); - rbml_w.start_tag(tag_meta_item_name); - rbml_w.writer.write_all(name.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_meta_item_name, name); for inner_item in items { encode_meta_item(rbml_w, &**inner_item); } @@ -1760,22 +1712,9 @@ fn encode_lang_items(ecx: &EncodeContext, rbml_w: &mut Encoder) { if let Some(id) = def_id { if id.krate 
== ast::LOCAL_CRATE { rbml_w.start_tag(tag_lang_items_item); - - rbml_w.start_tag(tag_lang_items_item_id); - { - let wr: &mut SeekableMemWriter = rbml_w.writer; - wr.write_be_u32(i as u32); - } - rbml_w.end_tag(); // tag_lang_items_item_id - - rbml_w.start_tag(tag_lang_items_item_node_id); - { - let wr: &mut SeekableMemWriter = rbml_w.writer; - wr.write_be_u32(id.node as u32); - } - rbml_w.end_tag(); // tag_lang_items_item_node_id - - rbml_w.end_tag(); // tag_lang_items_item + rbml_w.wr_tagged_u32(tag_lang_items_item_id, i as u32); + rbml_w.wr_tagged_u32(tag_lang_items_item_node_id, id.node as u32); + rbml_w.end_tag(); } } } @@ -1796,15 +1735,8 @@ fn encode_native_libraries(ecx: &EncodeContext, rbml_w: &mut Encoder) { cstore::NativeStatic => {} // these libraries are not propagated cstore::NativeFramework | cstore::NativeUnknown => { rbml_w.start_tag(tag_native_libraries_lib); - - rbml_w.start_tag(tag_native_libraries_kind); - rbml_w.writer.write_be_u32(kind as u32); - rbml_w.end_tag(); - - rbml_w.start_tag(tag_native_libraries_name); - rbml_w.writer.write_all(lib.as_bytes()); - rbml_w.end_tag(); - + rbml_w.wr_tagged_u32(tag_native_libraries_kind, kind as u32); + rbml_w.wr_tagged_str(tag_native_libraries_name, lib); rbml_w.end_tag(); } } @@ -1830,9 +1762,8 @@ fn encode_macro_defs(rbml_w: &mut Encoder, encode_name(rbml_w, def.ident.name); encode_attributes(rbml_w, &def.attrs); - rbml_w.start_tag(tag_macro_def_body); - rbml_w.wr_str(&pprust::tts_to_string(&def.body)); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_macro_def_body, + &pprust::tts_to_string(&def.body)); rbml_w.end_tag(); } @@ -1917,15 +1848,12 @@ fn encode_misc_info(ecx: &EncodeContext, rbml_w.start_tag(tag_misc_info); rbml_w.start_tag(tag_misc_info_crate_items); for item in &krate.module.items { - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(local_def(item.id))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(local_def(item.id))); 
each_auxiliary_node_id(&**item, |auxiliary_node_id| { - rbml_w.start_tag(tag_mod_child); - rbml_w.wr_str(&def_to_string(local_def( - auxiliary_node_id))); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_mod_child, + &def_to_string(local_def(auxiliary_node_id))); true }); } @@ -1956,35 +1884,25 @@ fn encode_reachable_extern_fns(ecx: &EncodeContext, rbml_w: &mut Encoder) { fn encode_crate_dep(rbml_w: &mut Encoder, dep: decoder::CrateDep) { rbml_w.start_tag(tag_crate_dep); - rbml_w.start_tag(tag_crate_dep_crate_name); - rbml_w.writer.write_all(dep.name.as_bytes()); - rbml_w.end_tag(); - rbml_w.start_tag(tag_crate_dep_hash); - rbml_w.writer.write_all(dep.hash.as_str().as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_crate_dep_crate_name, &dep.name); + rbml_w.wr_tagged_str(tag_crate_dep_hash, dep.hash.as_str()); rbml_w.end_tag(); } fn encode_hash(rbml_w: &mut Encoder, hash: &Svh) { - rbml_w.start_tag(tag_crate_hash); - rbml_w.writer.write_all(hash.as_str().as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_crate_hash, hash.as_str()); } fn encode_crate_name(rbml_w: &mut Encoder, crate_name: &str) { - rbml_w.start_tag(tag_crate_crate_name); - rbml_w.writer.write_all(crate_name.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_crate_crate_name, crate_name); } fn encode_crate_triple(rbml_w: &mut Encoder, triple: &str) { - rbml_w.start_tag(tag_crate_triple); - rbml_w.writer.write_all(triple.as_bytes()); - rbml_w.end_tag(); + rbml_w.wr_tagged_str(tag_crate_triple, triple); } fn encode_dylib_dependency_formats(rbml_w: &mut Encoder, ecx: &EncodeContext) { - rbml_w.start_tag(tag_dylib_dependency_formats); + let tag = tag_dylib_dependency_formats; match ecx.tcx.dependency_formats.borrow().get(&config::CrateTypeDylib) { Some(arr) => { let s = arr.iter().enumerate().filter_map(|(i, slot)| { @@ -1993,11 +1911,12 @@ fn encode_dylib_dependency_formats(rbml_w: &mut Encoder, ecx: &EncodeContext) { cstore::RequireStatic => "s", })).to_string()) 
}).collect::>(); - rbml_w.writer.write_all(s.connect(",").as_bytes()); + rbml_w.wr_tagged_str(tag, &s.connect(",")); + } + None => { + rbml_w.wr_tagged_str(tag, ""); } - None => {} } - rbml_w.end_tag(); } // NB: Increment this as you change the metadata encoding version. From 38a965a747cb5998cce85369aa30a53062dcf363 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Sun, 1 Mar 2015 01:09:39 +0900 Subject: [PATCH 02/12] metadata: New tag encoding scheme. EBML tags are encoded as a variable-length unsigned int (vuint), which is clever but causes some tags to be encoded in two bytes even though there are really only about 180 tags or so. Assuming that there won't be, say, over 1,000 tags in the future, we can use a much more efficient encoding scheme. The new scheme should support at most 4,096 tags anyway. This also flattens a scattered tag namespace (did you know that 0xa9 is followed by 0xb0?) and makes room for autoserialized tags in 0x00 through 0x1f. --- src/librbml/lib.rs | 43 ++++- src/librustc/metadata/common.rs | 281 ++++++++++++++++---------------- 2 files changed, 175 insertions(+), 149 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index 05cd24de7368c..d0b8301bed572 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. 
// @@ -115,6 +115,7 @@ pub enum EbmlEncoderTag { #[derive(Debug)] pub enum Error { IntTooBig(uint), + InvalidTag(uint), Expected(String), IoError(std::old_io::IoError), ApplicationError(String) @@ -142,7 +143,7 @@ pub mod reader { EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, - Error, IntTooBig, Expected }; + Error, IntTooBig, InvalidTag, Expected }; pub type DecodeResult = Result; // rbml reading @@ -165,6 +166,18 @@ pub mod reader { pub next: uint } + pub fn tag_at(data: &[u8], start: uint) -> DecodeResult { + let v = data[start] as uint; + if v < 0xf0 { + Ok(Res { val: v, next: start + 1 }) + } else if v > 0xf0 { + Ok(Res { val: ((v & 0xf) << 8) | data[start + 1] as uint, next: start + 2 }) + } else { + // every tag starting with byte 0xf0 is an overlong form, which is prohibited. + Err(InvalidTag(v)) + } + } + #[inline(never)] fn vuint_at_slow(data: &[u8], start: uint) -> DecodeResult { let a = data[start]; @@ -238,7 +251,7 @@ pub mod reader { } pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult> { - let elt_tag = try!(vuint_at(data, start)); + let elt_tag = try!(tag_at(data, start)); let elt_size = try!(vuint_at(data, elt_tag.next)); let end = elt_size.next + elt_size.val; Ok(TaggedDoc { @@ -250,7 +263,7 @@ pub mod reader { pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: uint) -> Option> { let mut pos = d.start; while pos < d.end { - let elt_tag = try_or!(vuint_at(d.data, pos), None); + let elt_tag = try_or!(tag_at(d.data, pos), None); let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None); pos = elt_size.next + elt_size.val; if elt_tag.val == tg { @@ -276,7 +289,7 @@ pub mod reader { { let mut pos = d.start; while pos < d.end { - let elt_tag = try_or!(vuint_at(d.data, pos), false); + let elt_tag = try_or!(tag_at(d.data, pos), false); let elt_size = try_or!(vuint_at(d.data, elt_tag.next), 
false); pos = elt_size.next + elt_size.val; let doc = Doc { data: d.data, start: elt_size.next, end: pos }; @@ -292,7 +305,7 @@ pub mod reader { { let mut pos = d.start; while pos < d.end { - let elt_tag = try_or!(vuint_at(d.data, pos), false); + let elt_tag = try_or!(tag_at(d.data, pos), false); let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false); pos = elt_size.next + elt_size.val; if elt_tag.val == tg { @@ -718,6 +731,20 @@ pub mod writer { size_positions: Vec, } + fn write_tag(w: &mut W, n: uint) -> EncodeResult { + if n < 0xf0 { + w.write_all(&[n as u8]) + } else if 0x100 <= n && n < 0x1000 { + w.write_all(&[0xf0 | (n >> 8) as u8, n as u8]) + } else { + Err(old_io::IoError { + kind: old_io::OtherIoError, + desc: "invalid tag", + detail: Some(format!("{}", n)) + }) + } + } + fn write_sized_vuint(w: &mut W, n: uint, size: uint) -> EncodeResult { match size { 1 => w.write_all(&[0x80u8 | (n as u8)]), @@ -766,7 +793,7 @@ pub mod writer { debug!("Start tag {:?}", tag_id); // Write the enum ID: - try!(write_vuint(self.writer, tag_id)); + try!(write_tag(self.writer, tag_id)); // Write a placeholder four-byte size. self.size_positions.push(try!(self.writer.tell()) as uint); @@ -795,7 +822,7 @@ pub mod writer { } pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult { - try!(write_vuint(self.writer, tag_id)); + try!(write_tag(self.writer, tag_id)); try!(write_vuint(self.writer, b.len())); self.writer.write_all(b) } diff --git a/src/librustc/metadata/common.rs b/src/librustc/metadata/common.rs index 4930eddb35a53..f0a465b73f60d 100644 --- a/src/librustc/metadata/common.rs +++ b/src/librustc/metadata/common.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. 
// @@ -14,85 +14,90 @@ pub use self::astencode_tag::*; use back::svh::Svh; -// EBML enum definitions and utils shared by the encoder and decoder +// RBML enum definitions and utils shared by the encoder and decoder +// +// 0x00..0x1f: reserved for RBML generic type tags +// 0x20..0xef: free for use, preferred for frequent tags +// 0xf0..0xff: internally used by RBML to encode 0x100..0xfff in two bytes +// 0x100..0xfff: free for use, preferred for infrequent tags -pub const tag_items: uint = 0x00; +pub const tag_items: uint = 0x100; // top-level only -pub const tag_paths_data_name: uint = 0x01; +pub const tag_paths_data_name: uint = 0x20; -pub const tag_def_id: uint = 0x02; +pub const tag_def_id: uint = 0x21; -pub const tag_items_data: uint = 0x03; +pub const tag_items_data: uint = 0x22; -pub const tag_items_data_item: uint = 0x04; +pub const tag_items_data_item: uint = 0x23; -pub const tag_items_data_item_family: uint = 0x05; +pub const tag_items_data_item_family: uint = 0x24; -pub const tag_items_data_item_type: uint = 0x07; +pub const tag_items_data_item_type: uint = 0x25; -pub const tag_items_data_item_symbol: uint = 0x08; +pub const tag_items_data_item_symbol: uint = 0x26; -pub const tag_items_data_item_variant: uint = 0x09; +pub const tag_items_data_item_variant: uint = 0x27; -pub const tag_items_data_parent_item: uint = 0x0a; +pub const tag_items_data_parent_item: uint = 0x28; -pub const tag_items_data_item_is_tuple_struct_ctor: uint = 0x0b; +pub const tag_items_data_item_is_tuple_struct_ctor: uint = 0x29; -pub const tag_index: uint = 0x0c; +pub const tag_index: uint = 0x2a; -pub const tag_index_buckets: uint = 0x0d; +pub const tag_index_buckets: uint = 0x2b; -pub const tag_index_buckets_bucket: uint = 0x0e; +pub const tag_index_buckets_bucket: uint = 0x2c; -pub const tag_index_buckets_bucket_elt: uint = 0x0f; +pub const tag_index_buckets_bucket_elt: uint = 0x2d; -pub const tag_index_table: uint = 0x10; +pub const tag_index_table: uint = 0x2e; -pub const 
tag_meta_item_name_value: uint = 0x11; +pub const tag_meta_item_name_value: uint = 0x2f; -pub const tag_meta_item_name: uint = 0x12; +pub const tag_meta_item_name: uint = 0x30; -pub const tag_meta_item_value: uint = 0x13; +pub const tag_meta_item_value: uint = 0x31; -pub const tag_attributes: uint = 0x14; +pub const tag_attributes: uint = 0x101; // top-level only -pub const tag_attribute: uint = 0x15; +pub const tag_attribute: uint = 0x32; -pub const tag_meta_item_word: uint = 0x16; +pub const tag_meta_item_word: uint = 0x33; -pub const tag_meta_item_list: uint = 0x17; +pub const tag_meta_item_list: uint = 0x34; // The list of crates that this crate depends on -pub const tag_crate_deps: uint = 0x18; +pub const tag_crate_deps: uint = 0x102; // top-level only // A single crate dependency -pub const tag_crate_dep: uint = 0x19; +pub const tag_crate_dep: uint = 0x35; -pub const tag_crate_hash: uint = 0x1a; -pub const tag_crate_crate_name: uint = 0x1b; +pub const tag_crate_hash: uint = 0x103; // top-level only +pub const tag_crate_crate_name: uint = 0x104; // top-level only -pub const tag_crate_dep_crate_name: uint = 0x1d; -pub const tag_crate_dep_hash: uint = 0x1e; +pub const tag_crate_dep_crate_name: uint = 0x36; +pub const tag_crate_dep_hash: uint = 0x37; -pub const tag_mod_impl: uint = 0x1f; +pub const tag_mod_impl: uint = 0x38; -pub const tag_item_trait_item: uint = 0x20; +pub const tag_item_trait_item: uint = 0x39; -pub const tag_item_trait_ref: uint = 0x21; -pub const tag_item_super_trait_ref: uint = 0x22; +pub const tag_item_trait_ref: uint = 0x3a; +pub const tag_item_super_trait_ref: uint = 0x3b; // discriminator value for variants -pub const tag_disr_val: uint = 0x23; +pub const tag_disr_val: uint = 0x3c; // used to encode ast_map::PathElem -pub const tag_path: uint = 0x24; -pub const tag_path_len: uint = 0x25; -pub const tag_path_elem_mod: uint = 0x26; -pub const tag_path_elem_name: uint = 0x27; -pub const tag_item_field: uint = 0x28; -pub const 
tag_item_field_origin: uint = 0x29; - -pub const tag_item_variances: uint = 0x2a; +pub const tag_path: uint = 0x3d; +pub const tag_path_len: uint = 0x3e; +pub const tag_path_elem_mod: uint = 0x3f; +pub const tag_path_elem_name: uint = 0x40; +pub const tag_item_field: uint = 0x41; +pub const tag_item_field_origin: uint = 0x42; + +pub const tag_item_variances: uint = 0x43; /* trait items contain tag_item_trait_item elements, impl items contain tag_item_impl_item elements, and classes @@ -101,60 +106,60 @@ pub const tag_item_variances: uint = 0x2a; both, tag_item_trait_item and tag_item_impl_item have to be two different tags. */ -pub const tag_item_impl_item: uint = 0x30; -pub const tag_item_trait_method_explicit_self: uint = 0x31; +pub const tag_item_impl_item: uint = 0x44; +pub const tag_item_trait_method_explicit_self: uint = 0x45; // Reexports are found within module tags. Each reexport contains def_ids // and names. -pub const tag_items_data_item_reexport: uint = 0x38; -pub const tag_items_data_item_reexport_def_id: uint = 0x39; -pub const tag_items_data_item_reexport_name: uint = 0x3a; +pub const tag_items_data_item_reexport: uint = 0x46; +pub const tag_items_data_item_reexport_def_id: uint = 0x47; +pub const tag_items_data_item_reexport_name: uint = 0x48; // used to encode crate_ctxt side tables #[derive(Copy, PartialEq, FromPrimitive)] #[repr(uint)] -pub enum astencode_tag { // Reserves 0x40 -- 0x5f - tag_ast = 0x40, - - tag_tree = 0x41, - - tag_id_range = 0x42, - - tag_table = 0x43, - tag_table_id = 0x44, - tag_table_val = 0x45, - tag_table_def = 0x46, - tag_table_node_type = 0x47, - tag_table_item_subst = 0x48, - tag_table_freevars = 0x49, - tag_table_tcache = 0x4a, - tag_table_param_defs = 0x4b, - tag_table_mutbl = 0x4c, - tag_table_last_use = 0x4d, - tag_table_spill = 0x4e, - tag_table_method_map = 0x4f, - tag_table_vtable_map = 0x50, - tag_table_adjustments = 0x51, - tag_table_moves_map = 0x52, - tag_table_capture_map = 0x53, - tag_table_closure_tys = 
0x54, - tag_table_closure_kinds = 0x55, - tag_table_upvar_capture_map = 0x56, - tag_table_capture_modes = 0x57, - tag_table_object_cast_map = 0x58, - tag_table_const_qualif = 0x59, +pub enum astencode_tag { // Reserves 0x50 -- 0x6f + tag_ast = 0x50, + + tag_tree = 0x51, + + tag_id_range = 0x52, + + tag_table = 0x53, + tag_table_id = 0x54, + tag_table_val = 0x55, + tag_table_def = 0x56, + tag_table_node_type = 0x57, + tag_table_item_subst = 0x58, + tag_table_freevars = 0x59, + tag_table_tcache = 0x5a, + tag_table_param_defs = 0x5b, + tag_table_mutbl = 0x5c, + tag_table_last_use = 0x5d, + tag_table_spill = 0x5e, + tag_table_method_map = 0x5f, + tag_table_vtable_map = 0x60, + tag_table_adjustments = 0x61, + tag_table_moves_map = 0x62, + tag_table_capture_map = 0x63, + tag_table_closure_tys = 0x64, + tag_table_closure_kinds = 0x65, + tag_table_upvar_capture_map = 0x66, + tag_table_capture_modes = 0x67, + tag_table_object_cast_map = 0x68, + tag_table_const_qualif = 0x69, } -pub const tag_item_trait_item_sort: uint = 0x60; +pub const tag_item_trait_item_sort: uint = 0x70; -pub const tag_item_trait_parent_sort: uint = 0x61; +pub const tag_item_trait_parent_sort: uint = 0x71; -pub const tag_item_impl_type_basename: uint = 0x62; +pub const tag_item_impl_type_basename: uint = 0x72; -pub const tag_crate_triple: uint = 0x66; +pub const tag_crate_triple: uint = 0x105; // top-level only -pub const tag_dylib_dependency_formats: uint = 0x67; +pub const tag_dylib_dependency_formats: uint = 0x106; // top-level only // Language items are a top-level directory (for speed). 
Hierarchy: // @@ -163,51 +168,47 @@ pub const tag_dylib_dependency_formats: uint = 0x67; // - tag_lang_items_item_id: u32 // - tag_lang_items_item_node_id: u32 -pub const tag_lang_items: uint = 0x70; -pub const tag_lang_items_item: uint = 0x71; -pub const tag_lang_items_item_id: uint = 0x72; -pub const tag_lang_items_item_node_id: uint = 0x73; -pub const tag_lang_items_missing: uint = 0x74; +pub const tag_lang_items: uint = 0x107; // top-level only +pub const tag_lang_items_item: uint = 0x73; +pub const tag_lang_items_item_id: uint = 0x74; +pub const tag_lang_items_item_node_id: uint = 0x75; +pub const tag_lang_items_missing: uint = 0x76; -pub const tag_item_unnamed_field: uint = 0x75; -pub const tag_items_data_item_visibility: uint = 0x76; +pub const tag_item_unnamed_field: uint = 0x77; +pub const tag_items_data_item_visibility: uint = 0x78; pub const tag_item_method_tps: uint = 0x79; pub const tag_item_method_fty: uint = 0x7a; pub const tag_mod_child: uint = 0x7b; -pub const tag_misc_info: uint = 0x7c; -pub const tag_misc_info_crate_items: uint = 0x7d; - -pub const tag_item_method_provided_source: uint = 0x7e; -pub const tag_item_impl_vtables: uint = 0x7f; +pub const tag_misc_info: uint = 0x108; // top-level only +pub const tag_misc_info_crate_items: uint = 0x7c; -pub const tag_impls: uint = 0x80; -pub const tag_impls_impl: uint = 0x81; +pub const tag_item_method_provided_source: uint = 0x7d; +pub const tag_item_impl_vtables: uint = 0x7e; -pub const tag_items_data_item_inherent_impl: uint = 0x82; -pub const tag_items_data_item_extension_impl: uint = 0x83; +pub const tag_impls: uint = 0x109; // top-level only +pub const tag_impls_impl: uint = 0x7f; -// GAP 0x84, 0x85, 0x86 +pub const tag_items_data_item_inherent_impl: uint = 0x80; +pub const tag_items_data_item_extension_impl: uint = 0x81; -pub const tag_native_libraries: uint = 0x87; -pub const tag_native_libraries_lib: uint = 0x88; -pub const tag_native_libraries_name: uint = 0x89; -pub const 
tag_native_libraries_kind: uint = 0x8a; +pub const tag_native_libraries: uint = 0x10a; // top-level only +pub const tag_native_libraries_lib: uint = 0x82; +pub const tag_native_libraries_name: uint = 0x83; +pub const tag_native_libraries_kind: uint = 0x84; -pub const tag_plugin_registrar_fn: uint = 0x8b; +pub const tag_plugin_registrar_fn: uint = 0x10b; // top-level only -// GAP 0x8c, 0x8d +pub const tag_method_argument_names: uint = 0x85; +pub const tag_method_argument_name: uint = 0x86; -pub const tag_method_argument_names: uint = 0x8e; -pub const tag_method_argument_name: uint = 0x8f; +pub const tag_reachable_extern_fns: uint = 0x10c; // top-level only +pub const tag_reachable_extern_fn_id: uint = 0x87; -pub const tag_reachable_extern_fns: uint = 0x90; -pub const tag_reachable_extern_fn_id: uint = 0x91; +pub const tag_items_data_item_stability: uint = 0x88; -pub const tag_items_data_item_stability: uint = 0x92; - -pub const tag_items_data_item_repr: uint = 0x93; +pub const tag_items_data_item_repr: uint = 0x89; #[derive(Clone, Debug)] pub struct LinkMeta { @@ -215,42 +216,40 @@ pub struct LinkMeta { pub crate_hash: Svh, } -// GAP 0x94...0x98 - -pub const tag_struct_fields: uint = 0x99; -pub const tag_struct_field: uint = 0x9a; -pub const tag_struct_field_id: uint = 0x9b; +pub const tag_struct_fields: uint = 0x10d; // top-level only +pub const tag_struct_field: uint = 0x8a; +pub const tag_struct_field_id: uint = 0x8b; -pub const tag_attribute_is_sugared_doc: uint = 0x9c; +pub const tag_attribute_is_sugared_doc: uint = 0x8c; -pub const tag_trait_def_bounds: uint = 0x9d; +pub const tag_trait_def_bounds: uint = 0x8d; -pub const tag_items_data_region: uint = 0x9e; +pub const tag_items_data_region: uint = 0x8e; -pub const tag_region_param_def: uint = 0xa0; -pub const tag_region_param_def_ident: uint = 0xa1; -pub const tag_region_param_def_def_id: uint = 0xa2; -pub const tag_region_param_def_space: uint = 0xa3; -pub const tag_region_param_def_index: uint = 0xa4; +pub 
const tag_region_param_def: uint = 0x8f; +pub const tag_region_param_def_ident: uint = 0x90; +pub const tag_region_param_def_def_id: uint = 0x91; +pub const tag_region_param_def_space: uint = 0x92; +pub const tag_region_param_def_index: uint = 0x93; -pub const tag_type_param_def: uint = 0xa5; +pub const tag_type_param_def: uint = 0x94; -pub const tag_item_generics: uint = 0xa6; -pub const tag_method_ty_generics: uint = 0xa7; +pub const tag_item_generics: uint = 0x95; +pub const tag_method_ty_generics: uint = 0x96; -pub const tag_predicate: uint = 0xa8; -pub const tag_predicate_space: uint = 0xa9; -pub const tag_predicate_data: uint = 0xb0; +pub const tag_predicate: uint = 0x97; +pub const tag_predicate_space: uint = 0x98; +pub const tag_predicate_data: uint = 0x99; -pub const tag_unsafety: uint = 0xb1; +pub const tag_unsafety: uint = 0x9a; -pub const tag_associated_type_names: uint = 0xb2; -pub const tag_associated_type_name: uint = 0xb3; +pub const tag_associated_type_names: uint = 0x9b; +pub const tag_associated_type_name: uint = 0x9c; -pub const tag_polarity: uint = 0xb4; +pub const tag_polarity: uint = 0x9d; -pub const tag_macro_defs: uint = 0xb5; -pub const tag_macro_def: uint = 0xb6; -pub const tag_macro_def_body: uint = 0xb7; +pub const tag_macro_defs: uint = 0x10e; // top-level only +pub const tag_macro_def: uint = 0x9e; +pub const tag_macro_def_body: uint = 0x9f; -pub const tag_paren_sugar: uint = 0xb8; +pub const tag_paren_sugar: uint = 0xa0; From c9840b644c7e69551c6f9d737125375e4aec602d Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Sun, 1 Mar 2015 11:57:20 +0900 Subject: [PATCH 03/12] metadata: Introduce implicit lengths for auto-serialization. Many auto-serialization tags are fixed-size (note: many ordinary tags are also fixed-size but for now this commit ignores them), so having an explicit length is a waste. This moves any auto-serialization tags with an implicit length before other tags, so a test for them is easy. 
A preliminary experiment shows this has at least 1% gain over the status quo. --- src/librbml/lib.rs | 173 +++++++++++++++++++++++++++++++-------------- 1 file changed, 121 insertions(+), 52 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index d0b8301bed572..a2fe71f0b4b2c 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -80,38 +80,56 @@ pub struct TaggedDoc<'a> { #[derive(Copy, Debug)] pub enum EbmlEncoderTag { - EsUint, // 0 - EsU64, // 1 - EsU32, // 2 - EsU16, // 3 - EsU8, // 4 - EsInt, // 5 - EsI64, // 6 - EsI32, // 7 - EsI16, // 8 - EsI8, // 9 - EsBool, // 10 - EsChar, // 11 - EsStr, // 12 - EsF64, // 13 - EsF32, // 14 - EsFloat, // 15 - EsEnum, // 16 - EsEnumVid, // 17 - EsEnumBody, // 18 - EsVec, // 19 - EsVecLen, // 20 - EsVecElt, // 21 - EsMap, // 22 - EsMapLen, // 23 - EsMapKey, // 24 - EsMapVal, // 25 - - EsOpaque, - - EsLabel, // Used only when debugging + // tags 00..1f are reserved for auto-serialization. + // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded. 
+ + EsUint = 0x00, // + 8 bytes + EsU64 = 0x01, // + 8 bytes + EsU32 = 0x02, // + 4 bytes + EsU16 = 0x03, // + 2 bytes + EsU8 = 0x04, // + 1 byte + EsInt = 0x05, // + 8 bytes + EsI64 = 0x06, // + 8 bytes + EsI32 = 0x07, // + 4 bytes + EsI16 = 0x08, // + 2 bytes + EsI8 = 0x09, // + 1 byte + EsBool = 0x0a, // + 1 byte + EsChar = 0x0b, // + 4 bytes + EsF64 = 0x0c, // + 8 bytes + EsF32 = 0x0d, // + 4 bytes + EsEnumVid = 0x0e, // + 4 bytes + EsVecLen = 0x0f, // + 4 bytes + EsMapLen = 0x10, // + 4 bytes + + EsStr = 0x11, + EsEnum = 0x12, + EsEnumBody = 0x13, + EsVec = 0x14, + EsVecElt = 0x15, + EsMap = 0x16, + EsMapKey = 0x17, + EsMapVal = 0x18, + + EsOpaque = 0x19, + + // Used only when debugging + EsLabel = 0x1a, } +const NUM_TAGS: uint = 0x1000; +const NUM_IMPLICIT_TAGS: uint = 0x11; + +static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ + 8, 8, 4, 2, 1, // EsU* + 8, 8, 4, 2, 1, // EsI* + 1, // EsBool + 4, // EsChar + 8, 4, // EsF* + 4, // EsEnumVid + 4, // EsVecLen + 4, // EsMapLen +]; + #[derive(Debug)] pub enum Error { IntTooBig(uint), @@ -143,7 +161,7 @@ pub mod reader { EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, - Error, IntTooBig, InvalidTag, Expected }; + Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; pub type DecodeResult = Result; // rbml reading @@ -250,9 +268,17 @@ pub mod reader { } } + pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult { + if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 { + Ok(Res { val: TAG_IMPLICIT_LEN[tag.val] as uint, next: tag.next }) + } else { + vuint_at(data, tag.next) + } + } + pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult> { let elt_tag = try!(tag_at(data, start)); - let elt_size = try!(vuint_at(data, elt_tag.next)); + let elt_size = try!(tag_len_at(data, elt_tag)); let end = elt_size.next +
elt_size.val; Ok(TaggedDoc { tag: elt_tag.val, @@ -264,7 +290,7 @@ pub mod reader { let mut pos = d.start; while pos < d.end { let elt_tag = try_or!(tag_at(d.data, pos), None); - let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None); + let elt_size = try_or!(tag_len_at(d.data, elt_tag), None); pos = elt_size.next + elt_size.val; if elt_tag.val == tg { return Some(Doc { data: d.data, start: elt_size.next, @@ -290,7 +316,7 @@ pub mod reader { let mut pos = d.start; while pos < d.end { let elt_tag = try_or!(tag_at(d.data, pos), false); - let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false); + let elt_size = try_or!(tag_len_at(d.data, elt_tag), false); pos = elt_size.next + elt_size.val; let doc = Doc { data: d.data, start: elt_size.next, end: pos }; if !it(elt_tag.val, doc) { @@ -306,7 +332,7 @@ pub mod reader { let mut pos = d.start; while pos < d.end { let elt_tag = try_or!(tag_at(d.data, pos), false); - let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false); + let elt_size = try_or!(tag_len_at(d.data, elt_tag), false); pos = elt_size.next + elt_size.val; if elt_tag.val == tg { let doc = Doc { data: d.data, start: elt_size.next, @@ -718,7 +744,7 @@ pub mod writer { use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint, - EsOpaque, EsLabel, EbmlEncoderTag }; + EsOpaque, EsLabel, EbmlEncoderTag, NUM_IMPLICIT_TAGS, NUM_TAGS }; use serialize; @@ -734,7 +760,7 @@ pub mod writer { fn write_tag(w: &mut W, n: uint) -> EncodeResult { if n < 0xf0 { w.write_all(&[n as u8]) - } else if 0x100 <= n && n < 0x1000 { + } else if 0x100 <= n && n < NUM_TAGS { w.write_all(&[0xf0 | (n >> 8) as u8, n as u8]) } else { Err(old_io::IoError { @@ -791,6 +817,7 @@ pub mod writer { pub fn start_tag(&mut self, tag_id: uint) -> EncodeResult { debug!("Start tag {:?}", tag_id); + assert!(tag_id >= NUM_IMPLICIT_TAGS); // 
Write the enum ID: try!(write_tag(self.writer, tag_id)); @@ -822,6 +849,7 @@ pub mod writer { } pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult { + assert!(tag_id >= NUM_IMPLICIT_TAGS); try!(write_tag(self.writer, tag_id)); try!(write_vuint(self.writer, b.len())); self.writer.write_all(b) @@ -866,6 +894,47 @@ pub mod writer { self.wr_tagged_bytes(tag_id, v.as_bytes()) } + // for auto-serialization + fn wr_tagged_raw_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult { + try!(write_tag(self.writer, tag_id)); + self.writer.write_all(b) + } + + fn wr_tagged_raw_u64(&mut self, tag_id: uint, v: u64) -> EncodeResult { + let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) }; + self.wr_tagged_raw_bytes(tag_id, &bytes) + } + + fn wr_tagged_raw_u32(&mut self, tag_id: uint, v: u32) -> EncodeResult{ + let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) }; + self.wr_tagged_raw_bytes(tag_id, &bytes) + } + + fn wr_tagged_raw_u16(&mut self, tag_id: uint, v: u16) -> EncodeResult { + let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) }; + self.wr_tagged_raw_bytes(tag_id, &bytes) + } + + fn wr_tagged_raw_u8(&mut self, tag_id: uint, v: u8) -> EncodeResult { + self.wr_tagged_raw_bytes(tag_id, &[v]) + } + + fn wr_tagged_raw_i64(&mut self, tag_id: uint, v: i64) -> EncodeResult { + self.wr_tagged_raw_u64(tag_id, v as u64) + } + + fn wr_tagged_raw_i32(&mut self, tag_id: uint, v: i32) -> EncodeResult { + self.wr_tagged_raw_u32(tag_id, v as u32) + } + + fn wr_tagged_raw_i16(&mut self, tag_id: uint, v: i16) -> EncodeResult { + self.wr_tagged_raw_u16(tag_id, v as u16) + } + + fn wr_tagged_raw_i8(&mut self, tag_id: uint, v: i8) -> EncodeResult { + self.wr_tagged_raw_bytes(tag_id, &[v as u8]) + } + pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult { debug!("Write {:?} bytes", b.len()); self.writer.write_all(b) @@ -891,7 +960,7 @@ pub mod writer { // used internally to emit things like the vector length and so on fn _emit_tagged_uint(&mut self, t: 
EbmlEncoderTag, v: uint) -> EncodeResult { assert!(v <= 0xFFFF_FFFF); - self.wr_tagged_u32(t as uint, v as u32) + self.wr_tagged_raw_u32(t as uint, v as u32) } fn _emit_label(&mut self, label: &str) -> EncodeResult { @@ -922,51 +991,51 @@ pub mod writer { } fn emit_uint(&mut self, v: uint) -> EncodeResult { - self.wr_tagged_u64(EsUint as uint, v as u64) + self.wr_tagged_raw_u64(EsUint as uint, v as u64) } fn emit_u64(&mut self, v: u64) -> EncodeResult { - self.wr_tagged_u64(EsU64 as uint, v) + self.wr_tagged_raw_u64(EsU64 as uint, v) } fn emit_u32(&mut self, v: u32) -> EncodeResult { - self.wr_tagged_u32(EsU32 as uint, v) + self.wr_tagged_raw_u32(EsU32 as uint, v) } fn emit_u16(&mut self, v: u16) -> EncodeResult { - self.wr_tagged_u16(EsU16 as uint, v) + self.wr_tagged_raw_u16(EsU16 as uint, v) } fn emit_u8(&mut self, v: u8) -> EncodeResult { - self.wr_tagged_u8(EsU8 as uint, v) + self.wr_tagged_raw_u8(EsU8 as uint, v) } fn emit_int(&mut self, v: int) -> EncodeResult { - self.wr_tagged_i64(EsInt as uint, v as i64) + self.wr_tagged_raw_i64(EsInt as uint, v as i64) } fn emit_i64(&mut self, v: i64) -> EncodeResult { - self.wr_tagged_i64(EsI64 as uint, v) + self.wr_tagged_raw_i64(EsI64 as uint, v) } fn emit_i32(&mut self, v: i32) -> EncodeResult { - self.wr_tagged_i32(EsI32 as uint, v) + self.wr_tagged_raw_i32(EsI32 as uint, v) } fn emit_i16(&mut self, v: i16) -> EncodeResult { - self.wr_tagged_i16(EsI16 as uint, v) + self.wr_tagged_raw_i16(EsI16 as uint, v) } fn emit_i8(&mut self, v: i8) -> EncodeResult { - self.wr_tagged_i8(EsI8 as uint, v) + self.wr_tagged_raw_i8(EsI8 as uint, v) } fn emit_bool(&mut self, v: bool) -> EncodeResult { - self.wr_tagged_u8(EsBool as uint, v as u8) + self.wr_tagged_raw_u8(EsBool as uint, v as u8) } fn emit_f64(&mut self, v: f64) -> EncodeResult { let bits = unsafe { mem::transmute(v) }; - self.wr_tagged_u64(EsF64 as uint, bits) + self.wr_tagged_raw_u64(EsF64 as uint, bits) } fn emit_f32(&mut self, v: f32) -> EncodeResult { let bits = 
unsafe { mem::transmute(v) }; - self.wr_tagged_u32(EsF32 as uint, bits) + self.wr_tagged_raw_u32(EsF32 as uint, bits) } fn emit_char(&mut self, v: char) -> EncodeResult { - self.wr_tagged_u32(EsChar as uint, v as u32) + self.wr_tagged_raw_u32(EsChar as uint, v as u32) } fn emit_str(&mut self, v: &str) -> EncodeResult { From 2f3aa0dd2e9a599a80e22d79c33a5d7e2554f90b Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Mon, 2 Mar 2015 00:37:14 +0900 Subject: [PATCH 04/12] metadata: Eliminate the `EsEnumBody` tag. It doesn't serve any useful purpose. It *might* be useful when there are some tags that are generated by `Encodable` and not delimited by any tags, but IIUC it's not the case. Previous: <-------------------- len1 -------------------> EsEnum EsEnumVid EsEnumBody <--- len2 --> Now: <----------- len1 ----------> EsEnum EsEnumVid --- src/librbml/lib.rs | 47 ++++++++++++---------------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index a2fe71f0b4b2c..58cf5daf94121 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -103,17 +103,16 @@ pub enum EbmlEncoderTag { EsStr = 0x11, EsEnum = 0x12, - EsEnumBody = 0x13, - EsVec = 0x14, - EsVecElt = 0x15, - EsMap = 0x16, - EsMapKey = 0x17, - EsMapVal = 0x18, + EsVec = 0x13, + EsVecElt = 0x14, + EsMap = 0x15, + EsMapKey = 0x16, + EsMapVal = 0x17, - EsOpaque = 0x19, + EsOpaque = 0x18, // Used only when debugging - EsLabel = 0x1a, + EsLabel = 0x19, } const NUM_TAGS: uint = 0x1000; @@ -160,7 +159,7 @@ pub mod reader { use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, - EsEnumBody, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, + EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; pub type 
DecodeResult = Result; @@ -564,17 +563,7 @@ pub mod reader { let idx = try!(self._next_uint(EsEnumVid)); debug!(" idx={}", idx); - let doc = try!(self.next_doc(EsEnumBody)); - - let (old_parent, old_pos) = (self.parent, self.pos); - self.parent = doc; - self.pos = self.parent.start; - - let result = try!(f(self, idx)); - - self.parent = old_parent; - self.pos = old_pos; - Ok(result) + f(self, idx) } fn read_enum_variant_arg(&mut self, idx: uint, f: F) -> DecodeResult where @@ -592,17 +581,7 @@ pub mod reader { let idx = try!(self._next_uint(EsEnumVid)); debug!(" idx={}", idx); - let doc = try!(self.next_doc(EsEnumBody)); - - let (old_parent, old_pos) = (self.parent, self.pos); - self.parent = doc; - self.pos = self.parent.start; - - let result = try!(f(self, idx)); - - self.parent = old_parent; - self.pos = old_pos; - Ok(result) + f(self, idx) } fn read_enum_struct_variant_field(&mut self, @@ -743,7 +722,7 @@ pub mod writer { use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, - EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint, + EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, NUM_IMPLICIT_TAGS, NUM_TAGS }; use serialize; @@ -1059,9 +1038,7 @@ pub mod writer { F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, { try!(self._emit_tagged_uint(EsEnumVid, v_id)); - try!(self.start_tag(EsEnumBody as uint)); - try!(f(self)); - self.end_tag() + f(self) } fn emit_enum_variant_arg(&mut self, _: uint, f: F) -> EncodeResult where From 35c798b3fcef00f6a31b40da91afa596a1476b4b Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Mon, 2 Mar 2015 02:20:46 +0900 Subject: [PATCH 05/12] metadata: Bye bye `EsLabel`. No regrets. For the reference, while it is designed to be selectively enabled, it was essentially enabled throughout every snapshot and nightly as far as I can tell. 
This makes the usefulness of `EsLabel` itself questionable, as it was quite rare that `EsLabel` broke the build. It had consumed about 20~30% of metadata (!) and so this should be a huge win. --- src/librbml/lib.rs | 51 ++++------------------------------------------ 1 file changed, 4 insertions(+), 47 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index 58cf5daf94121..70a6ff88f0696 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -108,11 +108,7 @@ pub enum EbmlEncoderTag { EsMap = 0x15, EsMapKey = 0x16, EsMapVal = 0x17, - EsOpaque = 0x18, - - // Used only when debugging - EsLabel = 0x19, } const NUM_TAGS: uint = 0x1000; @@ -159,7 +155,7 @@ pub mod reader { use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, - EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, + EsUint, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; pub type DecodeResult = Result; @@ -395,23 +391,6 @@ pub mod reader { } } - fn _check_label(&mut self, lbl: &str) -> DecodeResult<()> { - if self.pos < self.parent.end { - let TaggedDoc { tag: r_tag, doc: r_doc } = - try!(doc_at(self.parent.data, self.pos)); - - if r_tag == (EsLabel as uint) { - self.pos = r_doc.end; - let str = r_doc.as_str_slice(); - if lbl != str { - return Err(Expected(format!("Expected label {:?} but \ - found {:?}", lbl, str))); - } - } - } - Ok(()) - } - fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult> { debug!(". 
next_doc(exp_tag={:?})", exp_tag); if self.pos >= self.parent.end { @@ -540,7 +519,6 @@ pub mod reader { F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_enum({})", name); - try!(self._check_label(name)); let doc = try!(self.next_doc(EsEnum)); @@ -606,7 +584,6 @@ pub mod reader { F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_struct_field(name={}, idx={})", name, idx); - try!(self._check_label(name)); f(self) } @@ -723,7 +700,7 @@ pub mod writer { use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, - EsOpaque, EsLabel, EbmlEncoderTag, NUM_IMPLICIT_TAGS, NUM_TAGS }; + EsOpaque, EbmlEncoderTag, NUM_IMPLICIT_TAGS, NUM_TAGS }; use serialize; @@ -928,13 +905,6 @@ pub mod writer { // FIXME (#2743): optionally perform "relaxations" on end_tag to more // efficiently encode sizes; this is a fixed point iteration - // Set to true to generate more debugging in EBML code. - // Totally lame approach. - #[cfg(not(ndebug))] - static DEBUG: bool = true; - #[cfg(ndebug)] - static DEBUG: bool = false; - impl<'a, W: Writer + Seek> Encoder<'a, W> { // used internally to emit things like the vector length and so on fn _emit_tagged_uint(&mut self, t: EbmlEncoderTag, v: uint) -> EncodeResult { @@ -942,17 +912,6 @@ pub mod writer { self.wr_tagged_raw_u32(t as uint, v as u32) } - fn _emit_label(&mut self, label: &str) -> EncodeResult { - // There are various strings that we have access to, such as - // the name of a record field, which do not actually appear in - // the encoded EBML (normally). This is just for - // efficiency. When debugging, though, we can emit such - // labels and then they will be checked by decoder to - // try and check panics more quickly. 
- if DEBUG { self.wr_tagged_str(EsLabel as uint, label) } - else { Ok(()) } - } - pub fn emit_opaque(&mut self, f: F) -> EncodeResult where F: FnOnce(&mut Encoder) -> EncodeResult, { @@ -1021,10 +980,9 @@ pub mod writer { self.wr_tagged_str(EsStr as uint, v) } - fn emit_enum(&mut self, name: &str, f: F) -> EncodeResult where + fn emit_enum(&mut self, _name: &str, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, { - try!(self._emit_label(name)); try!(self.start_tag(EsEnum as uint)); try!(f(self)); self.end_tag() @@ -1072,10 +1030,9 @@ pub mod writer { f(self) } - fn emit_struct_field(&mut self, name: &str, _: uint, f: F) -> EncodeResult where + fn emit_struct_field(&mut self, _name: &str, _: uint, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, { - try!(self._emit_label(name)); f(self) } From de00b858d10aec2c39c692d4f61af8e9c9170ee0 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Mon, 2 Mar 2015 02:46:53 +0900 Subject: [PATCH 06/12] metadata: Introduce `EsSub8` and `EsSub32` tags. They replace the existing `EsEnumVid`, `EsVecLen` and `EsMapLen` tags altogether; the meaning of them can be easily inferred from the enclosing tag. It also has an added benefit of encodings for smaller variant ids or lengths being more compact (5 bytes to 2 bytes). 
--- src/librbml/lib.rs | 112 +++++++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 35 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index 70a6ff88f0696..fc4fbb36707ea 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -97,22 +97,21 @@ pub enum EbmlEncoderTag { EsChar = 0x0b, // + 4 bytes EsF64 = 0x0c, // + 8 bytes EsF32 = 0x0d, // + 4 bytes - EsEnumVid = 0x0e, // + 4 bytes - EsVecLen = 0x0f, // + 4 bytes - EsMapLen = 0x10, // + 4 bytes - - EsStr = 0x11, - EsEnum = 0x12, - EsVec = 0x13, - EsVecElt = 0x14, - EsMap = 0x15, - EsMapKey = 0x16, - EsMapVal = 0x17, - EsOpaque = 0x18, + EsSub8 = 0x0e, // + 1 byte + EsSub32 = 0x0f, // + 4 bytes + + EsStr = 0x10, + EsEnum = 0x11, // encodes the variant id as the first EsSub* + EsVec = 0x12, // encodes the # of elements as the first EsSub* + EsVecElt = 0x13, + EsMap = 0x14, // encodes the # of pairs as the first EsSub* + EsMapKey = 0x15, + EsMapVal = 0x16, + EsOpaque = 0x17, } const NUM_TAGS: uint = 0x1000; -const NUM_IMPLICIT_TAGS: uint = 0x11; +const NUM_IMPLICIT_TAGS: uint = 0x10; static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ 8, 8, 4, 2, 1, // EsU* @@ -120,9 +119,7 @@ static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ 1, // EsBool 4, // EsChar 8, 4, // EsF* - 4, // EsEnumVid - 4, // EsVecLen - 4, // EsMapLen + 1, 4, // EsSub* ]; #[derive(Debug)] @@ -152,8 +149,8 @@ pub mod reader { use serialize; - use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, - EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, + use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, + EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; @@ -419,6 +416,37 @@ pub mod reader { Ok(r_doc) } + fn next_doc2(&mut self, + exp_tag1: 
EbmlEncoderTag, + exp_tag2: EbmlEncoderTag) -> DecodeResult<(bool, Doc<'doc>)> { + assert!((exp_tag1 as uint) != (exp_tag2 as uint)); + debug!(". next_doc2(exp_tag1={:?}, exp_tag2={:?})", exp_tag1, exp_tag2); + if self.pos >= self.parent.end { + return Err(Expected(format!("no more documents in \ + current node!"))); + } + let TaggedDoc { tag: r_tag, doc: r_doc } = + try!(doc_at(self.parent.data, self.pos)); + debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}", + self.parent.start, + self.parent.end, + self.pos, + r_tag, + r_doc.start, + r_doc.end); + if r_tag != (exp_tag1 as uint) && r_tag != (exp_tag2 as uint) { + return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but \ + found tag {:?}", exp_tag1, exp_tag2, r_tag))); + } + if r_doc.end > self.parent.end { + return Err(Expected(format!("invalid EBML, child extends to \ + {:#x}, parent to {:#x}", + r_doc.end, self.parent.end))); + } + self.pos = r_doc.end; + Ok((r_tag == (exp_tag2 as uint), r_doc)) + } + fn push_doc(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { @@ -433,10 +461,15 @@ pub mod reader { Ok(r) } - fn _next_uint(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult { - let r = doc_as_u32(try!(self.next_doc(exp_tag))); - debug!("_next_uint exp_tag={:?} result={:?}", exp_tag, r); - Ok(r as uint) + fn _next_sub(&mut self) -> DecodeResult { + let (big, doc) = try!(self.next_doc2(EsSub8, EsSub32)); + let r = if big { + doc_as_u32(doc) as uint + } else { + doc_as_u8(doc) as uint + }; + debug!("_next_sub result={:?}", r); + Ok(r) } pub fn read_opaque(&mut self, op: F) -> DecodeResult where @@ -538,7 +571,7 @@ pub mod reader { where F: FnMut(&mut Decoder<'doc>, uint) -> DecodeResult, { debug!("read_enum_variant()"); - let idx = try!(self._next_uint(EsEnumVid)); + let idx = try!(self._next_sub()); debug!(" idx={}", idx); f(self, idx) @@ -556,7 +589,7 @@ pub mod reader { where F: FnMut(&mut Decoder<'doc>, uint) 
-> DecodeResult, { debug!("read_enum_struct_variant()"); - let idx = try!(self._next_uint(EsEnumVid)); + let idx = try!(self._next_sub()); debug!(" idx={}", idx); f(self, idx) @@ -647,7 +680,7 @@ pub mod reader { { debug!("read_seq()"); self.push_doc(EsVec, move |d| { - let len = try!(d._next_uint(EsVecLen)); + let len = try!(d._next_sub()); debug!(" len={}", len); f(d, len) }) @@ -665,7 +698,7 @@ pub mod reader { { debug!("read_map()"); self.push_doc(EsMap, move |d| { - let len = try!(d._next_uint(EsMapLen)); + let len = try!(d._next_sub()); debug!(" len={}", len); f(d, len) }) @@ -697,10 +730,10 @@ pub mod writer { use std::old_io::{Writer, Seek}; use std::old_io; - use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey, - EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, + use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, + EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, - EsOpaque, EbmlEncoderTag, NUM_IMPLICIT_TAGS, NUM_TAGS }; + EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS }; use serialize; @@ -907,9 +940,18 @@ pub mod writer { impl<'a, W: Writer + Seek> Encoder<'a, W> { // used internally to emit things like the vector length and so on - fn _emit_tagged_uint(&mut self, t: EbmlEncoderTag, v: uint) -> EncodeResult { - assert!(v <= 0xFFFF_FFFF); - self.wr_tagged_raw_u32(t as uint, v as u32) + fn _emit_tagged_sub(&mut self, v: uint) -> EncodeResult { + if let Some(v) = v.to_u8() { + self.wr_tagged_raw_u8(EsSub8 as uint, v) + } else if let Some(v) = v.to_u32() { + self.wr_tagged_raw_u32(EsSub32 as uint, v) + } else { + Err(old_io::IoError { + kind: old_io::OtherIoError, + desc: "length or variant id too big", + detail: Some(format!("{}", v)) + }) + } } pub fn emit_opaque(&mut self, f: F) -> EncodeResult where @@ -995,7 +1037,7 @@ pub mod writer { f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, { - 
try!(self._emit_tagged_uint(EsEnumVid, v_id)); + try!(self._emit_tagged_sub(v_id)); f(self) } @@ -1078,7 +1120,7 @@ pub mod writer { { try!(self.start_tag(EsVec as uint)); - try!(self._emit_tagged_uint(EsVecLen, len)); + try!(self._emit_tagged_sub(len)); try!(f(self)); self.end_tag() } @@ -1097,7 +1139,7 @@ pub mod writer { { try!(self.start_tag(EsMap as uint)); - try!(self._emit_tagged_uint(EsMapLen, len)); + try!(self._emit_tagged_sub(len)); try!(f(self)); self.end_tag() } From 84e9a61e9c697ffb6f6f783e9190d5a93dfdc10a Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Mon, 2 Mar 2015 14:34:16 +0900 Subject: [PATCH 07/12] metadata: Implement relaxation of short RBML lengths. We try to move the data when the length can be encoded in a much smaller number of bytes. This interferes with indices and type abbreviations, however, so this commit introduces a public interface to get and mark a "stable" (i.e. not affected by relaxation) position of the current pointer. The relaxation logic only moves small data, currently at most 256 bytes, as moving the data can be costly. There might be further opportunities to allow more relaxation by moving fields around, which I didn't seriously try.
--- src/librbml/lib.rs | 94 ++++++++++++++++++++----------- src/librustc/metadata/encoder.rs | 57 ++++++++++--------- src/librustc/metadata/tyencode.rs | 68 +++++++++++----------- src/librustc/middle/astencode.rs | 21 ++++--- 4 files changed, 138 insertions(+), 102 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index fc4fbb36707ea..d74d9ae812511 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -729,11 +729,13 @@ pub mod writer { use std::num::Int; use std::old_io::{Writer, Seek}; use std::old_io; + use std::slice::bytes; use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS }; + use super::io::SeekableMemWriter; use serialize; @@ -741,9 +743,10 @@ pub mod writer { pub type EncodeResult = old_io::IoResult<()>; // rbml writing - pub struct Encoder<'a, W:'a> { - pub writer: &'a mut W, + pub struct Encoder<'a> { + pub writer: &'a mut SeekableMemWriter, size_positions: Vec, + relax_limit: u64, // do not move encoded bytes before this position } fn write_tag(w: &mut W, n: uint) -> EncodeResult { @@ -788,19 +791,21 @@ pub mod writer { }) } - impl<'a, W: Writer + Seek> Encoder<'a, W> { - pub fn new(w: &'a mut W) -> Encoder<'a, W> { + impl<'a> Encoder<'a> { + pub fn new(w: &'a mut SeekableMemWriter) -> Encoder<'a> { Encoder { writer: w, size_positions: vec!(), + relax_limit: 0, } } /// FIXME(pcwalton): Workaround for badness in trans. DO NOT USE ME. 
- pub unsafe fn unsafe_clone(&self) -> Encoder<'a, W> { + pub unsafe fn unsafe_clone(&self) -> Encoder<'a> { Encoder { writer: mem::transmute_copy(&self.writer), size_positions: self.size_positions.clone(), + relax_limit: self.relax_limit, } } @@ -822,11 +827,29 @@ pub mod writer { let cur_pos = try!(self.writer.tell()); try!(self.writer.seek(last_size_pos as i64, old_io::SeekSet)); let size = cur_pos as uint - last_size_pos - 4; - try!(write_sized_vuint(self.writer, size, 4)); - let r = try!(self.writer.seek(cur_pos as i64, old_io::SeekSet)); + + // relax the size encoding for small tags (bigger tags are costly to move). + // we should never try to move the stable positions, however. + const RELAX_MAX_SIZE: uint = 0x100; + if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit as uint { + // we can't alter the buffer in place, so have a temporary buffer + let mut buf = [0u8; RELAX_MAX_SIZE]; + { + let data = &self.writer.get_ref()[last_size_pos+4..cur_pos as uint]; + bytes::copy_memory(&mut buf, data); + } + + // overwrite the size and data and continue + try!(write_vuint(self.writer, size)); + try!(self.writer.write_all(&buf[..size])); + } else { + // overwrite the size with an overlong encoding and skip past the data + try!(write_sized_vuint(self.writer, size, 4)); + try!(self.writer.seek(cur_pos as i64, old_io::SeekSet)); + } debug!("End tag (size = {:?})", size); - Ok(r) + Ok(()) } pub fn wr_tag(&mut self, tag_id: uint, blk: F) -> EncodeResult where @@ -933,12 +956,19 @@ pub mod writer { debug!("Write str: {:?}", s); self.writer.write_all(s.as_bytes()) } - } - // FIXME (#2743): optionally perform "relaxations" on end_tag to more - // efficiently encode sizes; this is a fixed point iteration + /// Returns the current position while marking it stable, i.e. + /// the bytes generated so far wouldn't be affected by relaxation.
+ pub fn mark_stable_position(&mut self) -> u64 { + let pos = self.writer.tell().unwrap(); + if self.relax_limit < pos { + self.relax_limit = pos; + } + pos + } + } - impl<'a, W: Writer + Seek> Encoder<'a, W> { + impl<'a> Encoder<'a> { // used internally to emit things like the vector length and so on fn _emit_tagged_sub(&mut self, v: uint) -> EncodeResult { if let Some(v) = v.to_u8() { @@ -955,7 +985,7 @@ pub mod writer { } pub fn emit_opaque(&mut self, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder) -> EncodeResult, + F: FnOnce(&mut Encoder) -> EncodeResult, { try!(self.start_tag(EsOpaque as uint)); try!(f(self)); @@ -963,7 +993,7 @@ pub mod writer { } } - impl<'a, W: Writer + Seek> serialize::Encoder for Encoder<'a, W> { + impl<'a> serialize::Encoder for Encoder<'a> { type Error = old_io::IoError; fn emit_nil(&mut self) -> EncodeResult { @@ -1023,7 +1053,7 @@ pub mod writer { } fn emit_enum(&mut self, _name: &str, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsEnum as uint)); try!(f(self)); @@ -1035,14 +1065,14 @@ pub mod writer { v_id: uint, _: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self._emit_tagged_sub(v_id)); f(self) } fn emit_enum_variant_arg(&mut self, _: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } @@ -1052,7 +1082,7 @@ pub mod writer { v_id: uint, cnt: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant(v_name, v_id, cnt, f) } @@ -1061,47 +1091,47 @@ pub mod writer { _: &str, idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant_arg(idx, f) } fn 
emit_struct(&mut self, _: &str, _len: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } fn emit_struct_field(&mut self, _name: &str, _: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } fn emit_tuple(&mut self, len: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq(len, f) } fn emit_tuple_arg(&mut self, idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq_elt(idx, f) } fn emit_tuple_struct(&mut self, _: &str, len: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq(len, f) } fn emit_tuple_struct_arg(&mut self, idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq_elt(idx, f) } fn emit_option(&mut self, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum("Option", f) } @@ -1109,14 +1139,14 @@ pub mod writer { self.emit_enum_variant("None", 0, 0, |_| Ok(())) } fn emit_option_some(&mut self, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant("Some", 1, 1, f) } fn emit_seq(&mut self, len: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsVec as uint)); @@ -1126,7 +1156,7 @@ pub mod writer { } fn emit_seq_elt(&mut self, _idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: 
FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsVecElt as uint)); @@ -1135,7 +1165,7 @@ pub mod writer { } fn emit_map(&mut self, len: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsMap as uint)); @@ -1145,7 +1175,7 @@ pub mod writer { } fn emit_map_elt_key(&mut self, _idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsMapKey as uint)); @@ -1154,7 +1184,7 @@ pub mod writer { } fn emit_map_elt_val(&mut self, _idx: uint, f: F) -> EncodeResult where - F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult, + F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsMapVal as uint)); try!(f(self)); diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index 7c28f0e17b5e4..fa9e28bf56d23 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -46,7 +46,7 @@ use syntax::ptr::P; use syntax::visit::Visitor; use syntax::visit; use syntax; -use rbml::writer; +use rbml::writer::Encoder; use rbml::io::SeekableMemWriter; /// A borrowed version of `ast::InlinedItem`. 
@@ -57,8 +57,6 @@ pub enum InlinedItemRef<'a> { IIForeignRef(&'a ast::ForeignItem) } -pub type Encoder<'a> = writer::Encoder<'a, SeekableMemWriter>; - pub type EncodeInlinedItem<'a> = Box; @@ -115,7 +113,7 @@ fn encode_trait_ref<'a, 'tcx>(rbml_w: &mut Encoder, }; rbml_w.start_tag(tag); - tyencode::enc_trait_ref(rbml_w.writer, ty_str_ctxt, trait_ref); + tyencode::enc_trait_ref(rbml_w, ty_str_ctxt, trait_ref); rbml_w.end_tag(); } @@ -169,7 +167,7 @@ pub fn write_closure_type<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - tyencode::enc_closure_ty(rbml_w.writer, ty_str_ctxt, closure_type); + tyencode::enc_closure_ty(rbml_w, ty_str_ctxt, closure_type); } pub fn write_type<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, @@ -181,7 +179,7 @@ pub fn write_type<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - tyencode::enc_ty(rbml_w.writer, ty_str_ctxt, typ); + tyencode::enc_ty(rbml_w, ty_str_ctxt, typ); } pub fn write_trait_ref<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, @@ -193,7 +191,7 @@ pub fn write_trait_ref<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - tyencode::enc_trait_ref(rbml_w.writer, ty_str_ctxt, trait_ref); + tyencode::enc_trait_ref(rbml_w, ty_str_ctxt, trait_ref); } pub fn write_region(ecx: &EncodeContext, @@ -205,7 +203,7 @@ pub fn write_region(ecx: &EncodeContext, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - tyencode::enc_region(rbml_w.writer, ty_str_ctxt, r); + tyencode::enc_region(rbml_w, ty_str_ctxt, r); } fn encode_bounds<'a, 'tcx>(rbml_w: &mut Encoder, @@ -218,7 +216,7 @@ fn encode_bounds<'a, 'tcx>(rbml_w: &mut Encoder, ds: def_to_string, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - tyencode::enc_bounds(rbml_w.writer, ty_str_ctxt, bounds); + tyencode::enc_bounds(rbml_w, ty_str_ctxt, bounds); rbml_w.end_tag(); } @@ -250,7 +248,7 @@ fn encode_method_fty<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, tcx: ecx.tcx, abbrevs: &ecx.type_abbrevs }; - 
tyencode::enc_bare_fn_ty(rbml_w.writer, ty_str_ctxt, typ); + tyencode::enc_bare_fn_ty(rbml_w, ty_str_ctxt, typ); rbml_w.end_tag(); } @@ -312,7 +310,7 @@ fn encode_enum_variant_info(ecx: &EncodeContext, let def_id = local_def(variant.node.id); index.push(entry { val: variant.node.id as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); rbml_w.start_tag(tag_items_data_item); encode_def_id(rbml_w, def_id); @@ -659,10 +657,11 @@ fn encode_info_for_struct(ecx: &EncodeContext, let nm = field.name; let id = field.id.node; - index.push(entry {val: id as i64, pos: rbml_w.writer.tell().unwrap()}); + let pos = rbml_w.mark_stable_position(); + index.push(entry {val: id as i64, pos: pos}); global_index.push(entry { val: id as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: pos, }); rbml_w.start_tag(tag_items_data_item); debug!("encode_info_for_struct: doing {} {}", @@ -688,7 +687,7 @@ fn encode_info_for_struct_ctor(ecx: &EncodeContext, struct_id: NodeId) { index.push(entry { val: ctor_id as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); rbml_w.start_tag(tag_items_data_item); @@ -731,7 +730,7 @@ fn encode_generics<'a, 'tcx>(rbml_w: &mut Encoder, }; for param in generics.types.iter() { rbml_w.start_tag(tag_type_param_def); - tyencode::enc_type_param_def(rbml_w.writer, ty_str_ctxt, param); + tyencode::enc_type_param_def(rbml_w, ty_str_ctxt, param); rbml_w.end_tag(); } @@ -765,7 +764,7 @@ fn encode_generics<'a, 'tcx>(rbml_w: &mut Encoder, rbml_w.wr_tagged_u8(tag_predicate_space, space as u8); rbml_w.start_tag(tag_predicate_data); - tyencode::enc_predicate(rbml_w.writer, ty_str_ctxt, predicate); + tyencode::enc_predicate(rbml_w, ty_str_ctxt, predicate); rbml_w.end_tag(); rbml_w.end_tag(); @@ -964,11 +963,11 @@ fn encode_info_for_item(ecx: &EncodeContext, vis: ast::Visibility) { let tcx = ecx.tcx; - fn add_to_index(item: &ast::Item, rbml_w: &Encoder, + fn add_to_index(item: &ast::Item, rbml_w: &mut Encoder, 
index: &mut Vec>) { index.push(entry { val: item.id as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); } @@ -1224,7 +1223,7 @@ fn encode_info_for_item(ecx: &EncodeContext, index.push(entry { val: trait_item_def_id.def_id().node as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); let trait_item_type = @@ -1322,7 +1321,7 @@ fn encode_info_for_item(ecx: &EncodeContext, index.push(entry { val: item_def_id.def_id().node as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); rbml_w.start_tag(tag_items_data_item); @@ -1427,7 +1426,7 @@ fn encode_info_for_foreign_item(ecx: &EncodeContext, abi: abi::Abi) { index.push(entry { val: nitem.id as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); rbml_w.start_tag(tag_items_data_item); @@ -1527,7 +1526,7 @@ fn encode_info_for_items(ecx: &EncodeContext, rbml_w.start_tag(tag_items_data); index.push(entry { val: ast::CRATE_NODE_ID as i64, - pos: rbml_w.writer.tell().unwrap(), + pos: rbml_w.mark_stable_position(), }); encode_info_for_mod(ecx, rbml_w, @@ -1567,7 +1566,7 @@ fn encode_index(rbml_w: &mut Encoder, index: Vec>, mut write_fn: let mut bucket_locs = Vec::new(); rbml_w.start_tag(tag_index_buckets); for bucket in &buckets { - bucket_locs.push(rbml_w.writer.tell().unwrap()); + bucket_locs.push(rbml_w.mark_stable_position()); rbml_w.start_tag(tag_index_buckets_bucket); for elt in bucket { rbml_w.start_tag(tag_index_buckets_bucket_elt); @@ -1926,7 +1925,13 @@ pub const metadata_encoding_version : &'static [u8] = &[b'r', b'u', b's', b't', pub fn encode_metadata(parms: EncodeParams, krate: &ast::Crate) -> Vec { let mut wr = SeekableMemWriter::new(); encode_metadata_inner(&mut wr, parms, krate); + + // RBML compacts the encoded bytes whenever appropriate, + // so there are some garbages left after the end of the data. 
+ let metalen = wr.tell().unwrap() as uint; let mut v = wr.unwrap(); + v.truncate(metalen); + assert_eq!(v.len(), metalen); // And here we run into yet another obscure archive bug: in which metadata // loaded from archives may have trailing garbage bytes. Awhile back one of @@ -2008,7 +2013,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, reachable: reachable, }; - let mut rbml_w = writer::Encoder::new(wr); + let mut rbml_w = Encoder::new(wr); encode_crate_name(&mut rbml_w, &ecx.link_meta.crate_name); encode_crate_triple(&mut rbml_w, @@ -2099,7 +2104,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter, // Get the encoded string for a type pub fn encoded_ty<'tcx>(tcx: &ty::ctxt<'tcx>, t: Ty<'tcx>) -> String { let mut wr = SeekableMemWriter::new(); - tyencode::enc_ty(&mut wr, &tyencode::ctxt { + tyencode::enc_ty(&mut Encoder::new(&mut wr), &tyencode::ctxt { diag: tcx.sess.diagnostic(), ds: def_to_string, tcx: tcx, diff --git a/src/librustc/metadata/tyencode.rs b/src/librustc/metadata/tyencode.rs index ebb4153e32bce..86f1605b8bfaa 100644 --- a/src/librustc/metadata/tyencode.rs +++ b/src/librustc/metadata/tyencode.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -27,9 +27,9 @@ use syntax::ast; use syntax::diagnostic::SpanHandler; use syntax::parse::token; -use rbml::io::SeekableMemWriter; +use rbml::writer::Encoder; -macro_rules! mywrite { ($($arg:tt)*) => ({ write!($($arg)*); }) } +macro_rules! 
mywrite { ($w:expr, $($arg:tt)*) => ({ write!($w.writer, $($arg)*); }) } pub struct ctxt<'a, 'tcx: 'a> { pub diag: &'a SpanHandler, @@ -49,12 +49,14 @@ pub struct ty_abbrev { pub type abbrev_map<'tcx> = RefCell, ty_abbrev>>; -pub fn enc_ty<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, t: Ty<'tcx>) { +pub fn enc_ty<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, t: Ty<'tcx>) { match cx.abbrevs.borrow_mut().get(&t) { - Some(a) => { w.write_all(a.s.as_bytes()); return; } + Some(a) => { w.writer.write_all(a.s.as_bytes()); return; } None => {} } - let pos = w.tell().unwrap(); + + // type abbreviations needs a stable position + let pos = w.mark_stable_position(); match t.sty { ty::ty_bool => mywrite!(w, "b"), @@ -154,7 +156,7 @@ pub fn enc_ty<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, t: Ty<'t } } - let end = w.tell().unwrap(); + let end = w.mark_stable_position(); let len = end - pos; fn estimate_sz(u: u64) -> u64 { let mut n = u; @@ -171,21 +173,21 @@ pub fn enc_ty<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, t: Ty<'t } } -fn enc_mutability(w: &mut SeekableMemWriter, mt: ast::Mutability) { +fn enc_mutability(w: &mut Encoder, mt: ast::Mutability) { match mt { ast::MutImmutable => (), ast::MutMutable => mywrite!(w, "m"), } } -fn enc_mt<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +fn enc_mt<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, mt: ty::mt<'tcx>) { enc_mutability(w, mt.mutbl); enc_ty(w, cx, mt.ty); } -fn enc_opt(w: &mut SeekableMemWriter, t: Option, enc_f: F) where - F: FnOnce(&mut SeekableMemWriter, T), +fn enc_opt(w: &mut Encoder, t: Option, enc_f: F) where + F: FnOnce(&mut Encoder, T), { match t { None => mywrite!(w, "n"), @@ -196,11 +198,11 @@ fn enc_opt(w: &mut SeekableMemWriter, t: Option, enc_f: F) where } } -fn enc_vec_per_param_space<'a, 'tcx, T, F>(w: &mut SeekableMemWriter, +fn enc_vec_per_param_space<'a, 'tcx, T, F>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, v: &VecPerParamSpace, mut op: F) where - F: 
FnMut(&mut SeekableMemWriter, &ctxt<'a, 'tcx>, &T), + F: FnMut(&mut Encoder, &ctxt<'a, 'tcx>, &T), { for &space in &subst::ParamSpace::all() { mywrite!(w, "["); @@ -211,14 +213,14 @@ fn enc_vec_per_param_space<'a, 'tcx, T, F>(w: &mut SeekableMemWriter, } } -pub fn enc_substs<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_substs<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, substs: &subst::Substs<'tcx>) { enc_region_substs(w, cx, &substs.regions); enc_vec_per_param_space(w, cx, &substs.types, |w, cx, &ty| enc_ty(w, cx, ty)); } -fn enc_region_substs(w: &mut SeekableMemWriter, cx: &ctxt, substs: &subst::RegionSubsts) { +fn enc_region_substs(w: &mut Encoder, cx: &ctxt, substs: &subst::RegionSubsts) { match *substs { subst::ErasedRegions => { mywrite!(w, "e"); @@ -231,7 +233,7 @@ fn enc_region_substs(w: &mut SeekableMemWriter, cx: &ctxt, substs: &subst::Regio } } -pub fn enc_region(w: &mut SeekableMemWriter, cx: &ctxt, r: ty::Region) { +pub fn enc_region(w: &mut Encoder, cx: &ctxt, r: ty::Region) { match r { ty::ReLateBound(id, br) => { mywrite!(w, "b[{}|", id.depth); @@ -270,7 +272,7 @@ pub fn enc_region(w: &mut SeekableMemWriter, cx: &ctxt, r: ty::Region) { } } -fn enc_scope(w: &mut SeekableMemWriter, _cx: &ctxt, scope: region::CodeExtent) { +fn enc_scope(w: &mut Encoder, _cx: &ctxt, scope: region::CodeExtent) { match scope { region::CodeExtent::Misc(node_id) => mywrite!(w, "M{}", node_id), region::CodeExtent::Remainder(region::BlockRemainder { @@ -279,12 +281,12 @@ fn enc_scope(w: &mut SeekableMemWriter, _cx: &ctxt, scope: region::CodeExtent) { } } -fn enc_destruction_scope_data(w: &mut SeekableMemWriter, +fn enc_destruction_scope_data(w: &mut Encoder, d: region::DestructionScopeData) { mywrite!(w, "{}", d.node_id); } -fn enc_bound_region(w: &mut SeekableMemWriter, cx: &ctxt, br: ty::BoundRegion) { +fn enc_bound_region(w: &mut Encoder, cx: &ctxt, br: ty::BoundRegion) { match br { ty::BrAnon(idx) => { mywrite!(w, "a{}|", idx); @@ -303,40 
+305,40 @@ fn enc_bound_region(w: &mut SeekableMemWriter, cx: &ctxt, br: ty::BoundRegion) { } } -pub fn enc_trait_ref<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_trait_ref<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, s: &ty::TraitRef<'tcx>) { mywrite!(w, "{}|", (cx.ds)(s.def_id)); enc_substs(w, cx, s.substs); } -fn enc_unsafety(w: &mut SeekableMemWriter, p: ast::Unsafety) { +fn enc_unsafety(w: &mut Encoder, p: ast::Unsafety) { match p { ast::Unsafety::Normal => mywrite!(w, "n"), ast::Unsafety::Unsafe => mywrite!(w, "u"), } } -fn enc_abi(w: &mut SeekableMemWriter, abi: Abi) { +fn enc_abi(w: &mut Encoder, abi: Abi) { mywrite!(w, "["); mywrite!(w, "{}", abi.name()); mywrite!(w, "]") } -pub fn enc_bare_fn_ty<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_bare_fn_ty<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, ft: &ty::BareFnTy<'tcx>) { enc_unsafety(w, ft.unsafety); enc_abi(w, ft.abi); enc_fn_sig(w, cx, &ft.sig); } -pub fn enc_closure_ty<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_closure_ty<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, ft: &ty::ClosureTy<'tcx>) { enc_unsafety(w, ft.unsafety); enc_fn_sig(w, cx, &ft.sig); enc_abi(w, ft.abi); } -fn enc_fn_sig<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +fn enc_fn_sig<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, fsig: &ty::PolyFnSig<'tcx>) { mywrite!(w, "["); for ty in &fsig.0.inputs { @@ -358,7 +360,7 @@ fn enc_fn_sig<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, } } -pub fn enc_builtin_bounds(w: &mut SeekableMemWriter, _cx: &ctxt, bs: &ty::BuiltinBounds) { +pub fn enc_builtin_bounds(w: &mut Encoder, _cx: &ctxt, bs: &ty::BuiltinBounds) { for bound in bs { match bound { ty::BoundSend => mywrite!(w, "S"), @@ -371,7 +373,7 @@ pub fn enc_builtin_bounds(w: &mut SeekableMemWriter, _cx: &ctxt, bs: &ty::Builti mywrite!(w, "."); } -pub fn enc_existential_bounds<'a,'tcx>(w: &mut SeekableMemWriter, +pub fn 
enc_existential_bounds<'a,'tcx>(w: &mut Encoder, cx: &ctxt<'a,'tcx>, bs: &ty::ExistentialBounds<'tcx>) { let param_bounds = ty::ParamBounds { trait_bounds: vec!(), @@ -381,7 +383,7 @@ pub fn enc_existential_bounds<'a,'tcx>(w: &mut SeekableMemWriter, enc_bounds(w, cx, &param_bounds); } -pub fn enc_bounds<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_bounds<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, bs: &ty::ParamBounds<'tcx>) { enc_builtin_bounds(w, cx, &bs.builtin_bounds); @@ -400,7 +402,7 @@ pub fn enc_bounds<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, mywrite!(w, "."); } -pub fn enc_region_bounds<'a, 'tcx>(w: &mut SeekableMemWriter, +pub fn enc_region_bounds<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, rs: &[ty::Region]) { for &r in rs { @@ -411,7 +413,7 @@ pub fn enc_region_bounds<'a, 'tcx>(w: &mut SeekableMemWriter, mywrite!(w, "."); } -pub fn enc_type_param_def<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tcx>, +pub fn enc_type_param_def<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, v: &ty::TypeParameterDef<'tcx>) { mywrite!(w, "{}:{}|{}|{}|", token::get_name(v.name), (cx.ds)(v.def_id), @@ -420,7 +422,7 @@ pub fn enc_type_param_def<'a, 'tcx>(w: &mut SeekableMemWriter, cx: &ctxt<'a, 'tc enc_object_lifetime_default(w, cx, v.object_lifetime_default); } -fn enc_object_lifetime_default<'a, 'tcx>(w: &mut SeekableMemWriter, +fn enc_object_lifetime_default<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, default: Option) { @@ -434,7 +436,7 @@ fn enc_object_lifetime_default<'a, 'tcx>(w: &mut SeekableMemWriter, } } -pub fn enc_predicate<'a, 'tcx>(w: &mut SeekableMemWriter, +pub fn enc_predicate<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, p: &ty::Predicate<'tcx>) { @@ -465,7 +467,7 @@ pub fn enc_predicate<'a, 'tcx>(w: &mut SeekableMemWriter, } } -fn enc_projection_predicate<'a, 'tcx>(w: &mut SeekableMemWriter, +fn enc_projection_predicate<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, data:
&ty::ProjectionPredicate<'tcx>) { enc_trait_ref(w, cx, &*data.projection_ty.trait_ref); diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs index 5983829ed8fbe..da3e6b4765b98 100644 --- a/src/librustc/middle/astencode.rs +++ b/src/librustc/middle/astencode.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -43,13 +43,14 @@ use std::old_io::Seek; use std::num::FromPrimitive; use std::rc::Rc; -use rbml::io::SeekableMemWriter; -use rbml::{reader, writer}; +use rbml::reader; +use rbml::writer::Encoder; use rbml; use serialize; use serialize::{Decodable, Decoder, DecoderHelpers, Encodable}; use serialize::{EncoderHelpers}; +#[cfg(test)] use rbml::io::SeekableMemWriter; #[cfg(test)] use syntax::parse; #[cfg(test)] use syntax::print::pprust; @@ -68,8 +69,6 @@ trait tr_intern { fn tr_intern(&self, dcx: &DecodeContext) -> ast::DefId; } -pub type Encoder<'a> = writer::Encoder<'a, SeekableMemWriter>; - // ______________________________________________________________________ // Top-level methods. 
@@ -911,7 +910,7 @@ impl<'a, 'tcx> rbml_writer_helpers<'tcx> for Encoder<'a> { fn emit_type_param_def<'b>(&mut self, ecx: &e::EncodeContext<'b, 'tcx>, type_param_def: &ty::TypeParameterDef<'tcx>) { self.emit_opaque(|this| { - Ok(tyencode::enc_type_param_def(this.writer, + Ok(tyencode::enc_type_param_def(this, &ecx.ty_str_ctxt(), type_param_def)) }); @@ -920,7 +919,7 @@ impl<'a, 'tcx> rbml_writer_helpers<'tcx> for Encoder<'a> { fn emit_predicate<'b>(&mut self, ecx: &e::EncodeContext<'b, 'tcx>, predicate: &ty::Predicate<'tcx>) { self.emit_opaque(|this| { - Ok(tyencode::enc_predicate(this.writer, + Ok(tyencode::enc_predicate(this, &ecx.ty_str_ctxt(), predicate)) }); @@ -954,20 +953,20 @@ impl<'a, 'tcx> rbml_writer_helpers<'tcx> for Encoder<'a> { fn emit_existential_bounds<'b>(&mut self, ecx: &e::EncodeContext<'b,'tcx>, bounds: &ty::ExistentialBounds<'tcx>) { - self.emit_opaque(|this| Ok(tyencode::enc_existential_bounds(this.writer, + self.emit_opaque(|this| Ok(tyencode::enc_existential_bounds(this, &ecx.ty_str_ctxt(), bounds))); } fn emit_builtin_bounds(&mut self, ecx: &e::EncodeContext, bounds: &ty::BuiltinBounds) { - self.emit_opaque(|this| Ok(tyencode::enc_builtin_bounds(this.writer, + self.emit_opaque(|this| Ok(tyencode::enc_builtin_bounds(this, &ecx.ty_str_ctxt(), bounds))); } fn emit_substs<'b>(&mut self, ecx: &e::EncodeContext<'b, 'tcx>, substs: &subst::Substs<'tcx>) { - self.emit_opaque(|this| Ok(tyencode::enc_substs(this.writer, + self.emit_opaque(|this| Ok(tyencode::enc_substs(this, &ecx.ty_str_ctxt(), substs))); } @@ -1995,7 +1994,7 @@ fn mk_ctxt() -> parse::ParseSess { fn roundtrip(in_item: Option>) { let in_item = in_item.unwrap(); let mut wr = SeekableMemWriter::new(); - encode_item_ast(&mut writer::Encoder::new(&mut wr), &*in_item); + encode_item_ast(&mut Encoder::new(&mut wr), &*in_item); let rbml_doc = rbml::Doc::new(wr.get_ref()); let out_item = decode_item_ast(rbml_doc); From 7b6e43c07f18243873d7cff428cca8cf8283467b Mon Sep 17 00:00:00 2001 From: 
Kang Seonghoon Date: Mon, 2 Mar 2015 18:54:51 +0900 Subject: [PATCH 08/12] metadata: Space-optimize empty vectors and maps. So that `EsVec 82 EsSub8 00` becomes `EsVec 80` now. --- src/librbml/lib.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index d74d9ae812511..fee9a69897e0e 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -64,6 +64,10 @@ impl<'doc> Doc<'doc> { reader::get_doc(*self, tag) } + pub fn is_empty(&self) -> bool { + self.start == self.end + } + pub fn as_str_slice<'a>(&'a self) -> &'a str { str::from_utf8(&self.data[self.start..self.end]).unwrap() } @@ -462,6 +466,11 @@ pub mod reader { } fn _next_sub(&mut self) -> DecodeResult { + // empty vector/map optimization + if self.parent.is_empty() { + return Ok(0); + } + let (big, doc) = try!(self.next_doc2(EsSub8, EsSub32)); let r = if big { doc_as_u32(doc) as uint @@ -1148,6 +1157,10 @@ pub mod writer { fn emit_seq(&mut self, len: uint, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { + if len == 0 { + // empty vector optimization + return self.wr_tagged_bytes(EsVec as uint, &[]); + } try!(self.start_tag(EsVec as uint)); try!(self._emit_tagged_sub(len)); @@ -1167,6 +1180,10 @@ pub mod writer { fn emit_map(&mut self, len: uint, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { + if len == 0 { + // empty map optimization + return self.wr_tagged_bytes(EsMap as uint, &[]); + } try!(self.start_tag(EsMap as uint)); try!(self._emit_tagged_sub(len)); From 36a09a162dd04cf1ba092a837978cecea88bc529 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Mon, 2 Mar 2015 20:26:36 +0900 Subject: [PATCH 09/12] metadata: Flatten `tag_table_id` and `tag_table_val` tags. This avoids a biggish eight-byte `tag_table_id` tag in favor of autoserialized integer tags, which are smaller and can be later used to encode them in the optimal number of bytes. `NodeId` was u32 after all. 
Previously: <------------- len1 --------------> tag_table_* tag_table_id 88 tag_table_val <-- len2 ---> Now: <--------------- len ---------------> tag_table_* U32 --- src/librustc/metadata/common.rs | 3 +- src/librustc/middle/astencode.rs | 97 +++++++++++--------------------- 2 files changed, 35 insertions(+), 65 deletions(-) diff --git a/src/librustc/metadata/common.rs b/src/librustc/metadata/common.rs index f0a465b73f60d..a58ef53de9a71 100644 --- a/src/librustc/metadata/common.rs +++ b/src/librustc/metadata/common.rs @@ -127,8 +127,7 @@ pub enum astencode_tag { // Reserves 0x50 -- 0x6f tag_id_range = 0x52, tag_table = 0x53, - tag_table_id = 0x54, - tag_table_val = 0x55, + // GAP 0x54, 0x55 tag_table_def = 0x56, tag_table_node_type = 0x57, tag_table_item_subst = 0x58, diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs index da3e6b4765b98..7a75d844838a6 100644 --- a/src/librustc/middle/astencode.rs +++ b/src/librustc/middle/astencode.rs @@ -413,9 +413,8 @@ fn decode_ast(par_doc: rbml::Doc) -> ast::InlinedItem { // ______________________________________________________________________ // Encoding and decoding of ast::def -fn decode_def(dcx: &DecodeContext, doc: rbml::Doc) -> def::Def { - let mut dsr = reader::Decoder::new(doc); - let def: def::Def = Decodable::decode(&mut dsr).unwrap(); +fn decode_def(dcx: &DecodeContext, dsr: &mut reader::Decoder) -> def::Def { + let def: def::Def = Decodable::decode(dsr).unwrap(); def.tr(dcx) } @@ -1114,7 +1113,7 @@ impl<'a> write_tag_and_id for Encoder<'a> { } fn id(&mut self, id: ast::NodeId) { - self.wr_tagged_u64(c::tag_table_id as uint, id as u64); + id.encode(self).unwrap(); } } @@ -1151,51 +1150,44 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, if let Some(def) = tcx.def_map.borrow().get(&id).map(|d| d.full_def()) { rbml_w.tag(c::tag_table_def, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| def.encode(rbml_w).unwrap()); + def.encode(rbml_w).unwrap(); }) } if let 
Some(ty) = tcx.node_types.borrow().get(&id) { rbml_w.tag(c::tag_table_node_type, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_ty(ecx, *ty); - }) + rbml_w.emit_ty(ecx, *ty); }) } if let Some(item_substs) = tcx.item_substs.borrow().get(&id) { rbml_w.tag(c::tag_table_item_subst, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_substs(ecx, &item_substs.substs); - }) + rbml_w.emit_substs(ecx, &item_substs.substs); }) } if let Some(fv) = tcx.freevars.borrow().get(&id) { rbml_w.tag(c::tag_table_freevars, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_from_vec(fv, |rbml_w, fv_entry| { - Ok(encode_freevar_entry(rbml_w, fv_entry)) - }); - }) + rbml_w.emit_from_vec(fv, |rbml_w, fv_entry| { + Ok(encode_freevar_entry(rbml_w, fv_entry)) + }); }); for freevar in fv { rbml_w.tag(c::tag_table_upvar_capture_map, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - let var_id = freevar.def.def_id().node; - let upvar_id = ty::UpvarId { - var_id: var_id, - closure_expr_id: id - }; - let upvar_capture = tcx.upvar_capture_map.borrow()[upvar_id].clone(); - var_id.encode(rbml_w); - upvar_capture.encode(rbml_w); - }) + + let var_id = freevar.def.def_id().node; + let upvar_id = ty::UpvarId { + var_id: var_id, + closure_expr_id: id + }; + let upvar_capture = tcx.upvar_capture_map.borrow()[upvar_id].clone(); + var_id.encode(rbml_w); + upvar_capture.encode(rbml_w); }) } } @@ -1204,18 +1196,14 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, if let Some(type_scheme) = tcx.tcache.borrow().get(&lid) { rbml_w.tag(c::tag_table_tcache, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_type_scheme(ecx, type_scheme.clone()); - }) + rbml_w.emit_type_scheme(ecx, type_scheme.clone()); }) } if let Some(type_param_def) = tcx.ty_param_defs.borrow().get(&id) { rbml_w.tag(c::tag_table_param_defs, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, 
|rbml_w| { - rbml_w.emit_type_param_def(ecx, type_param_def) - }) + rbml_w.emit_type_param_def(ecx, type_param_def) }) } @@ -1223,18 +1211,14 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, if let Some(method) = tcx.method_map.borrow().get(&method_call) { rbml_w.tag(c::tag_table_method_map, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - encode_method_callee(ecx, rbml_w, method_call.adjustment, method) - }) + encode_method_callee(ecx, rbml_w, method_call.adjustment, method) }) } if let Some(trait_ref) = tcx.object_cast_map.borrow().get(&id) { rbml_w.tag(c::tag_table_object_cast_map, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_trait_ref(ecx, &*trait_ref.0); - }) + rbml_w.emit_trait_ref(ecx, &*trait_ref.0); }) } @@ -1245,9 +1229,7 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, if let Some(method) = tcx.method_map.borrow().get(&method_call) { rbml_w.tag(c::tag_table_method_map, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - encode_method_callee(ecx, rbml_w, method_call.adjustment, method) - }) + encode_method_callee(ecx, rbml_w, method_call.adjustment, method) }) } } @@ -1258,10 +1240,8 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, if let Some(method) = tcx.method_map.borrow().get(&method_call) { rbml_w.tag(c::tag_table_method_map, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - encode_method_callee(ecx, rbml_w, - method_call.adjustment, method) - }) + encode_method_callee(ecx, rbml_w, + method_call.adjustment, method) }) } } @@ -1273,36 +1253,28 @@ fn encode_side_tables_for_id(ecx: &e::EncodeContext, rbml_w.tag(c::tag_table_adjustments, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_auto_adjustment(ecx, adjustment); - }) + rbml_w.emit_auto_adjustment(ecx, adjustment); }) } if let Some(closure_type) = tcx.closure_tys.borrow().get(&ast_util::local_def(id)) { rbml_w.tag(c::tag_table_closure_tys, |rbml_w| { 
rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - rbml_w.emit_closure_type(ecx, closure_type); - }) + rbml_w.emit_closure_type(ecx, closure_type); }) } if let Some(closure_kind) = tcx.closure_kinds.borrow().get(&ast_util::local_def(id)) { rbml_w.tag(c::tag_table_closure_kinds, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - encode_closure_kind(rbml_w, *closure_kind) - }) + encode_closure_kind(rbml_w, *closure_kind) }) } for &qualif in tcx.const_qualif_map.borrow().get(&id).iter() { rbml_w.tag(c::tag_table_const_qualif, |rbml_w| { rbml_w.id(id); - rbml_w.tag(c::tag_table_val, |rbml_w| { - qualif.encode(rbml_w).unwrap() - }) + qualif.encode(rbml_w).unwrap() }) } } @@ -1830,8 +1802,9 @@ fn decode_side_tables(dcx: &DecodeContext, ast_doc: rbml::Doc) { let tbl_doc = ast_doc.get(c::tag_table as uint); reader::docs(tbl_doc, |tag, entry_doc| { - let id0 = entry_doc.get(c::tag_table_id as uint).as_int(); - let id = dcx.tr_id(id0 as ast::NodeId); + let mut entry_dsr = reader::Decoder::new(entry_doc); + let id0: ast::NodeId = Decodable::decode(&mut entry_dsr).unwrap(); + let id = dcx.tr_id(id0); debug!(">> Side table document with tag 0x{:x} \ found for id {} (orig {})", @@ -1844,13 +1817,11 @@ fn decode_side_tables(dcx: &DecodeContext, tag)); } Some(value) => { - let val_doc = entry_doc.get(c::tag_table_val as uint); - let mut val_dsr = reader::Decoder::new(val_doc); - let val_dsr = &mut val_dsr; + let val_dsr = &mut entry_dsr; match value { c::tag_table_def => { - let def = decode_def(dcx, val_doc); + let def = decode_def(dcx, val_dsr); dcx.tcx.def_map.borrow_mut().insert(id, def::PathResolution { base_def: def, // This doesn't matter cross-crate. From fe73d382eeabaed0c37425388ab15ad690e5906f Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Tue, 3 Mar 2015 00:34:50 +0900 Subject: [PATCH 10/12] metadata: Compact integer encoding. Previously, all auto-serialized tags were strongly typed.
However this is not strictly required, and instead it can be exploited to provide the optimal encoding for smaller integers. This commit repurposes `EsI8`/`EsU8` through `EsI64`/`EsU64` tags to represent *any* integers with given ranges: It is now possible to encode `42u64` as two bytes `EsU8 0x2a`, for example. There are some limitations: * It does not apply to non-auto-serialized tags for obvious reasons. Fortunately, we have already eliminated the biggest source of such tag in favor of auto-serialized tags: `tag_table_id`. * Bigger tags cannot be used to represent smaller types. * Signed tags and unsigned tags do not mix. --- src/librbml/lib.rs | 207 +++++++++++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 93 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index fee9a69897e0e..c13aeb4cd614b 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -87,39 +87,37 @@ pub enum EbmlEncoderTag { // tags 00..1f are reserved for auto-serialization. // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded. 
- EsUint = 0x00, // + 8 bytes - EsU64 = 0x01, // + 8 bytes - EsU32 = 0x02, // + 4 bytes - EsU16 = 0x03, // + 2 bytes - EsU8 = 0x04, // + 1 byte - EsInt = 0x05, // + 8 bytes - EsI64 = 0x06, // + 8 bytes - EsI32 = 0x07, // + 4 bytes - EsI16 = 0x08, // + 2 bytes - EsI8 = 0x09, // + 1 byte - EsBool = 0x0a, // + 1 byte - EsChar = 0x0b, // + 4 bytes - EsF64 = 0x0c, // + 8 bytes - EsF32 = 0x0d, // + 4 bytes - EsSub8 = 0x0e, // + 1 byte - EsSub32 = 0x0f, // + 4 bytes - - EsStr = 0x10, - EsEnum = 0x11, // encodes the variant id as the first EsSub* - EsVec = 0x12, // encodes the # of elements as the first EsSub* - EsVecElt = 0x13, - EsMap = 0x14, // encodes the # of pairs as the first EsSub* - EsMapKey = 0x15, - EsMapVal = 0x16, - EsOpaque = 0x17, + EsU64 = 0x00, // + 8 bytes + EsU32 = 0x01, // + 4 bytes + EsU16 = 0x02, // + 2 bytes + EsU8 = 0x03, // + 1 byte + EsI64 = 0x04, // + 8 bytes + EsI32 = 0x05, // + 4 bytes + EsI16 = 0x06, // + 2 bytes + EsI8 = 0x07, // + 1 byte + EsBool = 0x08, // + 1 byte + EsChar = 0x09, // + 4 bytes + EsF64 = 0x0a, // + 8 bytes + EsF32 = 0x0b, // + 4 bytes + EsSub8 = 0x0c, // + 1 byte + EsSub32 = 0x0d, // + 4 bytes + + EsStr = 0x0e, + EsEnum = 0x0f, // encodes the variant id as the first EsSub* + EsVec = 0x10, // encodes the # of elements as the first EsSub* + EsVecElt = 0x11, + EsMap = 0x12, // encodes the # of pairs as the first EsSub* + EsMapKey = 0x13, + EsMapVal = 0x14, + EsOpaque = 0x15, } const NUM_TAGS: uint = 0x1000; -const NUM_IMPLICIT_TAGS: uint = 0x10; +const NUM_IMPLICIT_TAGS: uint = 0x0e; static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ - 8, 8, 4, 2, 1, // EsU* - 8, 8, 4, 2, 1, // ESI* + 8, 4, 2, 1, // EsU* + 8, 4, 2, 1, // ESI* 1, // EsBool 4, // EsChar 8, 4, // EsF* @@ -154,9 +152,9 @@ pub mod reader { use serialize; use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, - EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, + EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, 
EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, - EsUint, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, + EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; pub type DecodeResult = Result; @@ -420,37 +418,6 @@ pub mod reader { Ok(r_doc) } - fn next_doc2(&mut self, - exp_tag1: EbmlEncoderTag, - exp_tag2: EbmlEncoderTag) -> DecodeResult<(bool, Doc<'doc>)> { - assert!((exp_tag1 as uint) != (exp_tag2 as uint)); - debug!(". next_doc2(exp_tag1={:?}, exp_tag2={:?})", exp_tag1, exp_tag2); - if self.pos >= self.parent.end { - return Err(Expected(format!("no more documents in \ - current node!"))); - } - let TaggedDoc { tag: r_tag, doc: r_doc } = - try!(doc_at(self.parent.data, self.pos)); - debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}", - self.parent.start, - self.parent.end, - self.pos, - r_tag, - r_doc.start, - r_doc.end); - if r_tag != (exp_tag1 as uint) && r_tag != (exp_tag2 as uint) { - return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but \ - found tag {:?}", exp_tag1, exp_tag2, r_tag))); - } - if r_doc.end > self.parent.end { - return Err(Expected(format!("invalid EBML, child extends to \ - {:#x}, parent to {:#x}", - r_doc.end, self.parent.end))); - } - self.pos = r_doc.end; - Ok((r_tag == (exp_tag2 as uint), r_doc)) - } - fn push_doc(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { @@ -471,16 +438,59 @@ pub mod reader { return Ok(0); } - let (big, doc) = try!(self.next_doc2(EsSub8, EsSub32)); - let r = if big { - doc_as_u32(doc) as uint + let TaggedDoc { tag: r_tag, doc: r_doc } = + try!(doc_at(self.parent.data, self.pos)); + let r = if r_tag == (EsSub8 as uint) { + doc_as_u8(r_doc) as uint + } else if r_tag == (EsSub32 as uint) { + doc_as_u32(r_doc) as uint } else { - doc_as_u8(doc) as uint + return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but \ + found tag {:?}", EsSub8, 
EsSub32, r_tag))); }; + if r_doc.end > self.parent.end { + return Err(Expected(format!("invalid EBML, child extends to \ + {:#x}, parent to {:#x}", + r_doc.end, self.parent.end))); + } + self.pos = r_doc.end; debug!("_next_sub result={:?}", r); Ok(r) } + // variable-length unsigned integer with different tags + fn _next_int(&mut self, + first_tag: EbmlEncoderTag, + last_tag: EbmlEncoderTag) -> DecodeResult { + if self.pos >= self.parent.end { + return Err(Expected(format!("no more documents in \ + current node!"))); + } + + let TaggedDoc { tag: r_tag, doc: r_doc } = + try!(doc_at(self.parent.data, self.pos)); + let r = if first_tag as uint <= r_tag && r_tag <= last_tag as uint { + match last_tag as uint - r_tag { + 0 => doc_as_u8(r_doc) as u64, + 1 => doc_as_u16(r_doc) as u64, + 2 => doc_as_u32(r_doc) as u64, + 3 => doc_as_u64(r_doc) as u64, + _ => unreachable!(), + } + } else { + return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \ + found tag {:?}", first_tag, last_tag, r_tag))); + }; + if r_doc.end > self.parent.end { + return Err(Expected(format!("invalid EBML, child extends to \ + {:#x}, parent to {:#x}", + r_doc.end, self.parent.end))); + } + self.pos = r_doc.end; + debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r); + Ok(r) + } + pub fn read_opaque(&mut self, op: F) -> DecodeResult where F: FnOnce(&mut Decoder, Doc) -> DecodeResult, { @@ -502,12 +512,12 @@ pub mod reader { type Error = Error; fn read_nil(&mut self) -> DecodeResult<()> { Ok(()) } - fn read_u64(&mut self) -> DecodeResult { Ok(doc_as_u64(try!(self.next_doc(EsU64)))) } - fn read_u32(&mut self) -> DecodeResult { Ok(doc_as_u32(try!(self.next_doc(EsU32)))) } - fn read_u16(&mut self) -> DecodeResult { Ok(doc_as_u16(try!(self.next_doc(EsU16)))) } - fn read_u8 (&mut self) -> DecodeResult { Ok(doc_as_u8 (try!(self.next_doc(EsU8 )))) } + fn read_u64(&mut self) -> DecodeResult { self._next_int(EsU64, EsU8) } + fn read_u32(&mut self) -> DecodeResult { 
Ok(try!(self._next_int(EsU32, EsU8)) as u32) } + fn read_u16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU16, EsU8)) as u16) } + fn read_u8(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsU8)))) } fn read_uint(&mut self) -> DecodeResult { - let v = doc_as_u64(try!(self.next_doc(EsUint))); + let v = try!(self._next_int(EsU64, EsU8)); if v > (::std::usize::MAX as u64) { Err(IntTooBig(v as uint)) } else { @@ -515,20 +525,12 @@ pub mod reader { } } - fn read_i64(&mut self) -> DecodeResult { - Ok(doc_as_u64(try!(self.next_doc(EsI64))) as i64) - } - fn read_i32(&mut self) -> DecodeResult { - Ok(doc_as_u32(try!(self.next_doc(EsI32))) as i32) - } - fn read_i16(&mut self) -> DecodeResult { - Ok(doc_as_u16(try!(self.next_doc(EsI16))) as i16) - } - fn read_i8 (&mut self) -> DecodeResult { - Ok(doc_as_u8(try!(self.next_doc(EsI8 ))) as i8) - } + fn read_i64(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI64, EsI8)) as i64) } + fn read_i32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI32, EsI8)) as i32) } + fn read_i16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI16, EsI8)) as i16) } + fn read_i8(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8) } fn read_int(&mut self) -> DecodeResult { - let v = doc_as_u64(try!(self.next_doc(EsInt))) as i64; + let v = try!(self._next_int(EsI64, EsI8)) as i64; if v > (isize::MAX as i64) || v < (isize::MIN as i64) { debug!("FIXME \\#6122: Removing this makes this function miscompile"); Err(IntTooBig(v as uint)) @@ -739,10 +741,11 @@ pub mod writer { use std::old_io::{Writer, Seek}; use std::old_io; use std::slice::bytes; + use std::num::ToPrimitive; use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, - EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8, - EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint, + EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, + EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsOpaque, NUM_IMPLICIT_TAGS, 
NUM_TAGS }; use super::io::SeekableMemWriter; @@ -1010,32 +1013,50 @@ pub mod writer { } fn emit_uint(&mut self, v: uint) -> EncodeResult { - self.wr_tagged_raw_u64(EsUint as uint, v as u64) + self.emit_u64(v as u64) } fn emit_u64(&mut self, v: u64) -> EncodeResult { - self.wr_tagged_raw_u64(EsU64 as uint, v) + match v.to_u32() { + Some(v) => self.emit_u32(v), + None => self.wr_tagged_raw_u64(EsU64 as uint, v) + } } fn emit_u32(&mut self, v: u32) -> EncodeResult { - self.wr_tagged_raw_u32(EsU32 as uint, v) + match v.to_u16() { + Some(v) => self.emit_u16(v), + None => self.wr_tagged_raw_u32(EsU32 as uint, v) + } } fn emit_u16(&mut self, v: u16) -> EncodeResult { - self.wr_tagged_raw_u16(EsU16 as uint, v) + match v.to_u8() { + Some(v) => self.emit_u8(v), + None => self.wr_tagged_raw_u16(EsU16 as uint, v) + } } fn emit_u8(&mut self, v: u8) -> EncodeResult { self.wr_tagged_raw_u8(EsU8 as uint, v) } fn emit_int(&mut self, v: int) -> EncodeResult { - self.wr_tagged_raw_i64(EsInt as uint, v as i64) + self.emit_i64(v as i64) } fn emit_i64(&mut self, v: i64) -> EncodeResult { - self.wr_tagged_raw_i64(EsI64 as uint, v) + match v.to_i32() { + Some(v) => self.emit_i32(v), + None => self.wr_tagged_raw_i64(EsI64 as uint, v) + } } fn emit_i32(&mut self, v: i32) -> EncodeResult { - self.wr_tagged_raw_i32(EsI32 as uint, v) + match v.to_i16() { + Some(v) => self.emit_i16(v), + None => self.wr_tagged_raw_i32(EsI32 as uint, v) + } } fn emit_i16(&mut self, v: i16) -> EncodeResult { - self.wr_tagged_raw_i16(EsI16 as uint, v) + match v.to_i8() { + Some(v) => self.emit_i8(v), + None => self.wr_tagged_raw_i16(EsI16 as uint, v) + } } fn emit_i8(&mut self, v: i8) -> EncodeResult { self.wr_tagged_raw_i8(EsI8 as uint, v) From ef3c7af172de035f77732f8444ca073154b10307 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Tue, 3 Mar 2015 01:54:49 +0900 Subject: [PATCH 11/12] metadata: Bump the metadata encoding version. We have changed the encoding enough to bump that. 
Also added some notes about metadata encoding to librbml/lib.rs. --- src/librbml/lib.rs | 106 +++++++++++++++++++++++++++++-- src/librustc/metadata/encoder.rs | 2 +- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index c13aeb4cd614b..77204c91aba3b 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -8,12 +8,108 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Really Bad Markup Language (rbml) is a temporary measure until we migrate -//! the rust object metadata to a better serialization format. It is not -//! intended to be used by users. +//! Really Bad Markup Language (rbml) is an internal serialization format of rustc. +//! This is not intended to be used by users. //! -//! It is loosely based on the Extensible Binary Markup Language (ebml): -//! http://www.matroska.org/technical/specs/rfc/index.html +//! Originally based on the Extensible Binary Markup Language +//! (ebml; http://www.matroska.org/technical/specs/rfc/index.html), +//! it is now a separate format tuned for the rust object metadata. +//! +//! # Encoding +//! +//! RBML document consists of the tag, length and data. +//! The encoded data can contain multiple RBML documents concatenated. +//! +//! **Tags** are a hint for the following data. +//! Tags are a number from 0x000 to 0xfff, where 0xf0 through 0xff is reserved. +//! Tags less than 0xf0 are encoded in one literal byte. +//! Tags greater than 0xff are encoded in two big-endian bytes, +//! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`) +//! +//! **Lengths** encode the length of the following data. +//! It is a variable-length unsigned int, and one of the following forms: +//! +//! - `80` through `fe` for lengths up to 0x7e; +//! - `40 ff` through `7f ff` for lengths up to 0x3fff; +//! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff; +//! 
- `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff. +//! +//! The "overlong" form is allowed so that the length can be encoded +//! without the prior knowledge of the encoded data. +//! For example, the length 0 can be represented either by `80`, `40 00`, +//! `20 00 00` or `10 00 00 00`. +//! The encoder tries to minimize the length if possible. +//! Also, some predefined tags listed below are so commonly used that +//! their lengths are omitted ("implicit length"). +//! +//! **Data** can be either binary bytes or zero or more nested RBML documents. +//! Nested documents cannot overflow, and should be entirely contained +//! within a parent document. +//! +//! # Predefined Tags +//! +//! Most RBML tags are defined by the application. +//! (For the rust object metadata, see also `rustc::metadata::common`.) +//! RBML itself does define a set of predefined tags however, +//! intended for the auto-serialization implementation. +//! +//! Predefined tags with an implicit length: +//! +//! - `U64` (`00`): 8-byte big endian unsigned integer. +//! - `U32` (`01`): 4-byte big endian unsigned integer. +//! - `U16` (`02`): 2-byte big endian unsigned integer. +//! - `U8` (`03`): 1-byte unsigned integer. +//! Any of `U*` tags can be used to encode primitive unsigned integer types, +//! as long as it is no greater than the actual size. +//! For example, `u8` can only be represented via the `U8` tag. +//! +//! - `I64` (`04`): 8-byte big endian signed integer. +//! - `I32` (`05`): 4-byte big endian signed integer. +//! - `I16` (`06`): 2-byte big endian signed integer. +//! - `I8` (`07`): 1-byte signed integer. +//! Similar to `U*` tags. Always uses two's complement encoding. +//! +//! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true. +//! +//! - `Char` (`09`): 4-byte big endian Unicode scalar value. +//! Surrogate pairs or out-of-bound values are invalid. +//! +//! - `F64` (`0a`): 8-byte big endian unsigned integer representing +//! 
IEEE 754 binary64 floating-point format. +//! - `F32` (`0b`): 4-byte big endian unsigned integer representing +//! IEEE 754 binary32 floating-point format. +//! +//! - `Sub8` (`0c`): 1-byte unsigned integer for supplementary information. +//! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information. +//! Those two tags normally occur as the first subdocument of certain tags, +//! namely `Enum`, `Vec` and `Map`, to provide a variant or size information. +//! They can be used interchangeably. +//! +//! Predefined tags with an explicit length: +//! +//! - `Str` (`0e`): A UTF-8-encoded string. +//! +//! - `Enum` (`0f`): An enum. +//! The first subdocument should be `Sub*` tags with a variant ID. +//! Subsequent subdocuments, if any, encode variant arguments. +//! +//! - `Vec` (`10`): A vector (sequence). +//! - `VecElt` (`11`): A vector element. +//! The first subdocument should be `Sub*` tags with the number of elements. +//! Subsequent subdocuments should be `VecElt` tag per each element. +//! +//! - `Map` (`12`): A map (associated array). +//! - `MapKey` (`13`): A key part of the map entry. +//! - `MapVal` (`14`): A value part of the map entry. +//! The first subdocument should be `Sub*` tags with the number of entries. +//! Subsequent subdocuments should be an alternating sequence of +//! `MapKey` and `MapVal` tags per each entry. +//! +//! - `Opaque` (`15`): An opaque, custom-format tag. +//! Used to wrap ordinary custom tags or data in the auto-serialized context. +//! Rustc typically uses this to encode type informations. +//! +//! First 0x20 tags are reserved by RBML; custom tags start at 0x20.
#![crate_name = "rbml"] #![unstable(feature = "rustc_private")] diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index fa9e28bf56d23..131a299cc500f 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -1920,7 +1920,7 @@ fn encode_dylib_dependency_formats(rbml_w: &mut Encoder, ecx: &EncodeContext) { // NB: Increment this as you change the metadata encoding version. #[allow(non_upper_case_globals)] -pub const metadata_encoding_version : &'static [u8] = &[b'r', b'u', b's', b't', 0, 0, 0, 1 ]; +pub const metadata_encoding_version : &'static [u8] = &[b'r', b'u', b's', b't', 0, 0, 0, 2 ]; pub fn encode_metadata(parms: EncodeParams, krate: &ast::Crate) -> Vec { let mut wr = SeekableMemWriter::new(); From 2008b54bf3687996d2e3a3ab151a0b6330a51b7a Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Tue, 3 Mar 2015 11:18:12 +0900 Subject: [PATCH 12/12] metadata: Reordered integral tags in the ascending order. Also clarified the mysterious `_next_int` method. --- src/librbml/lib.rs | 106 +++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index 77204c91aba3b..844d097bdafa9 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -55,18 +55,18 @@ //! //! Predefined tags with an implicit length: //! -//! - `U64` (`00`): 8-byte big endian unsigned integer. -//! - `U32` (`01`): 4-byte big endian unsigned integer. -//! - `U16` (`02`): 2-byte big endian unsigned integer. -//! - `U8` (`03`): 1-byte unsigned integer. +//! - `U8` (`00`): 1-byte unsigned integer. +//! - `U16` (`01`): 2-byte big endian unsigned integer. +//! - `U32` (`02`): 4-byte big endian unsigned integer. +//! - `U64` (`03`): 8-byte big endian unsigned integer. //! Any of `U*` tags can be used to encode primitive unsigned integer types, //! as long as it is no greater than the actual size. //! For example, `u8` can only be represented via the `U8` tag. 
//! -//! - `I64` (`04`): 8-byte big endian signed integer. -//! - `I32` (`05`): 4-byte big endian signed integer. -//! - `I16` (`06`): 2-byte big endian signed integer. -//! - `I8` (`07`): 1-byte signed integer. +//! - `I8` (`04`): 1-byte signed integer. +//! - `I16` (`05`): 2-byte big endian signed integer. +//! - `I32` (`06`): 4-byte big endian signed integer. +//! - `I64` (`07`): 8-byte big endian signed integer. //! Similar to `U*` tags. Always uses two's complement encoding. //! //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true. @@ -74,10 +74,10 @@ //! - `Char` (`09`): 4-byte big endian Unicode scalar value. //! Surrogate pairs or out-of-bound values are invalid. //! -//! - `F64` (`0a`): 8-byte big endian unsigned integer representing -//! IEEE 754 binary64 floating-point format. -//! - `F32` (`0b`): 4-byte big endian unsigned integer representing +//! - `F32` (`0a`): 4-byte big endian unsigned integer representing //! IEEE 754 binary32 floating-point format. +//! - `F64` (`0b`): 8-byte big endian unsigned integer representing +//! IEEE 754 binary64 floating-point format. //! //! - `Sub8` (`0c`): 1-byte unsigned integer for supplementary information. //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information. @@ -87,25 +87,25 @@ //! //! Predefined tags with an explicit length: //! -//! - `Str` (`0e`): A UTF-8-encoded string. +//! - `Str` (`10`): A UTF-8-encoded string. //! -//! - `Enum` (`0f`): An enum. +//! - `Enum` (`11`): An enum. //! The first subdocument should be `Sub*` tags with a variant ID. //! Subsequent subdocuments, if any, encode variant arguments. //! -//! - `Vec` (`10`): A vector (sequence). -//! - `VecElt` (`11`): A vector element. +//! - `Vec` (`12`): A vector (sequence). +//! - `VecElt` (`13`): A vector element. //! The first subdocument should be `Sub*` tags with the number of elements. //! Subsequent subdocuments should be `VecElt` tag per each element. //! -//! 
- `Map` (`12`): A map (associated array). -//! - `MapKey` (`13`): A key part of the map entry. -//! - `MapVal` (`14`): A value part of the map entry. +//! - `Map` (`14`): A map (associated array). +//! - `MapKey` (`15`): A key part of the map entry. +//! - `MapVal` (`16`): A value part of the map entry. //! The first subdocument should be `Sub*` tags with the number of entries. //! Subsequent subdocuments should be an alternating sequence of //! `MapKey` and `MapVal` tags per each entry. //! -//! - `Opaque` (`15`): An opaque, custom-format tag. +//! - `Opaque` (`17`): An opaque, custom-format tag. //! Used to wrap ordinary custom tags or data in the auto-serialized context. //! Rustc typically uses this to encode type informations. //! @@ -183,40 +183,41 @@ pub enum EbmlEncoderTag { // tags 00..1f are reserved for auto-serialization. // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded. - EsU64 = 0x00, // + 8 bytes - EsU32 = 0x01, // + 4 bytes - EsU16 = 0x02, // + 2 bytes - EsU8 = 0x03, // + 1 byte - EsI64 = 0x04, // + 8 bytes - EsI32 = 0x05, // + 4 bytes - EsI16 = 0x06, // + 2 bytes - EsI8 = 0x07, // + 1 byte + EsU8 = 0x00, // + 1 byte + EsU16 = 0x01, // + 2 bytes + EsU32 = 0x02, // + 4 bytes + EsU64 = 0x03, // + 8 bytes + EsI8 = 0x04, // + 1 byte + EsI16 = 0x05, // + 2 bytes + EsI32 = 0x06, // + 4 bytes + EsI64 = 0x07, // + 8 bytes EsBool = 0x08, // + 1 byte EsChar = 0x09, // + 4 bytes - EsF64 = 0x0a, // + 8 bytes - EsF32 = 0x0b, // + 4 bytes + EsF32 = 0x0a, // + 4 bytes + EsF64 = 0x0b, // + 8 bytes EsSub8 = 0x0c, // + 1 byte EsSub32 = 0x0d, // + 4 bytes - - EsStr = 0x0e, - EsEnum = 0x0f, // encodes the variant id as the first EsSub* - EsVec = 0x10, // encodes the # of elements as the first EsSub* - EsVecElt = 0x11, - EsMap = 0x12, // encodes the # of pairs as the first EsSub* - EsMapKey = 0x13, - EsMapVal = 0x14, - EsOpaque = 0x15, + // 0x0e and 0x0f are reserved + + EsStr = 0x10, + EsEnum = 0x11, // encodes the variant id as the 
first EsSub* + EsVec = 0x12, // encodes the # of elements as the first EsSub* + EsVecElt = 0x13, + EsMap = 0x14, // encodes the # of pairs as the first EsSub* + EsMapKey = 0x15, + EsMapVal = 0x16, + EsOpaque = 0x17, } const NUM_TAGS: uint = 0x1000; const NUM_IMPLICIT_TAGS: uint = 0x0e; static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ - 8, 4, 2, 1, // EsU* - 8, 4, 2, 1, // ESI* + 1, 2, 4, 8, // EsU* + 1, 2, 4, 8, // ESI* 1, // EsBool 4, // EsChar - 8, 4, // EsF* + 4, 8, // EsF* 1, 4, // EsSub* ]; @@ -554,7 +555,10 @@ pub mod reader { Ok(r) } - // variable-length unsigned integer with different tags + // variable-length unsigned integer with different tags. + // `first_tag` should be a tag for u8 or i8. + // `last_tag` should be the largest allowed integer tag with the matching signedness. + // all tags between them should be valid, in the order of u8, u16, u32 and u64. fn _next_int(&mut self, first_tag: EbmlEncoderTag, last_tag: EbmlEncoderTag) -> DecodeResult { @@ -566,7 +570,7 @@ pub mod reader { let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos)); let r = if first_tag as uint <= r_tag && r_tag <= last_tag as uint { - match last_tag as uint - r_tag { + match r_tag - first_tag as uint { 0 => doc_as_u8(r_doc) as u64, 1 => doc_as_u16(r_doc) as u64, 2 => doc_as_u32(r_doc) as u64, @@ -608,12 +612,12 @@ pub mod reader { type Error = Error; fn read_nil(&mut self) -> DecodeResult<()> { Ok(()) } - fn read_u64(&mut self) -> DecodeResult { self._next_int(EsU64, EsU8) } - fn read_u32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU32, EsU8)) as u32) } - fn read_u16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU16, EsU8)) as u16) } + fn read_u64(&mut self) -> DecodeResult { self._next_int(EsU8, EsU64) } + fn read_u32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU8, EsU32)) as u32) } + fn read_u16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU8, EsU16)) as u16) } fn read_u8(&mut self) -> DecodeResult { 
Ok(doc_as_u8(try!(self.next_doc(EsU8)))) } fn read_uint(&mut self) -> DecodeResult { - let v = try!(self._next_int(EsU64, EsU8)); + let v = try!(self._next_int(EsU8, EsU64)); if v > (::std::usize::MAX as u64) { Err(IntTooBig(v as uint)) } else { @@ -621,12 +625,12 @@ pub mod reader { } } - fn read_i64(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI64, EsI8)) as i64) } - fn read_i32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI32, EsI8)) as i32) } - fn read_i16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI16, EsI8)) as i16) } + fn read_i64(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI64)) as i64) } + fn read_i32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI32)) as i32) } + fn read_i16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI16)) as i16) } fn read_i8(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8) } fn read_int(&mut self) -> DecodeResult { - let v = try!(self._next_int(EsI64, EsI8)) as i64; + let v = try!(self._next_int(EsI8, EsI64)) as i64; if v > (isize::MAX as i64) || v < (isize::MIN as i64) { debug!("FIXME \\#6122: Removing this makes this function miscompile"); Err(IntTooBig(v as uint))