From 879eb972adc681cb02b84505760ccebedd7465cd Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Fri, 18 May 2018 11:48:11 +0200
Subject: [PATCH 1/6] Add SortedMap to rustc_data_structures.

---
 src/librustc_data_structures/lib.rs        |   1 +
 src/librustc_data_structures/sorted_map.rs | 501 +++++++++++++++++++++
 2 files changed, 502 insertions(+)
 create mode 100644 src/librustc_data_structures/sorted_map.rs

diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
index 9a6705fe9cac3..cd707152af6c2 100644
--- a/src/librustc_data_structures/lib.rs
+++ b/src/librustc_data_structures/lib.rs
@@ -73,6 +73,7 @@ pub mod control_flow_graph;
 pub mod flock;
 pub mod sync;
 pub mod owning_ref;
+pub mod sorted_map;
 
 pub struct OnDrop<F: Fn()>(pub F);

diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs
new file mode 100644
index 0000000000000..a8a43ebc1ad55
--- /dev/null
+++ b/src/librustc_data_structures/sorted_map.rs
@@ -0,0 +1,501 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::borrow::Borrow;
+use std::cmp::Ordering;
+use std::convert::From;
+use std::mem;
+use std::ops::{RangeBounds, Bound, Index, IndexMut};
+
+#[derive(Clone, PartialEq, Eq, Hash, Debug, RustcEncodable, RustcDecodable)]
+pub struct SortedMap<K: Ord, V> {
+    data: Vec<(K,V)>
+}
+
+impl<K: Ord, V> SortedMap<K, V> {
+
+    #[inline]
+    pub fn new() -> SortedMap<K, V> {
+        SortedMap {
+            data: vec![]
+        }
+    }
+
+    // It is up to the caller to make sure that the elements are sorted by key
+    // and that there are no duplicates.
+    #[inline]
+    pub fn from_presorted_elements(elements: Vec<(K, V)>) -> SortedMap<K, V>
+    {
+        debug_assert!(elements.windows(2).all(|w| w[0].0 < w[1].0));
+
+        SortedMap {
+            data: elements
+        }
+    }
+
+    #[inline]
+    pub fn insert(&mut self, key: K, mut value: V) -> Option<V> {
+        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(&key));
+
+        match index {
+            Ok(index) => {
+                let mut slot = unsafe {
+                    self.data.get_unchecked_mut(index)
+                };
+                mem::swap(&mut slot.1, &mut value);
+                Some(value)
+            }
+            Err(index) => {
+                self.data.insert(index, (key, value));
+                None
+            }
+        }
+    }
+
+    #[inline]
+    pub fn remove(&mut self, key: &K) -> Option<V> {
+        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
+
+        match index {
+            Ok(index) => {
+                Some(self.data.remove(index).1)
+            }
+            Err(_) => {
+                None
+            }
+        }
+    }
+
+    #[inline]
+    pub fn get(&self, key: &K) -> Option<&V> {
+        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
+
+        match index {
+            Ok(index) => {
+                unsafe {
+                    Some(&self.data.get_unchecked(index).1)
+                }
+            }
+            Err(_) => {
+                None
+            }
+        }
+    }
+
+    #[inline]
+    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
+
+        match index {
+            Ok(index) => {
+                unsafe {
+                    Some(&mut self.data.get_unchecked_mut(index).1)
+                }
+            }
+            Err(_) => {
+                None
+            }
+        }
+    }
+
+    #[inline]
+    pub fn clear(&mut self) {
+        self.data.clear();
+    }
+
+    /// Iterate over elements, sorted by key
+    #[inline]
+    pub fn iter(&self) -> ::std::slice::Iter<(K, V)> {
+        self.data.iter()
+    }
+
+    /// Iterate over the keys, sorted
+    #[inline]
+    pub fn keys(&self) -> impl Iterator<Item=&K> + ExactSizeIterator {
+        self.data.iter().map(|&(ref k, _)| k)
+    }
+
+    /// Iterate over values, sorted by key
+    #[inline]
+    pub fn values(&self) -> impl Iterator<Item=&V> + ExactSizeIterator {
+        self.data.iter().map(|&(_, ref v)| v)
+    }
+
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    #[inline]
+    pub fn range<R>(&self, range: R) -> &[(K, V)]
+        where R: RangeBounds<K>
+    {
+        let (start, end) = self.range_slice_indices(range);
+        (&self.data[start .. end])
+    }
+
+    #[inline]
+    pub fn range_mut<R>(&mut self, range: R) -> &mut [(K, V)]
+        where R: RangeBounds<K>
+    {
+        let (start, end) = self.range_slice_indices(range);
+        (&mut self.data[start .. end])
+    }
+
+    #[inline]
+    pub fn remove_range<R>(&mut self, range: R)
+        where R: RangeBounds<K>
+    {
+        let (start, end) = self.range_slice_indices(range);
+        self.data.splice(start .. end, ::std::iter::empty());
+    }
+
+    /// Mutate all keys with the given function `f`. This mutation must not
+    /// change the sort-order of keys.
+    #[inline]
+    pub fn offset_keys<F>(&mut self, f: F)
+        where F: Fn(&mut K)
+    {
+        self.data.iter_mut().map(|&mut (ref mut k, _)| k).for_each(f);
+    }
+
+    // It is up to the caller to make sure that the elements are sorted by key
+    // and that there are no duplicates.
+    #[inline]
+    pub fn insert_presorted(&mut self, mut elements: Vec<(K, V)>) {
+        if elements.is_empty() {
+            return
+        }
+
+        debug_assert!(elements.windows(2).all(|w| w[0].0 < w[1].0));
+
+        let index = {
+            let first_element = &elements[0].0;
+            self.data.binary_search_by(|&(ref x, _)| x.cmp(first_element))
+        };
+
+        let drain = match index {
+            Ok(index) => {
+                let mut drain = elements.drain(..);
+                self.data[index] = drain.next().unwrap();
+                drain
+            }
+            Err(index) => {
+                if index == self.data.len() ||
+                   elements.last().unwrap().0 < self.data[index].0 {
+                    // We can copy the whole range without having to mix with
+                    // existing elements.
+                    self.data.splice(index .. index, elements.drain(..));
+                    return
+                }
+
+                let mut drain = elements.drain(..);
+                self.data.insert(index, drain.next().unwrap());
+                drain
+            }
+        };
+
+        // Insert the rest
+        for (k, v) in drain {
+            self.insert(k, v);
+        }
+    }
+
+    #[inline]
+    fn range_slice_indices<R>(&self, range: R) -> (usize, usize)
+        where R: RangeBounds<K>
+    {
+        let start = match range.start() {
+            Bound::Included(ref k) => {
+                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                    Ok(index) | Err(index) => index
+                }
+            }
+            Bound::Excluded(ref k) => {
+                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                    Ok(index) => index + 1,
+                    Err(index) => index,
+                }
+            }
+            Bound::Unbounded => 0,
+        };
+
+        let end = match range.end() {
+            Bound::Included(ref k) => {
+                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                    Ok(index) => index + 1,
+                    Err(index) => index,
+                }
+            }
+            Bound::Excluded(ref k) => {
+                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                    Ok(index) | Err(index) => index,
+                }
+            }
+            Bound::Unbounded => self.data.len(),
+        };
+
+        (start, end)
+    }
+}
+
+impl<K: Ord, V> IntoIterator for SortedMap<K, V> {
+    type Item = (K, V);
+    type IntoIter = ::std::vec::IntoIter<(K, V)>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.data.into_iter()
+    }
+}
+
+impl<K: Ord, V, Q: Borrow<K>> Index<Q> for SortedMap<K, V> {
+    type Output = V;
+    fn index(&self, index: Q) -> &Self::Output {
+        let k: &K = index.borrow();
+        self.get(k).unwrap()
+    }
+}
+
+impl<K: Ord, V, Q: Borrow<K>> IndexMut<Q> for SortedMap<K, V> {
+    fn index_mut(&mut self, index: Q) -> &mut Self::Output {
+        let k: &K = index.borrow();
+        self.get_mut(k).unwrap()
+    }
+}
+
+impl<K: Ord, V, I: Iterator<Item=(K, V)>> From<I> for SortedMap<K, V> {
+    fn from(data: I) -> Self {
+        let mut data: Vec<(K, V)> = data.collect();
+        data.sort_unstable_by(|&(ref k1, _), &(ref k2, _)| k1.cmp(k2));
+        data.dedup_by(|&mut (ref k1, _), &mut (ref k2, _)| {
+            k1.cmp(k2) == Ordering::Equal
+        });
+        SortedMap {
+            data
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SortedMap;
+    use test::{self, Bencher};
+
+    #[test]
+    fn test_insert_and_iter() {
+        let mut map = SortedMap::new();
+        let mut expected = Vec::new();
+
+        for x in 0 .. 100 {
+            assert_eq!(map.iter().cloned().collect::<Vec<_>>(), expected);
+
+            let x = 1000 - x * 2;
+            map.insert(x, x);
+            expected.insert(0, (x, x));
+        }
+    }
+
+    #[test]
+    fn test_get_and_index() {
+        let mut map = SortedMap::new();
+        let mut expected = Vec::new();
+
+        for x in 0 .. 100 {
+            let x = 1000 - x;
+            if x & 1 == 0 {
+                map.insert(x, x);
+            }
+            expected.push(x);
+        }
+
+        for mut x in expected {
+            if x & 1 == 0 {
+                assert_eq!(map.get(&x), Some(&x));
+                assert_eq!(map.get_mut(&x), Some(&mut x));
+                assert_eq!(map[&x], x);
+                assert_eq!(&mut map[&x], &mut x);
+            } else {
+                assert_eq!(map.get(&x), None);
+                assert_eq!(map.get_mut(&x), None);
+            }
+        }
+    }
+
+    #[test]
+    fn test_range() {
+        let mut map = SortedMap::new();
+        map.insert(1, 1);
+        map.insert(3, 3);
+        map.insert(6, 6);
+        map.insert(9, 9);
+
+        let keys = |s: &[(_, _)]| {
+            s.into_iter().map(|e| e.0).collect::<Vec<_>>()
+        };
+
+        for start in 0 .. 11 {
+            for end in 0 .. 11 {
+                if end < start {
+                    continue
+                }
+
+                let mut expected = vec![1, 3, 6, 9];
+                expected.retain(|&x| x >= start && x < end);
+
+                assert_eq!(keys(map.range(start..end)), expected, "range = {}..{}", start, end);
+            }
+        }
+    }
+
+
+    #[test]
+    fn test_offset_keys() {
+        let mut map = SortedMap::new();
+        map.insert(1, 1);
+        map.insert(3, 3);
+        map.insert(6, 6);
+
+        map.offset_keys(|k| *k += 1);
+
+        let mut expected = SortedMap::new();
+        expected.insert(2, 1);
+        expected.insert(4, 3);
+        expected.insert(7, 6);
+
+        assert_eq!(map, expected);
+    }
+
+    fn keys(s: SortedMap<u32, u32>) -> Vec<u32> {
+        s.into_iter().map(|(k, _)| k).collect::<Vec<_>>()
+    }
+
+    fn elements(s: SortedMap<u32, u32>) -> Vec<(u32, u32)> {
+        s.into_iter().collect::<Vec<_>>()
+    }
+
+    #[test]
+    fn test_remove_range() {
+        let mut map = SortedMap::new();
+        map.insert(1, 1);
+        map.insert(3, 3);
+        map.insert(6, 6);
+        map.insert(9, 9);
+
+        for start in 0 .. 11 {
+            for end in 0 .. 11 {
+                if end < start {
+                    continue
+                }
+
+                let mut expected = vec![1, 3, 6, 9];
+                expected.retain(|&x| x < start || x >= end);
+
+                let mut map = map.clone();
+                map.remove_range(start .. end);
+
+                assert_eq!(keys(map), expected, "range = {}..{}", start, end);
+            }
+        }
+    }
+
+    #[test]
+    fn test_remove() {
+        let mut map = SortedMap::new();
+        let mut expected = Vec::new();
+
+        for x in 0..10 {
+            map.insert(x, x);
+            expected.push((x, x));
+        }
+
+        for x in 0 .. 10 {
+            let mut map = map.clone();
+            let mut expected = expected.clone();
+
+            assert_eq!(map.remove(&x), Some(x));
+            expected.remove(x as usize);
+
+            assert_eq!(map.iter().cloned().collect::<Vec<_>>(), expected);
+        }
+    }
+
+    #[test]
+    fn test_insert_presorted_non_overlapping() {
+        let mut map = SortedMap::new();
+        map.insert(2, 0);
+        map.insert(8, 0);
+
+        map.insert_presorted(vec![(3, 0), (7, 0)]);
+
+        let expected = vec![2, 3, 7, 8];
+        assert_eq!(keys(map), expected);
+    }
+
+    #[test]
+    fn test_insert_presorted_first_elem_equal() {
+        let mut map = SortedMap::new();
+        map.insert(2, 2);
+        map.insert(8, 8);
+
+        map.insert_presorted(vec![(2, 0), (7, 7)]);
+
+        let expected = vec![(2, 0), (7, 7), (8, 8)];
+        assert_eq!(elements(map), expected);
+    }
+
+    #[test]
+    fn test_insert_presorted_last_elem_equal() {
+        let mut map = SortedMap::new();
+        map.insert(2, 2);
+        map.insert(8, 8);
+
+        map.insert_presorted(vec![(3, 3), (8, 0)]);
+
+        let expected = vec![(2, 2), (3, 3), (8, 0)];
+        assert_eq!(elements(map), expected);
+    }
+
+    #[test]
+    fn test_insert_presorted_shuffle() {
+        let mut map = SortedMap::new();
+        map.insert(2, 2);
+        map.insert(7, 7);
+
+        map.insert_presorted(vec![(1, 1), (3, 3), (8, 8)]);
+
+        let expected = vec![(1, 1), (2, 2), (3, 3), (7, 7), (8, 8)];
+        assert_eq!(elements(map), expected);
+    }
+
+    macro_rules! mk_bench {
+        ($name:ident, $size:expr) => (
+            #[bench]
+            fn $name(b: &mut Bencher) {
+                let mut map = SortedMap::new();
+                for x in 0 .. $size {
+                    map.insert(x * 3, 0);
+                }
+
+                b.iter(|| {
+                    test::black_box(map.range(..));
+                    test::black_box(map.range( $size / 2 .. $size ));
+                    test::black_box(map.range( 0 .. $size / 3 ));
+                    test::black_box(map.range( $size / 4 .. $size / 3 ));
+                })
+            }
+        )
+    }
+
+    mk_bench!(bench_range_1, 1);
+    mk_bench!(bench_range_2, 2);
+    mk_bench!(bench_range_5, 5);
+    mk_bench!(bench_range_10, 10);
+    mk_bench!(bench_range_32, 32);
+    mk_bench!(bench_range_1000, 1000);
+}
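
A usage sketch of the API introduced above; the keys and values below are
made up for illustration and are not part of the patch:

    #[test]
    fn sorted_map_usage_sketch() {
        use rustc_data_structures::sorted_map::SortedMap;

        let mut map = SortedMap::new();
        map.insert(7, "seven");
        map.insert(3, "three");
        assert_eq!(map.get(&3), Some(&"three"));

        // Unlike BTreeMap::range(), this hands back a plain subslice of
        // the backing Vec: here, the entries for keys 3 and 7.
        assert_eq!(map.range(3..8).len(), 2);

        // A presorted, non-overlapping batch is spliced in wholesale
        // instead of being inserted element by element.
        map.insert_presorted(vec![(4, "four"), (5, "five")]);
        assert_eq!(map.keys().collect::<Vec<_>>(), vec![&3, &4, &5, &7]);
    }
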
From 3ed23a4bd0f6ab5db47f0ff41a457d09241228b5 Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Fri, 18 May 2018 16:06:20 +0200
Subject: [PATCH 2/6] Use SortedMap instead of BTreeMap for relocations in MIRI.
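
Relocations map byte offsets within an allocation to the allocations they
point to. They are built in address order, queried by contiguous offset
ranges, and copied in bulk during memcpy, which is exactly the access
pattern a flat sorted Vec serves well. A sketch of the overlap window that
the reworked relocations() accessor computes; the offsets and pointer size
below are made-up example values:

    #[test]
    fn relocation_window_sketch() {
        // A relocation is keyed by the *first* byte of the pointer it
        // covers, so a read of bytes [24, 32) must also catch a relocation
        // starting up to pointer_size - 1 bytes before the read.
        let pointer_size: u64 = 8;
        let (offset, size) = (24u64, 8u64);
        let start = offset.saturating_sub(pointer_size - 1);
        let end = offset + size;
        assert_eq!((start, end), (17, 32));
        // With the SortedMap-backed storage, querying relocations in
        // start..end is then a subslice lookup, not a tree walk.
    }
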
---
 src/librustc/mir/interpret/mod.rs         | 38 +++++++++++++++++---
 src/librustc_codegen_llvm/mir/constant.rs |  2 +-
 src/librustc_mir/interpret/memory.rs      | 42 +++++++++++------------
 3 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/src/librustc/mir/interpret/mod.rs b/src/librustc/mir/interpret/mod.rs
index 6f5401f54dc98..9827ee51ba29d 100644
--- a/src/librustc/mir/interpret/mod.rs
+++ b/src/librustc/mir/interpret/mod.rs
@@ -12,7 +12,6 @@
 pub use self::error::{EvalError, EvalResult, EvalErrorKind, AssertMessage};
 pub use self::value::{PrimVal, PrimValKind, Value, Pointer, ConstValue};
 
-use std::collections::BTreeMap;
 use std::fmt;
 use mir;
 use hir::def_id::DefId;
@@ -21,9 +20,11 @@ use ty::layout::{self, Align, HasDataLayout, Size};
 use middle::region;
 use std::iter;
 use std::io;
+use std::ops::{Deref, DerefMut};
 use std::hash::Hash;
 use syntax::ast::Mutability;
 use rustc_serialize::{Encoder, Decoder, Decodable, Encodable};
+use rustc_data_structures::sorted_map::SortedMap;
 use rustc_data_structures::fx::FxHashMap;
 
 use byteorder::{WriteBytesExt, ReadBytesExt, LittleEndian, BigEndian};
@@ -341,7 +342,7 @@ pub struct Allocation {
     pub bytes: Vec<u8>,
     /// Maps from byte addresses to allocations.
     /// Only the first byte of a pointer is inserted into the map.
-    pub relocations: BTreeMap<Size, AllocId>,
+    pub relocations: Relocations,
     /// Denotes undefined memory. Reading from undefined memory is forbidden in miri
     pub undef_mask: UndefMask,
     /// The alignment of the allocation to detect unaligned reads.
@@ -358,7 +359,7 @@ impl Allocation {
         undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
         Self {
             bytes: slice.to_owned(),
-            relocations: BTreeMap::new(),
+            relocations: Relocations::new(),
             undef_mask,
             align,
             runtime_mutability: Mutability::Immutable,
@@ -373,7 +374,7 @@ impl Allocation {
         assert_eq!(size.bytes() as usize as u64, size.bytes());
         Allocation {
             bytes: vec![0; size.bytes() as usize],
-            relocations: BTreeMap::new(),
+            relocations: Relocations::new(),
             undef_mask: UndefMask::new(size),
             align,
             runtime_mutability: Mutability::Immutable,
@@ -383,6 +384,35 @@ impl Allocation {
 
 impl<'tcx> ::serialize::UseSpecializedDecodable for &'tcx Allocation {}
 
+#[derive(Clone, PartialEq, Eq, Hash, Debug, RustcEncodable, RustcDecodable)]
+pub struct Relocations(SortedMap<Size, AllocId>);
+
+impl Relocations {
+    pub fn new() -> Relocations {
+        Relocations(SortedMap::new())
+    }
+
+    // The caller must guarantee that the given relocations are already sorted
+    // by address and contain no duplicates.
+    pub fn from_presorted(r: Vec<(Size, AllocId)>) -> Relocations {
+        Relocations(SortedMap::from_presorted_elements(r))
+    }
+}
+
+impl Deref for Relocations {
+    type Target = SortedMap<Size, AllocId>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for Relocations {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Methods to access integers in the target endianness
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/librustc_codegen_llvm/mir/constant.rs b/src/librustc_codegen_llvm/mir/constant.rs
index 07fb683a84f67..a4fe85135de7a 100644
--- a/src/librustc_codegen_llvm/mir/constant.rs
+++ b/src/librustc_codegen_llvm/mir/constant.rs
@@ -83,7 +83,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx, alloc: &Allocation) -> ValueRef {
     let pointer_size = layout.pointer_size.bytes() as usize;
 
     let mut next_offset = 0;
-    for (&offset, &alloc_id) in &alloc.relocations {
+    for &(offset, alloc_id) in alloc.relocations.iter() {
         let offset = offset.bytes();
         assert_eq!(offset as usize as u64, offset);
         let offset = offset as usize;
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index b2a6e2b452721..3f7ecf9dfb282 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -1,4 +1,4 @@
-use std::collections::{btree_map, VecDeque};
+use std::collections::VecDeque;
 use std::ptr;
 
 use rustc::hir::def_id::DefId;
@@ -519,7 +519,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
     fn get_bytes(&self, ptr: MemoryPointer, size: Size, align: Align) -> EvalResult<'tcx, &[u8]> {
         assert_ne!(size.bytes(), 0);
-        if self.relocations(ptr, size)?.count() != 0 {
+        if self.relocations(ptr, size)?.len() != 0 {
             return err!(ReadPointerAsBytes);
         }
         self.check_defined(ptr, size)?;
@@ -614,9 +614,9 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // first copy the relocations to a temporary buffer, because
         // `get_bytes_mut` will clear the relocations, which is correct,
         // since we don't want to keep any relocations at the target.
         let relocations: Vec<_> = self.relocations(src, size)?
-            .map(|(&offset, &alloc_id)| {
+            .iter()
+            .map(|&(offset, alloc_id)| {
                 // Update relocation offsets for the new positions in the destination allocation.
                 (offset + dest.offset - src.offset, alloc_id)
             })
@@ -648,7 +648,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         self.copy_undef_mask(src, dest, size)?;
 
         // copy back the relocations
-        self.get_mut(dest.alloc_id)?.relocations.extend(relocations);
+        self.get_mut(dest.alloc_id)?.relocations.insert_presorted(relocations);
 
         Ok(())
     }
@@ -660,7 +660,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         match alloc.bytes[offset..].iter().position(|&c| c == 0) {
             Some(size) => {
                 let p1 = Size::from_bytes((size + 1) as u64);
-                if self.relocations(ptr, p1)?.count() != 0 {
+                if self.relocations(ptr, p1)?.len() != 0 {
                     return err!(ReadPointerAsBytes);
                 }
                 self.check_defined(ptr, p1)?;
@@ -720,7 +720,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         let bytes = read_target_uint(endianness, bytes).unwrap();
         // See if we got a pointer
         if size != self.pointer_size() {
-            if self.relocations(ptr, size)?.count() != 0 {
+            if self.relocations(ptr, size)?.len() != 0 {
                 return err!(ReadPointerAsBytes);
             }
         } else {
@@ -808,24 +808,26 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         &self,
         ptr: MemoryPointer,
         size: Size,
-    ) -> EvalResult<'tcx, btree_map::Range<Size, AllocId>> {
+    ) -> EvalResult<'tcx, &[(Size, AllocId)]> {
         let start = ptr.offset.bytes().saturating_sub(self.pointer_size().bytes() - 1);
         let end = ptr.offset + size;
         Ok(self.get(ptr.alloc_id)?.relocations.range(Size::from_bytes(start)..end))
     }
 
     fn clear_relocations(&mut self, ptr: MemoryPointer, size: Size) -> EvalResult<'tcx> {
-        // Find all relocations overlapping the given range.
-        let keys: Vec<_> = self.relocations(ptr, size)?.map(|(&k, _)| k).collect();
-        if keys.is_empty() {
-            return Ok(());
-        }
-
-        // Find the start and end of the given range and its outermost relocations.
+        let (first, last) = {
+            // Find all relocations overlapping the given range.
+            let relocations = self.relocations(ptr, size)?;
+            if relocations.is_empty() {
+                return Ok(());
+            }
+
+            (relocations.first().unwrap().0,
+             relocations.last().unwrap().0 + self.pointer_size())
+        };
         let start = ptr.offset;
         let end = start + size;
-        let first = *keys.first().unwrap();
-        let last = *keys.last().unwrap() + self.pointer_size();
 
         let alloc = self.get_mut(ptr.alloc_id)?;
 
@@ -839,16 +841,14 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         }
 
         // Forget all the relocations.
-        for k in keys {
-            alloc.relocations.remove(&k);
-        }
+        alloc.relocations.remove_range(first ..= last);
 
         Ok(())
     }
 
     fn check_relocation_edges(&self, ptr: MemoryPointer, size: Size) -> EvalResult<'tcx> {
-        let overlapping_start = self.relocations(ptr, Size::from_bytes(0))?.count();
-        let overlapping_end = self.relocations(ptr.offset(size, self)?, Size::from_bytes(0))?.count();
+        let overlapping_start = self.relocations(ptr, Size::from_bytes(0))?.len();
+        let overlapping_end = self.relocations(ptr.offset(size, self)?, Size::from_bytes(0))?.len();
         if overlapping_start + overlapping_end != 0 {
             return err!(ReadPointerAsBytes);
         }
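
One payoff of the slice-based storage shows up in clear_relocations()
above: the whole overlapping window disappears with a single remove_range()
call instead of one remove() per key. A standalone sketch of that
windowing, with made-up keys and an assumed 8-byte pointer size:

    #[test]
    fn clear_relocations_sketch() {
        use rustc_data_structures::sorted_map::SortedMap;

        let mut relocations = SortedMap::new();
        relocations.insert(0u64, 'a');
        relocations.insert(8u64, 'b');
        relocations.insert(24u64, 'c');

        // Writing bytes [8, 16): the removal window runs from the first
        // overlapping key (8) through the last overlapping key plus the
        // pointer size (8 + 8), mirroring the `first ..= last` call above.
        let (first, last) = (8u64, 8u64 + 8);
        relocations.remove_range(first ..= last);
        assert_eq!(relocations.keys().collect::<Vec<_>>(), vec![&0, &24]);
    }
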
From eaa796c8b885c0aecf0c85d8299a3b52cca72370 Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Mon, 21 May 2018 16:12:02 +0200
Subject: [PATCH 3/6] Remove benchmarks from SortedMap.

---
 src/librustc_data_structures/sorted_map.rs | 33 ++++++----------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs
index a8a43ebc1ad55..c3f462f29c856 100644
--- a/src/librustc_data_structures/sorted_map.rs
+++ b/src/librustc_data_structures/sorted_map.rs
@@ -285,7 +285,6 @@ impl<K: Ord, V, I: Iterator<Item=(K, V)>> From<I> for SortedMap<K, V> {
 #[cfg(test)]
 mod tests {
     use super::SortedMap;
-    use test::{self, Bencher};
 
     #[test]
     fn test_insert_and_iter() {
@@ -473,29 +472,15 @@ mod tests {
         assert_eq!(elements(map), expected);
     }
 
-    macro_rules! mk_bench {
-        ($name:ident, $size:expr) => (
-            #[bench]
-            fn $name(b: &mut Bencher) {
-                let mut map = SortedMap::new();
-                for x in 0 .. $size {
-                    map.insert(x * 3, 0);
-                }
+    #[test]
+    fn test_insert_presorted_at_end() {
+        let mut map = SortedMap::new();
+        map.insert(1, 1);
+        map.insert(2, 2);
 
-                b.iter(|| {
-                    test::black_box(map.range(..));
-                    test::black_box(map.range( $size / 2 .. $size ));
-                    test::black_box(map.range( 0 .. $size / 3 ));
-                    test::black_box(map.range( $size / 4 .. $size / 3 ));
-                })
-            }
-        )
-    }
+        map.insert_presorted(vec![(3, 3), (8, 8)]);
 
-    mk_bench!(bench_range_1, 1);
-    mk_bench!(bench_range_2, 2);
-    mk_bench!(bench_range_5, 5);
-    mk_bench!(bench_range_10, 10);
-    mk_bench!(bench_range_32, 32);
-    mk_bench!(bench_range_1000, 1000);
+        let expected = vec![(1, 1), (2, 2), (3, 3), (8, 8)];
+        assert_eq!(elements(map), expected);
+    }
 }

From e850d78bcc42d4ef1e2e938f13fde319b37ced9c Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Mon, 21 May 2018 19:15:00 +0200
Subject: [PATCH 4/6] Remove SortedMap::range_mut() since it allows breaking
 the element sorting order that lookup relies on.

---
 src/librustc_data_structures/sorted_map.rs | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs
index c3f462f29c856..fd2bf9272a7ec 100644
--- a/src/librustc_data_structures/sorted_map.rs
+++ b/src/librustc_data_structures/sorted_map.rs
@@ -141,14 +141,6 @@ impl<K: Ord, V> SortedMap<K, V> {
         (&self.data[start .. end])
     }
 
-    #[inline]
-    pub fn range_mut<R>(&mut self, range: R) -> &mut [(K, V)]
-        where R: RangeBounds<K>
-    {
-        let (start, end) = self.range_slice_indices(range);
-        (&mut self.data[start .. end])
-    }
-
     #[inline]
     pub fn remove_range<R>(&mut self, range: R)
         where R: RangeBounds<K>

From 4bedc314597c20cbaf68ac094e42ba569fcc8973 Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Mon, 21 May 2018 19:16:26 +0200
Subject: [PATCH 5/6] Clean up SortedMap by wrapping element lookup in a method.
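
For reference, the calling convention that lookup_index_for() centralizes
is that of slice::binary_search_by; a standalone illustration, not part of
the patch:

    // Ok(i): the key lives at index i. Err(i): the key is absent and i is
    // the index where it could be inserted while keeping the data sorted,
    // which is why insert() can use both arms of the Result directly.
    let data = [(1, 'a'), (3, 'b'), (6, 'c')];
    assert_eq!(data.binary_search_by(|&(x, _)| x.cmp(&3)), Ok(1));
    assert_eq!(data.binary_search_by(|&(x, _)| x.cmp(&4)), Err(2));
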
---
 src/librustc_data_structures/sorted_map.rs | 37 ++++++++++------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs
index fd2bf9272a7ec..cdce1e77c0fb2 100644
--- a/src/librustc_data_structures/sorted_map.rs
+++ b/src/librustc_data_structures/sorted_map.rs
@@ -42,9 +42,7 @@ impl<K: Ord, V> SortedMap<K, V> {
 
     #[inline]
     pub fn insert(&mut self, key: K, mut value: V) -> Option<V> {
-        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(&key));
-
-        match index {
+        match self.lookup_index_for(&key) {
             Ok(index) => {
                 let mut slot = unsafe {
                     self.data.get_unchecked_mut(index)
@@ -61,9 +59,7 @@ impl<K: Ord, V> SortedMap<K, V> {
 
     #[inline]
     pub fn remove(&mut self, key: &K) -> Option<V> {
-        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
-
-        match index {
+        match self.lookup_index_for(key) {
             Ok(index) => {
                 Some(self.data.remove(index).1)
             }
@@ -75,9 +71,7 @@ impl<K: Ord, V> SortedMap<K, V> {
 
     #[inline]
     pub fn get(&self, key: &K) -> Option<&V> {
-        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
-
-        match index {
+        match self.lookup_index_for(key) {
             Ok(index) => {
                 unsafe {
                     Some(&self.data.get_unchecked(index).1)
@@ -91,9 +85,7 @@ impl<K: Ord, V> SortedMap<K, V> {
 
     #[inline]
     pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
-        let index = self.data.binary_search_by(|&(ref x, _)| x.cmp(key));
-
-        match index {
+        match self.lookup_index_for(key) {
             Ok(index) => {
                 unsafe {
                     Some(&mut self.data.get_unchecked_mut(index).1)
@@ -168,12 +160,9 @@ impl<K: Ord, V> SortedMap<K, V> {
 
         debug_assert!(elements.windows(2).all(|w| w[0].0 < w[1].0));
 
-        let index = {
-            let first_element = &elements[0].0;
-            self.data.binary_search_by(|&(ref x, _)| x.cmp(first_element))
-        };
+        let start_index = self.lookup_index_for(&elements[0].0);
 
-        let drain = match index {
+        let drain = match start_index {
             Ok(index) => {
                 let mut drain = elements.drain(..);
                 self.data[index] = drain.next().unwrap();
@@ -200,18 +189,24 @@ impl<K: Ord, V> SortedMap<K, V> {
         }
     }
 
+    /// Looks up the key in `self.data` via `slice::binary_search()`.
+    #[inline(always)]
+    fn lookup_index_for(&self, key: &K) -> Result<usize, usize> {
+        self.data.binary_search_by(|&(ref x, _)| x.cmp(key))
+    }
+
     #[inline]
     fn range_slice_indices<R>(&self, range: R) -> (usize, usize)
         where R: RangeBounds<K>
     {
         let start = match range.start() {
             Bound::Included(ref k) => {
-                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                match self.lookup_index_for(k) {
                     Ok(index) | Err(index) => index
                 }
             }
             Bound::Excluded(ref k) => {
-                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                match self.lookup_index_for(k) {
                     Ok(index) => index + 1,
                     Err(index) => index,
                 }
@@ -221,13 +216,13 @@ impl<K: Ord, V> SortedMap<K, V> {
 
         let end = match range.end() {
             Bound::Included(ref k) => {
-                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                match self.lookup_index_for(k) {
                     Ok(index) => index + 1,
                     Err(index) => index,
                 }
             }
             Bound::Excluded(ref k) => {
-                match self.data.binary_search_by(|&(ref x, _)| x.cmp(k)) {
+                match self.lookup_index_for(k) {
                     Ok(index) | Err(index) => index,
                 }
             }

From 95fac99a2036a0120be26262110a1e473a85950d Mon Sep 17 00:00:00 2001
From: Michael Woerister
Date: Mon, 21 May 2018 19:17:00 +0200
Subject: [PATCH 6/6] Add some doc comments to SortedMap.
---
 src/librustc_data_structures/sorted_map.rs | 25 +++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/librustc_data_structures/sorted_map.rs b/src/librustc_data_structures/sorted_map.rs
index cdce1e77c0fb2..e14bd33c82c1a 100644
--- a/src/librustc_data_structures/sorted_map.rs
+++ b/src/librustc_data_structures/sorted_map.rs
@@ -14,7 +14,15 @@ use std::convert::From;
 use std::mem;
 use std::ops::{RangeBounds, Bound, Index, IndexMut};
 
-#[derive(Clone, PartialEq, Eq, Hash, Debug, RustcEncodable, RustcDecodable)]
+/// `SortedMap` is a data structure with similar characteristics to `BTreeMap`
+/// but slightly different trade-offs: lookup is O(log(N)), insertion and
+/// removal are O(N), and elements can be iterated in order cheaply.
+///
+/// `SortedMap` can be faster than a `BTreeMap` for small sizes (<50) since it
+/// stores data in a more compact way. It also supports accessing contiguous
+/// ranges of elements as a slice, and slices of already sorted elements can be
+/// inserted efficiently.
+#[derive(Clone, PartialEq, Eq, Hash, Default, Debug, RustcEncodable, RustcDecodable)]
 pub struct SortedMap<K: Ord, V> {
     data: Vec<(K,V)>
 }
@@ -28,8 +36,11 @@ impl<K: Ord, V> SortedMap<K, V> {
         }
     }
 
-    // It is up to the caller to make sure that the elements are sorted by key
-    // and that there are no duplicates.
+    /// Construct a `SortedMap` from a presorted set of elements. This is faster
+    /// than creating an empty map and then inserting the elements individually.
+    ///
+    /// It is up to the caller to make sure that the elements are sorted by key
+    /// and that there are no duplicates.
     #[inline]
     pub fn from_presorted_elements(elements: Vec<(K, V)>) -> SortedMap<K, V>
     {
@@ -150,8 +161,12 @@ impl<K: Ord, V> SortedMap<K, V> {
         self.data.iter_mut().map(|&mut (ref mut k, _)| k).for_each(f);
     }
 
-    // It is up to the caller to make sure that the elements are sorted by key
-    // and that there are no duplicates.
+    /// Inserts a presorted range of elements into the map. If the range can be
+    /// inserted as a whole in between two existing elements of the map, this
+    /// will be faster than inserting the elements individually.
+    ///
+    /// It is up to the caller to make sure that the elements are sorted by key
+    /// and that there are no duplicates.
     #[inline]
     pub fn insert_presorted(&mut self, mut elements: Vec<(K, V)>) {
         if elements.is_empty() {
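
The presorted constructors documented above pair naturally with
deserialization, where relocations are assumed to arrive already sorted by
offset (see Relocations::from_presorted in patch 2). A minimal sketch with
made-up element values:

    use rustc_data_structures::sorted_map::SortedMap;

    // A presorted batch is adopted wholesale: one allocation, and no
    // per-element binary searches or shifts.
    let decoded: Vec<(u64, u32)> = vec![(0, 1), (8, 2), (16, 3)];
    let map = SortedMap::from_presorted_elements(decoded);
    assert_eq!(map.len(), 3);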