Skip to content

Commit

Permalink
impl find index of oid in pack index
Browse files Browse the repository at this point in the history
  • Loading branch information
andyyu2004 committed May 5, 2021
1 parent a43fd16 commit c8a8e08
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 11 deletions.
46 changes: 43 additions & 3 deletions src/libbit/src/pack.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use crate::error::BitResult;
use crate::hash::{BitHash, SHA1Hash};
use crate::hash::{BitHash, SHA1Hash, BIT_HASH_SIZE};
use crate::io::{BufReadExt, HashReader, ReadExt};
use crate::serialize::Deserialize;
use std::io::BufRead;
use crate::serialize::{BufReadSeek, Deserialize};
use std::io::{BufRead, SeekFrom};

const PACK_IDX_MAGIC: u32 = 0xff744f63;
const FANOUT_ENTRYC: usize = 256;
const PACK_IDX_HEADER_SIZE: u64 = 8;

#[derive(Debug)]
pub struct PackIndex {
Expand All @@ -17,6 +18,45 @@ pub struct PackIndex {
pack_hash: SHA1Hash,
}

impl PackIndex {
    /// Finds the position of `oid` within the sorted oid table of a pack index.
    ///
    /// The reader is repositioned to just past the index header, the fanout
    /// table is read, and only the slice of the oid table that can contain
    /// `oid` (all oids sharing its first byte) is read and binary searched.
    ///
    /// The fanout table has `FANOUT_ENTRYC` (256) cumulative entries:
    /// `fanout[b]` is the number of oids whose first byte is `<= b`.
    /// For example, given the sorted oids
    ///
    /// ```text
    /// 00.... 00.... 01.... 01.... 02.... 03.... 03....
    /// ```
    ///
    /// the fanout table starts `[2, 4, 5, 7, ...]` and its last entry is the
    /// total number of oids.
    ///
    /// # Errors
    ///
    /// Returns an error if seeking/reading fails, if the fanout table is not
    /// monotonically non-decreasing at the inspected entries, or if `oid` is
    /// not present in the index.
    fn find_oid_index(mut r: &mut dyn BufReadSeek, oid: BitHash) -> BitResult<usize> {
        r.seek(SeekFrom::Start(PACK_IDX_HEADER_SIZE))?;
        let fanout = r.read_array::<u32, FANOUT_ENTRYC>()?;

        let prefix = oid[0] as usize;
        // candidates live in low..high (inclusive lower bound, exclusive upper bound)
        let low = if prefix == 0 { 0 } else { fanout[prefix - 1] } as i64;
        let high = fanout[prefix] as i64;

        // The fanout values come straight from the file; without this check a
        // corrupt table would make `high - low` negative and the cast below
        // would request an absurdly large read.
        if high < low {
            return Err(anyhow!(
                "corrupt packindex: fanout table is not monotonic at entry {} ({} < {})",
                prefix,
                high,
                low
            ));
        }

        // The reader now sits at the start of the oid table (right after the
        // fanout table), so skip the `low` oids that sort before our bucket.
        r.seek(SeekFrom::Current(low * BIT_HASH_SIZE as i64))?;
        let oids = r.read_vec((high - low) as usize)?;
        match oids.binary_search(&oid) {
            // `idx` is relative to the bucket; translate back to a table-wide index
            Ok(idx) => Ok(low as usize + idx),
            Err(..) => Err(anyhow!("oid `{}` not found in packindex", oid)),
        }
    }
}

impl Deserialize for PackIndex {
fn deserialize(reader: &mut dyn BufRead) -> BitResult<Self>
where
Expand Down
33 changes: 30 additions & 3 deletions src/libbit/src/pack/tests.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,36 @@
use super::*;
use std::io::Cursor;
use std::str::FromStr;

// got this number by inspecting last entry of the fanout table
const PACK_LEN: usize = 11076;

#[test]
fn test_deserialize_pack_idx() -> BitResult<()> {
fn test_deserialize_pack_idx_is_ok() -> BitResult<()> {
let bytes = include_bytes!("../../tests/files/pack.idx") as &[u8];
let pack_idx = PackIndex::deserialize_unbuffered(bytes)?;
// dbg!(pack_idx);
let _pack_idx = PackIndex::deserialize_unbuffered(bytes)?;
Ok(())
}

/// The smallest oid in the fixture index must resolve to position 0.
#[test]
fn test_pack_idx_find_oid_start() -> BitResult<()> {
    // this hash is the first oid in sorted list
    let first_oid = BitHash::from_str("0004a3cf85dbcbfbef916599145a0c370bb78cf5").unwrap();
    let mut reader = Cursor::new(include_bytes!("../../tests/files/pack.idx"));
    let found_at = PackIndex::find_oid_index(&mut reader, first_oid)?;
    assert_eq!(found_at, 0);
    Ok(())
}

/// The largest oid in the fixture index must resolve to the final position.
#[test]
fn test_pack_idx_find_oid_end() -> BitResult<()> {
    // this hash is the last oid in sorted list
    let last_oid = BitHash::from_str("fffc6e8cf5f6798732a6031ebf24d2f6aaa60e47").unwrap();
    let mut reader = Cursor::new(include_bytes!("../../tests/files/pack.idx"));
    let found_at = PackIndex::find_oid_index(&mut reader, last_oid)?;
    assert_eq!(found_at, PACK_LEN - 1);
    Ok(())
}
15 changes: 10 additions & 5 deletions src/libbit/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ pub trait Serialize {
fn serialize(&self, writer: &mut dyn Write) -> BitResult<()>;
}

/// Combination of [`BufRead`] and [`Seek`] under a single name, so that a
/// reader supporting both can be passed around as one trait object
/// (`&mut dyn BufReadSeek`).
pub trait BufReadSeek: BufRead + Seek {}

/// Blanket implementation: every type that is both `BufRead` and `Seek`
/// is a `BufReadSeek`.
impl<R> BufReadSeek for R where R: BufRead + Seek {}

// we use some explicit `Self: Sized` bounds on each function
// (instead of putting the bound on the trait) for object safety
// we ignore these methods as they are obviously not object safe
// however, we do wish BitObj to be object safe
// this is essentially an empty trait when used as a trait object
pub trait Deserialize {
// we use some explicit `Self: Sized` bounds on each function
// (instead of putting the bound on the trait) for object safety
// we ignore these methods as they are obviously not object safe
// however, we do wish BitObj to be object safe
// this is essentially an empty trait when used as a trait object
fn deserialize(reader: &mut dyn BufRead) -> BitResult<Self>
where
Self: Sized;
Expand Down

0 comments on commit c8a8e08

Please sign in to comment.