Skip to content

Commit a2ea498

Browse files
committed
parse TREE chunk (#293)
For now the data structure is just 'as-written' and we see what needs to change there as we have to maintain it.
1 parent 49fcb6f commit a2ea498

File tree

7 files changed

+92
-6
lines changed

7 files changed

+92
-6
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

git-index/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ filetime = "0.2.15"
2525

2626
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
2727
smallvec = "1.7.0"
28+
atoi = "0.4.0"
2829

2930
[dev-dependencies]
3031
git-testtools = { path = "../tests/tools"}

git-index/src/decode.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,16 @@ impl State {
7373
pub fn from_bytes(data: &[u8], timestamp: FileTime, object_hash: git_hash::Kind) -> Result<Self, Error> {
7474
let (version, num_entries, post_header_data) = header::decode(&data, object_hash)?;
7575
let start_of_extensions = extension::end_of_index_entry::decode(&data, object_hash);
76+
let mut cache_tree = None;
77+
78+
// Note that we ignore all errors for optional signatures.
7679
match start_of_extensions {
7780
Some(offset) => {
7881
let extensions = extension::Iter::new_without_checksum(&data[offset..], object_hash);
7982
for (signature, ext_data) in extensions {
8083
match signature {
8184
extension::tree::SIGNATURE => {
82-
let tree = extension::tree::decode(ext_data, object_hash);
83-
todo!("put tree somewhere")
85+
cache_tree = extension::tree::decode(ext_data, object_hash);
8486
}
8587
extension::end_of_index_entry::SIGNATURE => {} // skip already done
8688
_unknown => {} // skip unknown extensions, too
@@ -91,6 +93,10 @@ impl State {
9193
None => todo!("load entries singlge-threaded, then extensions"),
9294
}
9395

94-
Ok(State { timestamp, version })
96+
Ok(State {
97+
timestamp,
98+
version,
99+
cache_tree,
100+
})
95101
}
96102
}

git-index/src/extension/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@ fn decode_header(data: &[u8]) -> (Signature, u32, &[u8]) {
1111
(signature.try_into().unwrap(), read_u32(size), data)
1212
}
1313

14+
/// A structure to associate object ids of a tree with sections in the index entries list.
15+
///
16+
/// It allows to more quickly build trees by avoiding as it can quickly re-use portions of the index and its associated tree ids
17+
/// if there wa sno change to them. Portions of this tree are invalidated as the index is changed.
1418
pub struct Tree {
19+
name: SmallVec<[u8; 23]>,
1520
/// Only set if there are any entries in the index we are associated with.
1621
id: Option<tree::NodeId>,
17-
name: SmallVec<[u8; 23]>,
1822
children: Vec<Tree>,
1923
}
2024

git-index/src/extension/tree.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use crate::extension::{Signature, Tree};
2+
use crate::util::split_at_byte_exclusive;
3+
use git_hash::ObjectId;
24

35
pub const SIGNATURE: Signature = *b"TREE";
46

@@ -11,7 +13,50 @@ pub struct NodeId {
1113

1214
/// A recursive data structure
1315
pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option<Tree> {
14-
todo!("decode tree")
16+
let (tree, data) = one_recursive(data, object_hash.len_in_bytes())?;
17+
assert!(
18+
data.is_empty(),
19+
"BUG: should fully consume the entire tree extension chunk, got {} left",
20+
data.len()
21+
);
22+
Some(tree)
23+
}
24+
25+
pub fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> {
26+
let (path, data) = split_at_byte_exclusive(data, 0)?;
27+
28+
let (entry_count, data) = split_at_byte_exclusive(data, b' ')?;
29+
let entry_count: u32 = atoi::atoi(entry_count)?;
30+
31+
let (subtree_count, mut data) = split_at_byte_exclusive(data, b'\n')?;
32+
let subtree_count: usize = atoi::atoi(subtree_count)?;
33+
34+
let node_id = (entry_count != 0)
35+
.then(|| {
36+
(data.len() >= hash_len).then(|| {
37+
let (hash, rest) = data.split_at(hash_len);
38+
data = rest;
39+
ObjectId::from(hash)
40+
})
41+
})
42+
.flatten()
43+
.map(|id| NodeId { id, entry_count });
44+
45+
let mut subtrees = Vec::with_capacity(subtree_count);
46+
for _ in 0..subtree_count {
47+
let (tree, rest) = one_recursive(data, hash_len)?;
48+
subtrees.push(tree);
49+
data = rest;
50+
}
51+
52+
Some((
53+
Tree {
54+
id: node_id,
55+
name: path.into(),
56+
children: subtrees,
57+
},
58+
data,
59+
))
1560
}
1661

1762
#[cfg(test)]

git-index/src/lib.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ pub mod init {
3030
fn new() -> Self {
3131
State {
3232
timestamp: FileTime::from_system_time(std::time::SystemTime::UNIX_EPOCH),
33-
version: Version::V4,
33+
version: Version::V3,
34+
cache_tree: None,
3435
}
3536
}
3637
}
@@ -71,11 +72,29 @@ pub struct State {
7172
/// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened.
7273
timestamp: FileTime,
7374
version: Version,
75+
pub cache_tree: Option<extension::Tree>,
7476
}
7577

7678
pub(crate) mod util {
7779
#[inline]
7880
pub fn read_u32(b: &[u8]) -> u32 {
7981
u32::from_be_bytes(b.try_into().unwrap())
8082
}
83+
84+
#[inline]
85+
pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
86+
if data.len() < 2 {
87+
return None;
88+
}
89+
data.iter().enumerate().find_map(|(idx, b)| {
90+
(*b == byte).then(|| {
91+
if idx == 0 {
92+
(&[] as &[u8], &data[1..])
93+
} else {
94+
let (a, b) = data.split_at(idx);
95+
(a, &b[1..])
96+
}
97+
})
98+
})
99+
}
81100
}

git-index/tests/file/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod init {
66
}
77

88
#[test]
9+
#[ignore]
910
fn read_v2_with_single_entry_tree() {
1011
let file = file("v2");
1112
assert_eq!(file.version(), Version::V2);

0 commit comments

Comments
 (0)