Skip to content

Commit 977e135

Browse files
committed
fix: static linked list delta cache with memory cap (#851)
Previously, the 64 slot big LRU cache for pack deltas didn't use any memory limit which could lead to memory exhaustion in the face of untypical, large objects. Now we add a generous default limit to do *better* in such situations. It's worth noting though that that even without any cache, the working set of buffers to do delta resolution takes considerable memory, despite trying to keep it minimal. Note that for bigger objects, the cache is now not used at all, which probably leads to terrible performance as not even the base object can be cached.
1 parent f89cbc6 commit 977e135

File tree

3 files changed

+99
-17
lines changed

3 files changed

+99
-17
lines changed

Diff for: Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ unit-tests: ## run all unit tests
151151
cd gix-ref/tests && cargo test --all-features
152152
cd gix-odb && cargo test && cargo test --all-features
153153
cd gix-object && cargo test && cargo test --features verbose-object-parsing-errors
154+
cd gix-pack && cargo test --all-features
154155
cd gix-pack/tests && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \
155156
&& cargo test --features "internal-testing-gix-features-parallel"
156157
cd gix-index/tests && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \

Diff for: gix-pack/src/cache/lru.rs

+98-15
Original file line numberDiff line numberDiff line change
@@ -107,41 +107,74 @@ mod _static {
107107
/// Values of 64 seem to improve performance.
108108
pub struct StaticLinkedList<const SIZE: usize> {
109109
inner: uluru::LRUCache<Entry, SIZE>,
110-
free_list: Vec<Vec<u8>>,
110+
last_evicted: Vec<u8>,
111111
debug: gix_features::cache::Debug,
112+
/// the amount of bytes we are currently holding, taking into account the capacities of all Vecs we keep.
113+
mem_used: usize,
114+
/// The total amount of memory we should be able to hold with all entries combined.
115+
mem_limit: usize,
112116
}
113117

114-
impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
115-
fn default() -> Self {
118+
impl<const SIZE: usize> StaticLinkedList<SIZE> {
119+
/// Create a new list with a memory limit of `mem_limit` in bytes. If 0, there is no memory limit.
120+
pub fn new(mem_limit: usize) -> Self {
116121
StaticLinkedList {
117122
inner: Default::default(),
118-
free_list: Vec::new(),
123+
last_evicted: Vec::new(),
119124
debug: gix_features::cache::Debug::new(format!("StaticLinkedList<{SIZE}>")),
125+
mem_used: 0,
126+
mem_limit: if mem_limit == 0 { usize::MAX } else { mem_limit },
120127
}
121128
}
122129
}
123130

131+
impl<const SIZE: usize> Default for StaticLinkedList<SIZE> {
132+
fn default() -> Self {
133+
Self::new(96 * 1024 * 1024)
134+
}
135+
}
136+
124137
impl<const SIZE: usize> DecodeEntry for StaticLinkedList<SIZE> {
125138
fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: gix_object::Kind, compressed_size: usize) {
139+
// We cannot possibly hold this much.
140+
if data.len() > self.mem_limit {
141+
return;
142+
}
143+
// If we could hold it but are are at limit, all we can do is make space.
144+
let mem_free = self.mem_limit - self.mem_used;
145+
if data.len() > mem_free {
146+
// prefer freeing free-lists instead of clearing our cache
147+
let free_list_cap = self.last_evicted.len();
148+
self.last_evicted = Vec::new();
149+
// still not enough? clear everything
150+
if data.len() > mem_free + free_list_cap {
151+
self.inner.clear();
152+
self.mem_used = 0;
153+
} else {
154+
self.mem_used -= free_list_cap;
155+
}
156+
}
126157
self.debug.put();
158+
let (prev_cap, cur_cap);
127159
if let Some(previous) = self.inner.insert(Entry {
128160
offset,
129161
pack_id,
130-
data: self
131-
.free_list
132-
.pop()
133-
.map(|mut v| {
134-
v.clear();
135-
v.resize(data.len(), 0);
136-
v.copy_from_slice(data);
137-
v
138-
})
139-
.unwrap_or_else(|| Vec::from(data)),
162+
data: {
163+
let mut v = std::mem::take(&mut self.last_evicted);
164+
prev_cap = v.capacity();
165+
v.clear();
166+
v.resize(data.len(), 0);
167+
v.copy_from_slice(data);
168+
cur_cap = v.capacity();
169+
v
170+
},
140171
kind,
141172
compressed_size,
142173
}) {
143-
self.free_list.push(previous.data)
174+
// No need to adjust capacity as we already counted it.
175+
self.last_evicted = previous.data;
144176
}
177+
self.mem_used = self.mem_used + cur_cap - prev_cap;
145178
}
146179

147180
fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(gix_object::Kind, usize)> {
@@ -162,6 +195,56 @@ mod _static {
162195
res
163196
}
164197
}
198+
199+
#[cfg(test)]
200+
mod tests {
201+
use super::*;
202+
203+
#[test]
204+
fn no_limit() {
205+
let c = StaticLinkedList::<10>::new(0);
206+
assert_eq!(
207+
c.mem_limit,
208+
usize::MAX,
209+
"zero is automatically turned into a large limit that is equivalent to unlimited"
210+
);
211+
}
212+
213+
#[test]
214+
fn journey() {
215+
let mut c = StaticLinkedList::<10>::new(100);
216+
assert_eq!(c.mem_limit, 100);
217+
assert_eq!(c.mem_used, 0);
218+
219+
// enough memory for normal operation
220+
let mut last_mem_used = 0;
221+
for _ in 0..10 {
222+
c.put(0, 0, &[0], gix_object::Kind::Blob, 1);
223+
assert!(c.mem_used > last_mem_used);
224+
last_mem_used = c.mem_used;
225+
}
226+
assert_eq!(c.mem_used, 80, "there is a minimal vec size");
227+
assert_eq!(c.inner.len(), 10);
228+
assert_eq!(c.last_evicted.len(), 0);
229+
230+
c.put(0, 0, &(0..20).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
231+
assert_eq!(c.inner.len(), 10);
232+
assert_eq!(c.mem_used, 80 + 20);
233+
assert_eq!(c.last_evicted.len(), 1);
234+
235+
c.put(0, 0, &(0..50).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
236+
assert_eq!(c.inner.len(), 1, "cache clearance wasn't necessary");
237+
assert_eq!(c.last_evicted.len(), 0, "the free list was cleared");
238+
assert_eq!(c.mem_used, 50);
239+
240+
c.put(0, 0, &(0..101).collect::<Vec<_>>(), gix_object::Kind::Blob, 1);
241+
assert_eq!(
242+
c.inner.len(),
243+
1,
244+
"objects that won't ever fit within the memory limit are ignored"
245+
);
246+
}
247+
}
165248
}
166249

167250
#[cfg(feature = "pack-cache-lru-static")]

Diff for: gix-pack/src/cache/object.rs

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
//! # Note
2-
//!
31
//! This module is a bit 'misplaced' if spelled out like 'gix_pack::cache::object::*' but is best placed here for code re-use and
42
//! general usefulness.
53
use crate::cache;

0 commit comments

Comments
 (0)