Skip to content

Commit

Permalink
fix(resize): atomically check for the required size
Browse files Browse the repository at this point in the history
Ensures races don't lead into volume resize failures.

Signed-off-by: Tiago Castro <tiagolobocastro@gmail.com>
  • Loading branch information
tiagolobocastro committed Nov 26, 2024
1 parent 4f112ad commit 522139b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 13 deletions.
15 changes: 5 additions & 10 deletions control-plane/agents/src/bin/core/volume/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ use stor_port::{
pub(super) struct Service {
registry: Registry,
create_volume_limiter: std::sync::Arc<tokio::sync::Semaphore>,
capacity_limit_borrow: std::sync::Arc<parking_lot::RwLock<u64>>,
capacity_limit_borrow: std::sync::Arc<parking_lot::Mutex<u64>>,
}

#[tonic::async_trait]
Expand Down Expand Up @@ -257,7 +257,7 @@ impl Service {
create_volume_limiter: std::sync::Arc::new(tokio::sync::Semaphore::new(
registry.create_volume_limit(),
)),
capacity_limit_borrow: std::sync::Arc::new(parking_lot::RwLock::new(0)),
capacity_limit_borrow: std::sync::Arc::new(parking_lot::Mutex::new(0)),
registry,
}
}
Expand Down Expand Up @@ -614,20 +614,15 @@ impl Service {
.requested_size
.checked_sub(volume.as_ref().size)
.unwrap_or_default();
*self.capacity_limit_borrow.write() += required;
let capacity_limit = self.capacity_limit_borrow.lock();
// If there is a defined system wide capacity limit, ensure we don't breach that.
let current = *self.capacity_limit_borrow.read();
self.specs()
.check_capacity_limit_for_resize(limit, current)
.map_err(|e| {
*self.capacity_limit_borrow.write() -= required;
e
})?;
.check_capacity_limit_for_resize(limit, capacity_limit, required)?;

let resize_ret = volume.resize(&self.registry, request).await;
// Reset the capacity limit that we consumed and will now be accounted in the system's
// current total.
*self.capacity_limit_borrow.write() -= required;
*self.capacity_limit_borrow.lock() -= required;
resize_ret
}
}
8 changes: 5 additions & 3 deletions control-plane/agents/src/bin/core/volume/specs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -845,18 +845,20 @@ impl ResourceSpecsLocked {
pub(crate) fn check_capacity_limit_for_resize(
&self,
cluster_capacity_limit: u64,
current_borrowed_limit: u64,
mut capacity_limit: parking_lot::MutexGuard<u64>,
required: u64,
) -> Result<(), SvcError> {
let specs = self.write();
let total: u64 = specs.volumes.values().map(|v| v.lock().size).sum();
let forthcoming_total = current_borrowed_limit + total;
tracing::trace!(current_borrowed_limit=%current_borrowed_limit, total=%total, forthcoming_total=%forthcoming_total, "Cluster capacity limit checks ");
let forthcoming_total = *capacity_limit + total + required;
tracing::trace!(current_borrowed_limit=%capacity_limit, total=%total, forthcoming_total=%forthcoming_total, "Cluster capacity limit checks");
if forthcoming_total > cluster_capacity_limit {
return Err(SvcError::CapacityLimitExceeded {
cluster_capacity_limit,
excess: forthcoming_total - cluster_capacity_limit,
});
}
*capacity_limit += required;
Ok(())
}

Expand Down

0 comments on commit 522139b

Please sign in to comment.