Skip to content

Commit

Permalink
chore(bors): merge pull request #692
Browse files Browse the repository at this point in the history
692: feat(drain): disallow node draining if ha is disabled r=Abhinandan-Purkait a=Abhinandan-Purkait

Node Drain relies on the HA feature to move and reconnect the nexus. So if HA is disabled node drain will not work and can leave the volumes in an inconsistent state. 

This PR restricts node draining if the HA is not enabled.

Co-authored-by: Abhinandan Purkait <purkaitabhinandan@gmail.com>
  • Loading branch information
mayastor-bors and Abhinandan-Purkait committed Nov 30, 2023
2 parents fa880e7 + b4b3fb1 commit baa101c
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ async fn check_and_drain_node(
context: &PollContext,
node_spec: &mut OperationGuardArc<NodeSpec>,
) -> PollResult {
// Drain should not be allowed if HA is not enabled.
if context.registry().ha_disabled() {
return PollResult::Err(SvcError::DrainNotAllowedWhenHAisDisabled {});
}

if !node_spec.as_ref().is_draining() {
return PollResult::Ok(PollerState::Idle);
}
Expand Down
9 changes: 9 additions & 0 deletions control-plane/agents/src/bin/core/controller/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ pub(crate) struct RegistryInner<S: Store> {
legacy_prefix_present: bool,
/// Thin provisioning parameters.
thin_args: ThinArgs,
/// Check if the HA feature is enabled.
ha_disabled: bool,
}

impl Registry {
Expand All @@ -118,6 +120,7 @@ impl Registry {
create_volume_limit: usize,
host_acl: Vec<HostAccessControl>,
thin_args: ThinArgs,
ha_enabled: bool,
) -> Result<Self, SvcError> {
let store_endpoint = Self::format_store_endpoint(&store_url);
tracing::info!("Connecting to persistent store at {}", store_endpoint);
Expand Down Expand Up @@ -167,6 +170,7 @@ impl Registry {
host_acl,
legacy_prefix_present,
thin_args,
ha_disabled: ha_enabled,
}),
};
registry.init().await?;
Expand All @@ -193,6 +197,11 @@ impl Registry {
Ok(registry)
}

/// Check if the HA feature is enabled.
pub(crate) fn ha_disabled(&self) -> bool {
self.ha_disabled
}

/// Formats the store endpoint with a default port if one isn't supplied.
fn format_store_endpoint(endpoint: &str) -> String {
match endpoint.contains(':') {
Expand Down
6 changes: 6 additions & 0 deletions control-plane/agents/src/bin/core/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ pub(crate) struct CliArgs {
/// Events message-bus endpoint url.
#[clap(long, short)]
events_url: Option<url::Url>,

/// Disable the HA/Failover feature.
/// This is useful when the frontend nodes do not support the NVMe ANA feature.
#[clap(long, env = "HA_DISABLED")]
pub(crate) disable_ha: bool,
}
impl CliArgs {
fn args() -> Self {
Expand Down Expand Up @@ -172,6 +177,7 @@ async fn server(cli_args: CliArgs) -> anyhow::Result<()> {
cli_args.hosts_acl.clone()
},
cli_args.thin_args,
cli_args.disable_ha,
)
.await?;

Expand Down
5 changes: 5 additions & 0 deletions control-plane/agents/src/bin/core/node/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,11 @@ impl Service {

/// Apply a drain label to the specified node. The reconciler will perform the drain.
async fn drain(&self, id: NodeId, label: String) -> Result<Node, SvcError> {
// Don't allow draining if HA_ENABLED is false. If it is undefined we treat it as true.
if self.registry.ha_disabled() {
return Err(SvcError::DrainNotAllowedWhenHAisDisabled {});
}

let mut guarded_node = self.specs().guarded_node(&id).await?;

let spec = guarded_node.drain(&self.registry, label.clone()).await?;
Expand Down
4 changes: 4 additions & 0 deletions control-plane/agents/src/bin/core/volume/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ impl ResourcePublishing for OperationGuardArc<VolumeSpec> {
registry: &Registry,
request: &Self::Republish,
) -> Result<Self::PublishOutput, SvcError> {
// If HA is disabled there is no point in switchover.
if registry.ha_disabled() {
return Err(SvcError::SwitchoverNotAllowedWhenHAisDisabled {});
}
let specs = registry.specs();
let spec = self.as_ref().clone();
let state = registry.volume_state(&request.uuid).await?;
Expand Down
4 changes: 4 additions & 0 deletions control-plane/agents/src/bin/core/volume/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,10 @@ impl Service {
&self,
request: &RepublishVolume,
) -> Result<Volume, SvcError> {
// If HA is disabled there is no point in switchover.
if self.registry.ha_disabled() {
return Err(SvcError::SwitchoverNotAllowedWhenHAisDisabled {});
}
let mut volume = self.specs().volume(&request.uuid).await?;
volume.republish(&self.registry, request).await
}
Expand Down
16 changes: 16 additions & 0 deletions control-plane/agents/src/common/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,10 @@ pub enum SvcError {
ClonedSnapshotVolumeRepl {},
#[snafu(display("The source snapshot is not created"))]
SnapshotNotCreated {},
#[snafu(display("Draining is not allowed without HA"))]
DrainNotAllowedWhenHAisDisabled {},
#[snafu(display("Target switchover is not allowed without HA"))]
SwitchoverNotAllowedWhenHAisDisabled {},
}

impl SvcError {
Expand Down Expand Up @@ -918,6 +922,18 @@ impl From<SvcError> for ReplyError {
source,
extra,
},
SvcError::DrainNotAllowedWhenHAisDisabled {} => ReplyError {
kind: ReplyErrorKind::FailedPrecondition,
resource: ResourceKind::Node,
source,
extra,
},
SvcError::SwitchoverNotAllowedWhenHAisDisabled {} => ReplyError {
kind: ReplyErrorKind::FailedPrecondition,
resource: ResourceKind::Nexus,
source,
extra,
},
}
}
}
Expand Down

0 comments on commit baa101c

Please sign in to comment.