From de45bab839eadd5c495c84401e4965ad59b1bc0a Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Tue, 5 Dec 2023 17:53:13 +0000
Subject: [PATCH 1/3] fix(csi-node/nvmf/fixup): fixup correct nvme controller

When we replace an existing path, the new path has a different
controller number, and so the controller number and the device number
no longer match, meaning we cannot safely dereference
/sys/class/nvme/nvme{major}.
Instead, we can simply dereference /sys/class/block/nvme{major}c*n1/queue:
the major ensures we use the original device number, and the glob
ensures we modify the timeout for all controllers.

Signed-off-by: Tiago Castro
---
 .../csi-driver/src/bin/node/dev/nvmf.rs       | 100 ++++++++++--------
 1 file changed, 55 insertions(+), 45 deletions(-)

diff --git a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
index 57627bfb7..e1b506a33 100644
--- a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
+++ b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
@@ -13,7 +13,6 @@ use csi_driver::PublishParams;
 use glob::glob;
 use nvmeadm::nvmf_subsystem::Subsystem;
 use regex::Regex;
-use tracing::debug;
 use udev::{Device, Enumerator};
 use url::Url;
 use uuid::Uuid;
@@ -200,47 +199,59 @@ impl Attach for NvmfAttach {
     }
 
     async fn fixup(&self) -> Result<(), DeviceError> {
-        if let Some(io_timeout) = self.io_timeout {
-            let device = self
-                .get_device()?
-                .ok_or_else(|| DeviceError::new("NVMe device not found"))?;
-            let dev_name = device.sysname().to_str().unwrap();
-            let major = DEVICE_REGEX
-                .captures(dev_name)
-                .ok_or_else(|| {
-                    DeviceError::new(&format!(
-                        "NVMe device \"{}\" does not match \"{}\"",
-                        dev_name, *DEVICE_REGEX,
-                    ))
-                })?
-                .get(1)
-                .unwrap()
-                .as_str();
-            let pattern = format!("/sys/class/nvme/nvme{major}/nvme*n1/queue");
-            let path = glob(&pattern)
-                .unwrap()
-                .next()
-                .ok_or_else(|| {
-                    DeviceError::new(&format!(
-                        "failed to look up sysfs device directory \"{pattern}\"",
-                    ))
-                })?
-                .map_err(|_| {
-                    DeviceError::new(&format!(
-                        "IO error when reading device directory \"{pattern}\""
-                    ))
-                })?;
-            // If the timeout was higher than nexus's timeout then IOs could
-            // error out earlier than they should. Therefore we should make sure
-            // that timeouts in the nexus are set to a very high value.
-            debug!(
-                "Setting IO timeout on \"{}\" to {}s",
-                path.to_string_lossy(),
-                io_timeout
-            );
-            sysfs::write_value(&path, "io_timeout", 1000 * io_timeout)?;
+        let Some(io_timeout) = self.io_timeout else {
+            return Ok(());
+        };
+
+        let device = self
+            .get_device()?
+            .ok_or_else(|| DeviceError::new("NVMe device not found"))?;
+        let dev_name = device.sysname().to_str().unwrap();
+        let major = DEVICE_REGEX
+            .captures(dev_name)
+            .ok_or_else(|| {
+                DeviceError::new(&format!(
+                    "NVMe device \"{}\" does not match \"{}\"",
+                    dev_name, *DEVICE_REGEX,
+                ))
+            })?
+            .get(1)
+            .unwrap()
+            .as_str();
+        let pattern = format!("/sys/class/block/nvme{major}c*n1/queue");
+        let glob = glob(&pattern).unwrap();
+        let result = glob
+            .into_iter()
+            .map(|glob_result| {
+                match glob_result {
+                    Ok(path) => {
+                        let path_str = path.display();
+                        // If the timeout was higher than nexus's timeout then IOs could
+                        // error out earlier than they should. Therefore we should make sure
+                        // that timeouts in the nexus are set to a very high value.
+                        tracing::debug!("Setting IO timeout on \"{path_str}\" to {io_timeout}s");
+                        sysfs::write_value(&path, "io_timeout", 1000 * io_timeout).map_err(
+                            |error| {
+                                tracing::error!(%error, path=%path_str, "Failed to set io_timeout to {io_timeout}s");
+                                error.into()
+                            },
+                        )
+                    }
+                    Err(error) => {
+                        // This should never happen as we should always have permissions to list.
+                        tracing::error!(%error, "Unable to collect sysfs for /dev/nvme{major}");
+                        Err(DeviceError::new(error.to_string().as_str()))
+                    }
+                }
+            })
+            .collect::<Result<Vec<()>, DeviceError>>();
+        match result {
+            Ok(r) if r.is_empty() => Err(DeviceError::new(&format!(
+                "look up of sysfs device directory \"{pattern}\" found 0 entries",
+            ))),
+            Ok(_) => Ok(()),
+            Err(error) => Err(error),
         }
-        Ok(())
     }
 }
@@ -284,10 +295,9 @@ pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
 /// (note, this is a system-wide parameter)
 pub(crate) fn set_nvmecore_iotimeout(io_timeout_secs: u32) -> Result<(), std::io::Error> {
     let path = Path::new("/sys/module/nvme_core/parameters");
-    debug!(
-        "Setting nvme_core IO timeout on \"{}\" to {}s",
-        path.to_string_lossy(),
-        io_timeout_secs
+    tracing::debug!(
+        "Setting nvme_core IO timeout on \"{path}\" to {io_timeout_secs}s",
+        path = path.to_string_lossy(),
     );
     sysfs::write_value(path, "io_timeout", io_timeout_secs)?;
     Ok(())

From 02e666b6c1162fd801424f73cafc167ecea6c8f3 Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Wed, 6 Dec 2023 01:02:41 +0000
Subject: [PATCH 2/3] fix(nexus/add-child/v1): make add child v1 idempotent

When the v1 nexus add child call was added, it was not made idempotent.
Even though this is not an issue per se, as the child eventually gets
GC'd and re-added, it can cause strange logging.
TODO: should we have different behaviour depending on the state?
For example, if faulted, should we remove/re-add?

Bonus: fixes an old test which stopped working a long time ago when
pstor was enabled for the data-plane, by not enabling pstor for that
particular test only.
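
In short, the add-child flow now behaves as sketched below (an
illustrative sketch only, with simplified names; see the diff for the
real error handling):

    match client.add_child(request).await {
        Ok(nexus) => Ok(nexus),
        // A previous attempt may have succeeded on the io-engine even
        // though the reply was lost, so AlreadyExists is treated as
        // success when the child is in fact part of the fetched nexus.
        Err(error) if error.code() == Code::AlreadyExists => {
            let nexus = client.fetch_nexus(&request.nexus).await?;
            match nexus.child(&request.uri) {
                Some(_child) => Ok(nexus),
                None => Err(error),
            }
        }
        Err(error) => Err(error),
    }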
Signed-off-by: Tiago Castro
---
 .../bin/core/controller/io_engine/v1/nexus.rs | 37 ++++++++++++++-----
 .../agents/src/bin/core/tests/nexus/mod.rs    | 26 +++++++++----
 deployer/src/infra/io_engine.rs               |  2 +-
 deployer/src/lib.rs                           |  9 +++++
 utils/deployer-cluster/src/lib.rs             |  2 +-
 5 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/control-plane/agents/src/bin/core/controller/io_engine/v1/nexus.rs b/control-plane/agents/src/bin/core/controller/io_engine/v1/nexus.rs
index 4d88d153a..bc76fb933 100644
--- a/control-plane/agents/src/bin/core/controller/io_engine/v1/nexus.rs
+++ b/control-plane/agents/src/bin/core/controller/io_engine/v1/nexus.rs
@@ -204,22 +204,39 @@ impl crate::controller::io_engine::NexusChildApi for super::RpcClient {
         err
     )]
     async fn add_child(&self, request: &AddNexusChild) -> Result<Nexus, SvcError> {
-        let rpc_nexus = self
+        let result = self
             .nexus()
             .add_child_nexus(request.to_rpc())
             .await
             .context(GrpcRequestError {
                 resource: ResourceKind::Child,
                 request: "add_child_nexus",
-            })?;
-        match rpc_nexus.into_inner().nexus {
-            None => Err(SvcError::Internal {
-                details: format!(
-                    "resource: {}, request: {}, err: {}",
-                    "Nexus", "add_child", "no nexus returned"
-                ),
-            }),
-            Some(nexus) => Ok(rpc_nexus_to_agent(&nexus, &request.node)?),
+            });
+        match result {
+            Ok(rpc_nexus) => match rpc_nexus.into_inner().nexus {
+                None => Err(SvcError::Internal {
+                    details: format!(
+                        "resource: {}, request: {}, err: {}",
+                        "Nexus", "add_child", "no nexus returned"
+                    ),
+                }),
+                Some(nexus) => Ok(rpc_nexus_to_agent(&nexus, &request.node)?),
+            },
+            Err(error) if error.tonic_code() == tonic::Code::AlreadyExists => {
+                let nexus = self.fetch_nexus(&request.nexus).await?;
+                if let Some(child) = nexus.child(request.uri.as_str()) {
+                    // todo: Should we do anything here depending on the state?
+                    tracing::warn!(
+                        ?child,
+                        nexus=%request.nexus,
+                        "Child is already part of the nexus"
+                    );
+                    Ok(nexus)
+                } else {
+                    Err(error)
+                }
+            }
+            Err(error) => Err(error),
         }
     }
diff --git a/control-plane/agents/src/bin/core/tests/nexus/mod.rs b/control-plane/agents/src/bin/core/tests/nexus/mod.rs
index 390acab42..74e55dd3f 100644
--- a/control-plane/agents/src/bin/core/tests/nexus/mod.rs
+++ b/control-plane/agents/src/bin/core/tests/nexus/mod.rs
@@ -410,6 +410,7 @@ async fn nexus_child_transaction() {
         .with_agents(vec!["core"])
         .with_req_timeouts(grpc_timeout, grpc_timeout)
         .with_grpc_timeouts(grpc_timeout_opts())
+        .with_reconcile_period(Duration::from_secs(100), Duration::from_secs(100))
         .build()
         .await
         .unwrap();
@@ -489,12 +490,12 @@ async fn nexus_child_transaction() {
     // unpause io_engine
     cluster.composer().thaw(io_engine.as_str()).await.unwrap();
 
-    // now it should be shared successfully
-    let uri = nexus_client
+    // now it should be added successfully
+    let child = nexus_client
         .add_nexus_child(&add_child, None)
         .await
         .unwrap();
-    println!("Share uri: {uri:?}");
+    println!("Child: {child:?}");
 
     cluster.composer().pause(io_engine.as_str()).await.unwrap();
 
@@ -520,13 +521,23 @@
             .len(),
         1
     );
+
+    let mut io_engine = cluster.grpc_handle(&cluster.node(0)).await.unwrap();
+    io_engine
+        .add_child(add_child.nexus.as_str(), add_child.uri.as_str(), true)
+        .await
+        .unwrap();
+
+    // now it should be added successfully
+    let child = nexus_client
+        .add_nexus_child(&add_child, None)
+        .await
+        .unwrap();
+    println!("Child: {child:?}");
 }
 
-/// Tests child add and remove operations when the store is temporarily down
-/// TODO: these tests don't work anymore because the io_engine also writes child healthy states
-/// to etcd so we can't simply pause etcd anymore..
+/// Tests child add and remove operations when the store is temporarily down.
 #[tokio::test]
-#[ignore]
 async fn nexus_child_transaction_store() {
     let store_timeout = Duration::from_millis(250);
     let reconcile_period = Duration::from_millis(250);
@@ -539,6 +550,7 @@ async fn nexus_child_transaction_store() {
         .with_reconcile_period(reconcile_period, reconcile_period)
         .with_store_timeout(store_timeout)
         .with_grpc_timeouts(grpc_timeout_opts())
+        .with_options(|b| b.with_io_engine_no_pstor(true))
         .build()
         .await
         .unwrap();
diff --git a/deployer/src/infra/io_engine.rs b/deployer/src/infra/io_engine.rs
index b6a242a9e..0e68d1097 100644
--- a/deployer/src/infra/io_engine.rs
+++ b/deployer/src/infra/io_engine.rs
@@ -78,7 +78,7 @@ impl ComponentAction for IoEngine {
             spec = spec.with_env("DEVELOPER_DELAYED", "1");
         }
 
-        if !options.no_etcd {
+        if !options.no_etcd && !options.io_engine_no_pstor {
             let etcd = format!("etcd.{}:2379", options.cluster_label.name());
             spec = spec.with_args(vec!["-p", &etcd]);
         }
diff --git a/deployer/src/lib.rs b/deployer/src/lib.rs
index 4be1f6cd5..2a358cc76 100644
--- a/deployer/src/lib.rs
+++ b/deployer/src/lib.rs
@@ -183,6 +183,10 @@ pub struct StartOptions {
     )]
     io_engine_api_versions: Vec<IoEngineApiVersion>,
 
+    /// Don't configure the persistent store with the io-engine.
+    #[clap(long)]
+    io_engine_no_pstor: bool,
+
     /// Set the developer delayed env flag of the io_engine reactor.
     #[clap(short, long)]
     pub developer_delayed: bool,
 
@@ -438,6 +442,11 @@ impl StartOptions {
         self
     }
     #[must_use]
+    pub fn with_io_engine_no_pstor(mut self, no_pstor: bool) -> Self {
+        self.io_engine_no_pstor = no_pstor;
+        self
+    }
+    #[must_use]
     pub fn with_io_engine_cores(mut self, cores: u32) -> Self {
         self.io_engine_cores = cores;
         self
diff --git a/utils/deployer-cluster/src/lib.rs b/utils/deployer-cluster/src/lib.rs
index 08f20332a..9e1303c80 100644
--- a/utils/deployer-cluster/src/lib.rs
+++ b/utils/deployer-cluster/src/lib.rs
@@ -288,7 +288,7 @@ impl Cluster {
         Ok(CsiNodeClient { csi, internal })
     }
 
-    /// restart the core agent
+    /// Restart the core agent.
     pub async fn restart_core(&self) {
         self.remove_store_lock(ControlPlaneService::CoreAgent).await;
         self.composer.restart("core").await.unwrap();

From d6f3380f1751622a2122d58792c2d79bd1a2b44b Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Wed, 6 Dec 2023 11:43:31 +0000
Subject: [PATCH 3/3] feat(csi/node/timeout): add nvme-io-engine timeout and
 parse humantime

Adds a new parameter, "--nvme-io-timeout", which is used to set the
timeout per nvme block device.
TODO: Check if this is enough to avoid setting the global timeout..

Also parse "--nvme-core-io-timeout" as humantime as well.

Signed-off-by: Tiago Castro
---
 .../csi-driver/src/bin/node/config.rs         | 34 ++++++++++++++++++-
 .../csi-driver/src/bin/node/dev/nvmf.rs       | 14 ++++----
 .../csi-driver/src/bin/node/main_.rs          | 29 +++++++++++++---
 control-plane/csi-driver/src/context.rs       | 27 +++++++++++++++
 .../bdd/features/csi/node/test_parameters.py  |  5 +++
 5 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/control-plane/csi-driver/src/bin/node/config.rs b/control-plane/csi-driver/src/bin/node/config.rs
index af99a6db6..2ab04cf54 100644
--- a/control-plane/csi-driver/src/bin/node/config.rs
+++ b/control-plane/csi-driver/src/bin/node/config.rs
@@ -18,6 +18,10 @@ pub fn nvme_keep_alive_tmo() -> String {
 pub fn nvme_ctrl_loss_tmo() -> String {
     Parameters::NvmeCtrlLossTmo.as_ref().to_kebab_case()
 }
+/// Command line arg name for `Parameters::NvmeIoTimeout`.
+pub fn nvme_io_tmo() -> String {
+    Parameters::NvmeIoTimeout.as_ref().to_kebab_case()
+}
 
 /// Global configuration parameters.
 #[derive(Debug, Default)]
@@ -42,17 +46,21 @@ pub(crate) struct NvmeConfig {
     /// Default value for `ctrl_loss_tmo` when not specified via the volume parameters (sc).
     ctrl_loss_tmo: Option<u32>,
     keep_alive_tmo: Option<u32>,
+    /// Default value for `io_tmo` when not specified via the volume parameters (sc).
+    io_tmo: Option<humantime::Duration>,
 }
 impl NvmeConfig {
     fn new(
         nr_io_queues: Option<u32>,
         ctrl_loss_tmo: Option<u32>,
         keep_alive_tmo: Option<u32>,
+        io_tmo: Option<humantime::Duration>,
     ) -> Self {
         Self {
             nr_io_queues,
             ctrl_loss_tmo,
             keep_alive_tmo,
+            io_tmo,
         }
     }
     /// Number of IO Queues.
@@ -68,6 +76,10 @@ impl NvmeConfig {
     pub(crate) fn keep_alive_tmo(&self) -> Option<u32> {
         self.keep_alive_tmo
     }
+    /// The io timeout.
+    pub(crate) fn io_tmo(&self) -> Option<humantime::Duration> {
+        self.io_tmo
+    }
 }
 
 /// Get a mutex guard over the `Config`.
@@ -112,7 +124,22 @@ impl TryFrom<NvmeArgValues> for NvmeConfig {
                 error
             )
         })?;
-        Ok(Self::new(nvme_nr_ioq, ctrl_loss_tmo, keep_alive_tmo))
+        let nvme_io_tmo = Parameters::nvme_io_timeout(
+            src.0.get(Parameters::NvmeIoTimeout.as_ref()),
+        )
+        .map_err(|error| {
+            anyhow::anyhow!(
+                "Invalid value for {}, error = {}",
+                Parameters::NvmeIoTimeout.as_ref(),
+                error
+            )
+        })?;
+        Ok(Self::new(
+            nvme_nr_ioq,
+            ctrl_loss_tmo,
+            keep_alive_tmo,
+            nvme_io_tmo,
+        ))
     }
 }
 /// Nvme Arguments taken from the CSI volume calls (storage class parameters).
@@ -155,6 +182,11 @@ impl TryFrom<&ArgMatches> for NvmeArgValues {
             map.0
                 .insert(Parameters::NvmeKeepAliveTmo.to_string(), value.to_string());
         }
+
+        if let Some(value) = matches.get_one::<String>(&nvme_io_tmo()) {
+            map.0
+                .insert(Parameters::NvmeIoTimeout.to_string(), value.to_string());
+        }
         Ok(map)
     }
 }
diff --git a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
index e1b506a33..eff1e3db4 100644
--- a/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
+++ b/control-plane/csi-driver/src/bin/node/dev/nvmf.rs
@@ -34,7 +34,7 @@ pub(super) struct NvmfAttach {
     port: u16,
     uuid: Uuid,
     nqn: String,
-    io_timeout: Option<u32>,
+    io_tmo: Option<u32>,
     nr_io_queues: Option<u32>,
     ctrl_loss_tmo: Option<u32>,
     keep_alive_tmo: Option<u32>,
@@ -49,6 +49,7 @@ impl NvmfAttach {
         uuid: Uuid,
         nqn: String,
         nr_io_queues: Option<u32>,
+        io_tmo: Option<humantime::Duration>,
         ctrl_loss_tmo: Option<u32>,
         keep_alive_tmo: Option<u32>,
         hostnqn: Option<String>,
@@ -58,7 +59,7 @@ impl NvmfAttach {
             port,
             uuid,
             nqn,
-            io_timeout: None,
+            io_tmo: io_tmo.map(|io_tmo| io_tmo.as_secs().try_into().unwrap_or(u32::MAX)),
             nr_io_queues,
             ctrl_loss_tmo,
             keep_alive_tmo,
@@ -103,6 +104,7 @@ impl TryFrom<&Url> for NvmfAttach {
         let nr_io_queues = config().nvme().nr_io_queues();
         let ctrl_loss_tmo = config().nvme().ctrl_loss_tmo();
         let keep_alive_tmo = config().nvme().keep_alive_tmo();
+        let io_tmo = config().nvme().io_tmo();
 
         let hash_query: HashMap<_, _> = url.query_pairs().collect();
         let hostnqn = hash_query.get("hostnqn").map(ToString::to_string);
@@ -113,6 +115,7 @@ impl TryFrom<&Url> for NvmfAttach {
             uuid,
             segments[0].to_string(),
             nr_io_queues,
+            io_tmo,
             ctrl_loss_tmo,
             keep_alive_tmo,
             hostnqn,
@@ -129,9 +132,6 @@ impl Attach for NvmfAttach {
         let publish_context = PublishParams::try_from(context)
             .map_err(|error| DeviceError::new(&error.to_string()))?;
 
-        if let Some(val) = publish_context.io_timeout() {
-            self.io_timeout = Some(*val);
-        }
         if let Some(val) = publish_context.ctrl_loss_tmo() {
             self.ctrl_loss_tmo = Some(*val);
         }
@@ -158,7 +158,7 @@ impl Attach for NvmfAttach {
             Err(NvmeError::SubsystemNotFound { .. }) => {
                 // The default reconnect delay in linux kernel is set to 10s. Use the
                 // same default value unless the timeout is less or equal to 10.
-                let reconnect_delay = match self.io_timeout {
+                let reconnect_delay = match self.io_tmo {
                     Some(io_timeout) => {
                         if io_timeout <= 10 {
                             Some(1)
@@ -199,7 +199,7 @@ impl Attach for NvmfAttach {
     }
 
     async fn fixup(&self) -> Result<(), DeviceError> {
-        let Some(io_timeout) = self.io_timeout else {
+        let Some(io_timeout) = self.io_tmo else {
             return Ok(());
         };
 
diff --git a/control-plane/csi-driver/src/bin/node/main_.rs b/control-plane/csi-driver/src/bin/node/main_.rs
index e75b16f4a..52845cac3 100644
--- a/control-plane/csi-driver/src/bin/node/main_.rs
+++ b/control-plane/csi-driver/src/bin/node/main_.rs
@@ -26,6 +26,7 @@ use std::{
     future::Future,
     io::ErrorKind,
     pin::Pin,
+    str::FromStr,
     sync::Arc,
     task::{Context, Poll},
 };
@@ -124,7 +125,13 @@ pub(super) async fn main() -> anyhow::Result<()> {
                 .long("nvme-core-io-timeout")
                 .value_name("TIMEOUT")
                 .required(false)
-                .help("Sets the global nvme_core module io_timeout, in seconds")
+                .help("Sets the global nvme_core module io_timeout, in seconds or humantime")
+        )
+        .arg(
+            Arg::new("nvme-io-timeout")
+                .long("nvme-io-timeout")
+                .required(false)
+                .help("Sets io_timeout for nvme block devices")
         )
         .arg(
             Arg::new(crate::config::nvme_nr_io_queues())
@@ -162,6 +169,7 @@ pub(super) async fn main() -> anyhow::Result<()> {
         .get_matches();
 
     utils::print_package_info!();
+    println!("{:?}", env::args().collect::<Vec<String>>());
 
     let endpoint = matches.get_one::<String>("grpc-endpoint").unwrap();
     let csi_socket = matches
@@ -183,10 +191,21 @@ pub(super) async fn main() -> anyhow::Result<()> {
         check_ana_and_label_node(matches.get_one::<String>("node-name").expect("required")).await?;
     }
 
-    if let Some(nvme_io_timeout_secs) = matches.get_one::<String>("nvme-core-io-timeout") {
-        let io_timeout_secs: u32 = nvme_io_timeout_secs.parse().expect(
-            "nvme_core io_timeout should be an integer number, representing the timeout in seconds",
-        );
+    if let Some(nvme_io_timeout) = matches.get_one::<String>("nvme-io-timeout") {
+        let _ = humantime::Duration::from_str(nvme_io_timeout)
+            .map_err(|error| anyhow::format_err!("Failed to parse 'nvme-io-timeout': {error}"))?;
+    };
+    if let Some(nvme_io_timeout) = matches.get_one::<String>("nvme-core-io-timeout") {
+        let io_timeout_secs = match humantime::Duration::from_str(nvme_io_timeout) {
+            Ok(human_time) => {
+                human_time.as_secs() as u32
+            }
+            Err(_) => {
+                nvme_io_timeout.parse().expect(
+                    "nvme_core io_timeout should be in humantime or an integer number, representing the timeout in seconds",
+                )
+            }
+        };
 
         if let Err(error) = crate::dev::nvmf::set_nvmecore_iotimeout(io_timeout_secs) {
             anyhow::bail!("Failed to set nvme_core io_timeout: {}", error);
diff --git a/control-plane/csi-driver/src/context.rs b/control-plane/csi-driver/src/context.rs
index f7d2a0c39..9d018df28 100644
--- a/control-plane/csi-driver/src/context.rs
+++ b/control-plane/csi-driver/src/context.rs
@@ -27,7 +27,11 @@ pub fn parse_protocol(proto: Option<&String>) -> Result<VolumeShareProtocol, tonic::Status> {
+    fn parse_human_time(
+        value: Option<&String>,
+    ) -> Result<Option<humantime::Duration>, humantime::DurationError> {
+        Ok(match value {
+            Some(value) => humantime::Duration::from_str(value).map(Some)?,
+            None => None,
+        })
+    }
     fn parse_u32(value: Option<&String>) -> Result<Option<u32>, ParseIntError> {
         Ok(match value {
             Some(value) => value.parse::<u32>().map(Some)?,
             None => None,
         })
     }
@@ -84,6 +96,12 @@ impl Parameters {
     pub fn io_timeout(value: Option<&String>) -> Result<Option<u32>, ParseIntError> {
         Self::parse_u32(value)
     }
+    /// Parse the value for `Self::NvmeIoTimeout`.
+    pub fn nvme_io_timeout(
+        value: Option<&String>,
+    ) -> Result<Option<humantime::Duration>, humantime::DurationError> {
+        Self::parse_human_time(value)
+    }
     /// Parse the value for `Self::StsAffinityGroup`
     pub fn sts_affinity_group(value: Option<&String>) -> Result<Option<bool>, ParseBoolError> {
         Self::parse_bool(value)
     }
@@ -105,6 +123,7 @@ impl Parameters {
 #[derive(Debug)]
 pub struct PublishParams {
     io_timeout: Option<u32>,
+    nvme_io_timeout: Option<humantime::Duration>,
     ctrl_loss_tmo: Option<u32>,
     keep_alive_tmo: Option<u32>,
     fs_type: Option<FileSystem>,
 }
 impl PublishParams {
     /// Get the `Parameters::IoTimeout` value.
     pub fn io_timeout(&self) -> &Option<u32> {
         &self.io_timeout
     }
+    /// Get the `Parameters::NvmeIoTimeout` value.
+    pub fn nvme_io_timeout(&self) -> &Option<humantime::Duration> {
+        &self.nvme_io_timeout
+    }
     /// Get the `Parameters::NvmeCtrlLossTmo` value.
     pub fn ctrl_loss_tmo(&self) -> &Option<u32> {
         &self.ctrl_loss_tmo
     }
@@ -166,6 +189,9 @@ impl TryFrom<&HashMap<String, String>> for PublishParams {
         let io_timeout = Parameters::io_timeout(args.get(Parameters::IoTimeout.as_ref()))
             .map_err(|_| tonic::Status::invalid_argument("Invalid I/O timeout"))?;
+        let nvme_io_timeout =
+            Parameters::nvme_io_timeout(args.get(Parameters::NvmeIoTimeout.as_ref()))
+                .map_err(|_| tonic::Status::invalid_argument("Invalid I/O timeout"))?;
         let ctrl_loss_tmo =
             Parameters::ctrl_loss_tmo(args.get(Parameters::NvmeCtrlLossTmo.as_ref()))
                 .map_err(|_| tonic::Status::invalid_argument("Invalid ctrl_loss_tmo"))?;
@@ -177,6 +203,7 @@ impl TryFrom<&HashMap<String, String>> for PublishParams {
         Ok(Self {
             io_timeout,
+            nvme_io_timeout,
             ctrl_loss_tmo,
             keep_alive_tmo,
             fs_type,
diff --git a/tests/bdd/features/csi/node/test_parameters.py b/tests/bdd/features/csi/node/test_parameters.py
index a173bbe70..48b943bf4 100644
--- a/tests/bdd/features/csi/node/test_parameters.py
+++ b/tests/bdd/features/csi/node/test_parameters.py
@@ -63,6 +63,10 @@ def the_nvme_device_should_report_total_queues(total):
     # max io queues is cpu_count
     # admin q is 1
     assert queue_count == min(total, os.cpu_count() + 1)
+    file = f"/sys/block/nvme2c2n1/queue/io_timeout"
+    io_timeout = int(subprocess.run(["sudo", "cat", file], capture_output=True).stdout)
+    print(f"io_timeout: {io_timeout}")
+    assert io_timeout == 33000
 
 
 @pytest.fixture
@@ -137,6 +141,7 @@ def monitor(proc, result):
             "--csi-socket=/var/tmp/csi-node.sock",
             "--grpc-endpoint=0.0.0.0:50050",
             "--node-name=msn-test",
+            "--nvme-io-timeout=33s",
             f"--nvme-nr-io-queues={io_queues}",
             "-v",
         ],