diff --git a/crates/stackable-operator/CHANGELOG.md b/crates/stackable-operator/CHANGELOG.md index f9ff6cc81..323f02ee2 100644 --- a/crates/stackable-operator/CHANGELOG.md +++ b/crates/stackable-operator/CHANGELOG.md @@ -4,12 +4,19 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Changed + +- BREAKING: Don't parse `/etc/resolv.conf` to auto-detect the Kubernetes cluster domain in case it is not explicitly configured. + Instead the operator will default to `cluster.local`. We revert this now after some concerns where raised, we will + create a follow-up decision instead addressing how we will continue with this ([#896]). + ### Fixed - Fix Kubernetes cluster domain parsing from resolv.conf, e.g. on AWS EKS. We now only consider Kubernetes services domains instead of all domains (which could include non-Kubernetes domains) ([#895]). [#895]: https://github.com/stackabletech/operator-rs/pull/895 +[#896]: https://github.com/stackabletech/operator-rs/pull/896 ## [0.79.0] - 2024-10-18 diff --git a/crates/stackable-operator/fixtures/cluster_domain/fail/invalid.resolv.conf b/crates/stackable-operator/fixtures/cluster_domain/fail/invalid.resolv.conf deleted file mode 100644 index f16e34dc2..000000000 --- a/crates/stackable-operator/fixtures/cluster_domain/fail/invalid.resolv.conf +++ /dev/null @@ -1,2 +0,0 @@ -nameserver 10.243.21.53 -options ndots:5 diff --git a/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes-multiple.resolv.conf b/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes-multiple.resolv.conf deleted file mode 100644 index 578cb8e4f..000000000 --- a/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes-multiple.resolv.conf +++ /dev/null @@ -1,4 +0,0 @@ -search baz svc.foo.bar foo.bar -search sble-operators.svc.cluster.local svc.cluster.local cluster.local -nameserver 10.243.21.53 -options ndots:5 diff --git a/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes.resolv.conf b/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes.resolv.conf deleted file mode 100644 index 9c210febf..000000000 --- a/crates/stackable-operator/fixtures/cluster_domain/pass/kubernetes.resolv.conf +++ /dev/null @@ -1,3 +0,0 @@ -search sble-operators.svc.cluster.local svc.cluster.local cluster.local -nameserver 10.243.21.53 -options ndots:5 diff --git a/crates/stackable-operator/fixtures/cluster_domain/pass/openshift.resolv.conf b/crates/stackable-operator/fixtures/cluster_domain/pass/openshift.resolv.conf deleted file mode 100644 index 305b7f795..000000000 --- a/crates/stackable-operator/fixtures/cluster_domain/pass/openshift.resolv.conf +++ /dev/null @@ -1,3 +0,0 @@ -search openshift-service-ca-operator.svc.cluster.local svc.cluster.local cluster.local cmx.repl-openshift.build -nameserver 172.30.0.10 -options ndots:5 diff --git a/crates/stackable-operator/src/cli.rs b/crates/stackable-operator/src/cli.rs index 022502564..8bfeb26c9 100644 --- a/crates/stackable-operator/src/cli.rs +++ b/crates/stackable-operator/src/cli.rs @@ -115,7 +115,10 @@ use clap::Args; use product_config::ProductConfigManager; use snafu::{ResultExt, Snafu}; -use crate::{logging::TracingTarget, namespace::WatchNamespace}; +use crate::{ + logging::TracingTarget, namespace::WatchNamespace, + utils::cluster_info::KubernetesClusterInfoOpts, +}; pub const AUTHOR: &str = "Stackable GmbH - info@stackable.tech"; @@ -171,15 +174,19 @@ pub enum Command { /// common: ProductOperatorRun, /// } /// use clap::Parser; -/// use stackable_operator::logging::TracingTarget; -/// use stackable_operator::namespace::WatchNamespace; +/// use stackable_operator::{ +/// logging::TracingTarget, +/// namespace::WatchNamespace, +/// }; +/// /// let opts = Command::::parse_from(["foobar-operator", "run", "--name", "foo", "--product-config", "bar", "--watch-namespace", "foobar"]); /// assert_eq!(opts, Command::Run(Run { /// name: "foo".to_string(), /// common: ProductOperatorRun { /// product_config: ProductConfigPath::from("bar".as_ref()), /// watch_namespace: WatchNamespace::One("foobar".to_string()), -/// tracing_target: TracingTarget::None +/// tracing_target: TracingTarget::None, +/// cluster_info_opts: Default::default(), /// }, /// })); /// ``` @@ -205,12 +212,17 @@ pub struct ProductOperatorRun { /// Provides the path to a product-config file #[arg(long, short = 'p', value_name = "FILE", default_value = "", env)] pub product_config: ProductConfigPath, + /// Provides a specific namespace to watch (instead of watching all namespaces) #[arg(long, env, default_value = "")] pub watch_namespace: WatchNamespace, + /// Tracing log collector system #[arg(long, env, default_value_t, value_enum)] pub tracing_target: TracingTarget, + + #[command(flatten)] + pub cluster_info_opts: KubernetesClusterInfoOpts, } /// A path to a [`ProductConfigManager`] spec file @@ -384,6 +396,7 @@ mod tests { product_config: ProductConfigPath::from("bar".as_ref()), watch_namespace: WatchNamespace::One("foo".to_string()), tracing_target: TracingTarget::None, + cluster_info_opts: Default::default(), } ); @@ -395,6 +408,7 @@ mod tests { product_config: ProductConfigPath::from("bar".as_ref()), watch_namespace: WatchNamespace::All, tracing_target: TracingTarget::None, + cluster_info_opts: Default::default(), } ); @@ -407,6 +421,7 @@ mod tests { product_config: ProductConfigPath::from("bar".as_ref()), watch_namespace: WatchNamespace::One("foo".to_string()), tracing_target: TracingTarget::None, + cluster_info_opts: Default::default(), } ); } diff --git a/crates/stackable-operator/src/client.rs b/crates/stackable-operator/src/client.rs index fec3e3501..de2b5f870 100644 --- a/crates/stackable-operator/src/client.rs +++ b/crates/stackable-operator/src/client.rs @@ -1,23 +1,29 @@ -use crate::kvp::LabelSelectorExt; -use crate::utils::cluster_domain::{self, retrieve_cluster_domain, KUBERNETES_CLUSTER_DOMAIN}; +use std::{ + convert::TryFrom, + fmt::{Debug, Display}, +}; use either::Either; use futures::StreamExt; -use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; -use k8s_openapi::{ClusterResourceScope, NamespaceResourceScope}; -use kube::api::{DeleteParams, ListParams, Patch, PatchParams, PostParams, Resource, ResourceExt}; -use kube::client::Client as KubeClient; -use kube::core::Status; -use kube::runtime::wait::delete::delete_and_finalize; -use kube::runtime::{watcher, WatchStreamExt}; -use kube::{Api, Config}; -use serde::de::DeserializeOwned; -use serde::Serialize; +use k8s_openapi::{ + apimachinery::pkg::apis::meta::v1::LabelSelector, ClusterResourceScope, NamespaceResourceScope, +}; +use kube::{ + api::{DeleteParams, ListParams, Patch, PatchParams, PostParams, Resource, ResourceExt}, + client::Client as KubeClient, + core::Status, + runtime::{wait::delete::delete_and_finalize, watcher, WatchStreamExt}, + Api, Config, +}; +use serde::{de::DeserializeOwned, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; -use std::convert::TryFrom; -use std::fmt::{Debug, Display}; use tracing::trace; +use crate::{ + kvp::LabelSelectorExt, + utils::cluster_info::{KubernetesClusterInfo, KubernetesClusterInfoOpts}, +}; + pub type Result = std::result::Result; #[derive(Debug, Snafu)] @@ -78,9 +84,6 @@ pub enum Error { #[snafu(display("unable to create kubernetes client"))] CreateKubeClient { source: kube::Error }, - - #[snafu(display("unable to to resolve kubernetes cluster domain"))] - ResolveKubernetesClusterDomain { source: cluster_domain::Error }, } /// This `Client` can be used to access Kubernetes. @@ -93,6 +96,8 @@ pub struct Client { delete_params: DeleteParams, /// Default namespace as defined in the kubeconfig this client has been created from. pub default_namespace: String, + + pub kubernetes_cluster_info: KubernetesClusterInfo, } impl Client { @@ -100,6 +105,7 @@ impl Client { client: KubeClient, field_manager: Option, default_namespace: String, + kubernetes_cluster_info: KubernetesClusterInfo, ) -> Self { Client { client, @@ -113,6 +119,7 @@ impl Client { }, delete_params: DeleteParams::default(), default_namespace, + kubernetes_cluster_info, } } @@ -517,9 +524,11 @@ impl Client { /// use stackable_operator::client::{Client, initialize_operator}; /// /// #[tokio::main] - /// async fn main(){ + /// async fn main() { /// - /// let client: Client = initialize_operator(None).await.expect("Unable to construct client."); + /// let client = initialize_operator(None, &Default::default()) + /// .await + /// .expect("Unable to construct client."); /// let watcher_config: watcher::Config = /// watcher::Config::default().fields(&format!("metadata.name=nonexistent-pod")); /// @@ -626,38 +635,45 @@ where } } -pub async fn initialize_operator(field_manager: Option) -> Result { - let _ = KUBERNETES_CLUSTER_DOMAIN - .set(retrieve_cluster_domain().context(ResolveKubernetesClusterDomainSnafu)?); - create_client(field_manager).await -} - -async fn create_client(field_manager: Option) -> Result { +pub async fn initialize_operator( + field_manager: Option, + cluster_info_opts: &KubernetesClusterInfoOpts, +) -> Result { let kubeconfig: Config = kube::Config::infer() .await .map_err(kube::Error::InferConfig) .context(InferKubeConfigSnafu)?; let default_namespace = kubeconfig.default_namespace.clone(); let client = kube::Client::try_from(kubeconfig).context(CreateKubeClientSnafu)?; - Ok(Client::new(client, field_manager, default_namespace)) + let cluster_info = KubernetesClusterInfo::new(cluster_info_opts); + + Ok(Client::new( + client, + field_manager, + default_namespace, + cluster_info, + )) } #[cfg(test)] mod tests { + use std::{collections::BTreeMap, time::Duration}; + use futures::StreamExt; - use k8s_openapi::api::core::v1::{Container, Pod, PodSpec}; - use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; - use kube::api::{ObjectMeta, PostParams, ResourceExt}; - use kube::runtime::watcher; - use kube::runtime::watcher::Event; - use std::collections::BTreeMap; - use std::time::Duration; + use k8s_openapi::{ + api::core::v1::{Container, Pod, PodSpec}, + apimachinery::pkg::apis::meta::v1::LabelSelector, + }; + use kube::{ + api::{ObjectMeta, PostParams, ResourceExt}, + runtime::watcher::{self, Event}, + }; use tokio::time::error::Elapsed; #[tokio::test] #[ignore = "Tests depending on Kubernetes are not ran by default"] async fn k8s_test_wait_created() { - let client = super::create_client(None) + let client = super::initialize_operator(None, &Default::default()) .await .expect("KUBECONFIG variable must be configured."); @@ -735,7 +751,7 @@ mod tests { #[tokio::test] #[ignore = "Tests depending on Kubernetes are not ran by default"] async fn k8s_test_wait_created_timeout() { - let client = super::create_client(None) + let client = super::initialize_operator(None, &Default::default()) .await .expect("KUBECONFIG variable must be configured."); @@ -755,7 +771,7 @@ mod tests { #[tokio::test] #[ignore = "Tests depending on Kubernetes are not ran by default"] async fn k8s_test_list_with_label_selector() { - let client = super::create_client(None) + let client = super::initialize_operator(None, &Default::default()) .await .expect("KUBECONFIG variable must be configured."); diff --git a/crates/stackable-operator/src/utils/cluster_domain.rs b/crates/stackable-operator/src/utils/cluster_domain.rs deleted file mode 100644 index c690369f6..000000000 --- a/crates/stackable-operator/src/utils/cluster_domain.rs +++ /dev/null @@ -1,190 +0,0 @@ -use std::{env, path::PathBuf, str::FromStr, sync::OnceLock}; - -use snafu::{OptionExt, ResultExt, Snafu}; -use tracing::instrument; - -use crate::commons::networking::DomainName; - -const KUBERNETES_CLUSTER_DOMAIN_ENV: &str = "KUBERNETES_CLUSTER_DOMAIN"; -const KUBERNETES_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST"; - -const KUBERNETES_CLUSTER_DOMAIN_DEFAULT: &str = "cluster.local"; -const RESOLVE_CONF_FILE_PATH: &str = "/etc/resolv.conf"; - -#[derive(Debug, Snafu)] -pub enum Error { - #[snafu(display("failed to read resolv config from {RESOLVE_CONF_FILE_PATH:?}"))] - ReadResolvConfFile { source: std::io::Error }, - - #[snafu(display("failed to parse {cluster_domain:?} as domain name"))] - ParseDomainName { - source: crate::validation::Errors, - cluster_domain: String, - }, - - #[snafu(display(r#"unable to find "search" entry"#))] - NoSearchEntry, - - #[snafu(display( - r#"unable to find the Kubernetes service domain, which needs to start with "svc.""# - ))] - FindKubernetesServiceDomain, -} - -/// Tries to retrieve the Kubernetes cluster domain. -/// -/// 1. Return `KUBERNETES_CLUSTER_DOMAIN` if set, otherwise -/// 2. Return the cluster domain parsed from the `/etc/resolv.conf` file if `KUBERNETES_SERVICE_HOST` -/// is set, otherwise fall back to `cluster.local`. -/// -/// This variable is initialized in [`crate::client::initialize_operator`], which is called in the -/// main function. It can be used as suggested below. -/// -/// ## Usage -/// -/// ```no_run -/// use stackable_operator::utils::cluster_domain::KUBERNETES_CLUSTER_DOMAIN; -/// -/// let kubernetes_cluster_domain = KUBERNETES_CLUSTER_DOMAIN.get() -/// .expect("KUBERNETES_CLUSTER_DOMAIN must first be set by calling initialize_operator"); -/// -/// tracing::info!(%kubernetes_cluster_domain, "Found cluster domain"); -/// ``` -/// -/// ## See -/// -/// - -/// - -pub static KUBERNETES_CLUSTER_DOMAIN: OnceLock = OnceLock::new(); - -#[instrument] -pub(crate) fn retrieve_cluster_domain() -> Result { - // 1. Read KUBERNETES_CLUSTER_DOMAIN env var - tracing::debug!("Trying to determine the Kubernetes cluster domain..."); - - match env::var(KUBERNETES_CLUSTER_DOMAIN_ENV) { - Ok(cluster_domain) if !cluster_domain.is_empty() => { - let cluster_domain = DomainName::from_str(&cluster_domain) - .context(ParseDomainNameSnafu { cluster_domain })?; - tracing::info!( - %cluster_domain, - "Using Kubernetes cluster domain from {KUBERNETES_CLUSTER_DOMAIN_ENV:?} environment variable" - ); - return Ok(cluster_domain); - } - _ => {} - }; - - // 2. If no env var is set, check if we run in a clustered (Kubernetes/Openshift) environment - // by checking if KUBERNETES_SERVICE_HOST is set: If not default to 'cluster.local'. - tracing::debug!( - "Trying to determine the operator runtime environment as environment variable \ - {KUBERNETES_CLUSTER_DOMAIN_ENV:?} is not set" - ); - - match env::var(KUBERNETES_SERVICE_HOST_ENV) { - Ok(_) => { - let cluster_domain = retrieve_cluster_domain_from_resolv_conf(RESOLVE_CONF_FILE_PATH)?; - let cluster_domain = DomainName::from_str(&cluster_domain) - .context(ParseDomainNameSnafu { cluster_domain })?; - - tracing::info!( - %cluster_domain, - "Using Kubernetes cluster domain from {RESOLVE_CONF_FILE_PATH:?} file" - ); - - Ok(cluster_domain) - } - Err(_) => { - let cluster_domain = DomainName::from_str(KUBERNETES_CLUSTER_DOMAIN_DEFAULT) - .expect("KUBERNETES_CLUSTER_DOMAIN_DEFAULT constant must a valid domain"); - - tracing::info!( - %cluster_domain, - "Could not determine Kubernetes cluster domain as the operator is not running within Kubernetes, assuming default Kubernetes cluster domain" - ); - - Ok(cluster_domain) - } - } -} - -#[instrument] -fn retrieve_cluster_domain_from_resolv_conf( - path: impl Into + std::fmt::Debug, -) -> Result { - let path = path.into(); - let content = std::fs::read_to_string(&path) - .inspect_err(|error| { - tracing::error!(%error, path = %path.display(), "Cannot read resolv conf"); - }) - .context(ReadResolvConfFileSnafu)?; - - // If there are multiple search directives, only the last search directive is relevant. - // See `man 5 resolv.conf` - let last_search_entry = content - .lines() - .rev() - .map(|entry| entry.trim()) - .find(|&entry| entry.starts_with("search")) - .map(|entry| entry.trim_start_matches("search").trim()) - .context(NoSearchEntrySnafu)?; - - // We only care about entries starting with "svc." to limit the entries to the ones used by - // Kubernetes for Services. - let shortest_entry = last_search_entry - .split_ascii_whitespace() - // Normally there should only be one such entry, but we take the first on in any case. - .find(|&entry| entry.starts_with("svc.")) - // Strip the "svc." prefix to get only the cluster domain. - .map(|entry| entry.trim_start_matches("svc.").trim_end()) - .context(FindKubernetesServiceDomainSnafu)?; - - // NOTE (@Techassi): This is really sad and bothers me more than I would like to admit. This - // clone could be removed by using the code directly in the calling function. But that would - // remove the possibility to easily test the parsing. - Ok(shortest_entry.to_owned()) -} - -#[cfg(test)] -mod tests { - use std::path::PathBuf; - - use super::*; - use rstest::rstest; - - #[test] - fn use_different_kubernetes_cluster_domain_value() { - let cluster_domain = "my-cluster.local".to_string(); - - // set different domain via env var - unsafe { - env::set_var(KUBERNETES_CLUSTER_DOMAIN_ENV, &cluster_domain); - } - - // initialize the lock - let _ = KUBERNETES_CLUSTER_DOMAIN.set(retrieve_cluster_domain().unwrap()); - - assert_eq!( - cluster_domain, - KUBERNETES_CLUSTER_DOMAIN.get().unwrap().to_string() - ); - } - - #[rstest] - fn parse_resolv_conf_pass( - #[files("fixtures/cluster_domain/pass/*.resolv.conf")] path: PathBuf, - ) { - assert_eq!( - retrieve_cluster_domain_from_resolv_conf(path).unwrap(), - KUBERNETES_CLUSTER_DOMAIN_DEFAULT - ); - } - - #[rstest] - fn parse_resolv_conf_fail( - #[files("fixtures/cluster_domain/fail/*.resolv.conf")] path: PathBuf, - ) { - assert!(retrieve_cluster_domain_from_resolv_conf(path).is_err()); - } -} diff --git a/crates/stackable-operator/src/utils/cluster_info.rs b/crates/stackable-operator/src/utils/cluster_info.rs new file mode 100644 index 000000000..d31275668 --- /dev/null +++ b/crates/stackable-operator/src/utils/cluster_info.rs @@ -0,0 +1,45 @@ +use std::str::FromStr; + +use crate::commons::networking::DomainName; + +const KUBERNETES_CLUSTER_DOMAIN_DEFAULT: &str = "cluster.local"; + +/// Some information that we know about the Kubernetes cluster. +#[derive(Debug, Clone)] +pub struct KubernetesClusterInfo { + /// The Kubernetes cluster domain, typically `cluster.local`. + pub cluster_domain: DomainName, +} + +#[derive(clap::Parser, Debug, Default, PartialEq, Eq)] +pub struct KubernetesClusterInfoOpts { + /// Kubernetes cluster domain, usually this is `cluster.local`. + // We are not using a default value here, as operators will probably do an more advanced + // auto-detection of the cluster domain in case it is not specified in the future. + #[arg(long, env)] + pub kubernetes_cluster_domain: Option, +} + +impl KubernetesClusterInfo { + pub fn new(cluster_info_opts: &KubernetesClusterInfoOpts) -> Self { + let cluster_domain = match &cluster_info_opts.kubernetes_cluster_domain { + Some(cluster_domain) => { + tracing::info!(%cluster_domain, "Using configured Kubernetes cluster domain"); + + cluster_domain.clone() + } + None => { + // TODO(sbernauer): Do some sort of advanced auto-detection, see https://github.com/stackabletech/issues/issues/436. + // There have been attempts of parsing the `/etc/resolv.conf`, but they have been + // reverted. Please read on the linked issue for details. + let cluster_domain = DomainName::from_str(KUBERNETES_CLUSTER_DOMAIN_DEFAULT) + .expect("KUBERNETES_CLUSTER_DOMAIN_DEFAULT constant must a valid domain"); + tracing::info!(%cluster_domain, "Defaulting Kubernetes cluster domain as it has not been configured"); + + cluster_domain + } + }; + + Self { cluster_domain } + } +} diff --git a/crates/stackable-operator/src/utils/mod.rs b/crates/stackable-operator/src/utils/mod.rs index d6d0021b2..e400072d5 100644 --- a/crates/stackable-operator/src/utils/mod.rs +++ b/crates/stackable-operator/src/utils/mod.rs @@ -1,5 +1,5 @@ pub mod bash; -pub mod cluster_domain; +pub mod cluster_info; pub mod crds; pub mod logging; mod option;