diff --git a/src/dns/server.rs b/src/dns/server.rs index 8815fbb1..e681417d 100644 --- a/src/dns/server.rs +++ b/src/dns/server.rs @@ -25,7 +25,7 @@ use hickory_proto::error::ProtoErrorKind; use hickory_proto::op::ResponseCode; use hickory_proto::rr::rdata::{A, AAAA, CNAME}; use hickory_proto::rr::{Name, RData, Record, RecordType}; -use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::{NameServerConfig, ResolverConfig, ResolverOpts}; use hickory_resolver::system_conf::read_system_conf; use hickory_server::authority::LookupError; use hickory_server::server::Request; @@ -331,6 +331,10 @@ impl Store { // Iterate over all possible aliases for the requested hostname from the perspective of // the client (e.g. , ., ..svc, ..svc.cluster.local). + trace!( + "checking aliases {:#?}", + self.get_aliases(client, requested_name) + ); for alias in self.get_aliases(client, requested_name) { // For each alias, try matching against all possible wildcards. // @@ -340,6 +344,10 @@ impl Store { // 'www.example.com' (the alias, itself) // '*.example.com' // '*.com' + trace!( + "checking against wildcards {:#?}", + get_wildcards(&alias.name) + ); for mut search_name in get_wildcards(&alias.name) { // Convert the name to a string for lookup, removing the trailing '.'. search_name.set_fqdn(false); @@ -360,7 +368,8 @@ impl Store { // its better than *ALL* ServiceEntry doing this .filter(|service| { // Domain will be like `.svc.cluster.local.` (trailing .), so ignore the last character. - let domain = self.svc_domain.to_utf8(); + let domain = ".".to_string() + &self.svc_domain.to_utf8(); + let domain = domain .strip_suffix('.') .expect("the svc domain must have a trailing '.'"); @@ -538,6 +547,7 @@ impl Resolver for Store { // Find the service for the requested host. let requested_name = Name::from(request.query().name().clone()); + trace!("incoming request {requested_name:?}"); let Some(service_match) = self.find_server(&client, &requested_name) else { trace!("unknown host, forwarding"); // Unknown host. Forward to the upstream resolver. @@ -607,6 +617,7 @@ impl Resolver for Store { } /// An alias for the requested hostname. +#[derive(Debug)] struct Alias { /// The name to be used in the search. name: Name, @@ -623,6 +634,7 @@ impl Display for Alias { } /// Created for an alias generated by stripping a search domain from the requested host. +#[derive(Debug)] struct Stripped { /// The requested hostname with the `search_domain` removed. name: Name, @@ -632,6 +644,7 @@ struct Stripped { } /// Returned when a server was successfully found for the requested hostname. +#[derive(Debug)] struct ServerMatch { /// The hostname that was used to find the service. This is identical to the /// service hostname, except that it is an FQDN [Name]. @@ -715,13 +728,17 @@ pub trait Forwarder: Send + Sync { } /// Creates the appropriate DNS forwarder for the proxy mode. -pub fn forwarder_for_mode(proxy_mode: ProxyMode) -> Result, Error> { +pub fn forwarder_for_mode( + proxy_mode: ProxyMode, + cluster_domain: String, +) -> Result, Error> { Ok(match proxy_mode { ProxyMode::Shared => { // TODO(https://github.com/istio/ztunnel/issues/555): Use pod settings if available. - Arc::new(SystemForwarder::new()?) + // Today, we only support the basic namespace awareness + Arc::new(SystemForwarder::new(true, cluster_domain)?) } - ProxyMode::Dedicated => Arc::new(SystemForwarder::new()?), + ProxyMode::Dedicated => Arc::new(SystemForwarder::new(false, cluster_domain)?), }) } @@ -730,12 +747,17 @@ pub fn forwarder_for_mode(proxy_mode: ProxyMode) -> Result, E /// that would have been used by the client. For shared proxy mode, this will be the resolver /// configuration for the ztunnel DaemonSet (i.e. node-level resolver settings). struct SystemForwarder { - search_domains: Vec, + search_domains: SearchDomains, resolver: Arc, } +enum SearchDomains { + Static(Vec), + Dynamic(String), +} + impl SystemForwarder { - fn new() -> Result { + fn new(per_pod: bool, cluster_domain: String) -> Result { // Get the resolver config from /etc/resolv.conf. let (cfg, opts) = read_system_conf().map_err(|e| Error::Generic(Box::new(e)))?; @@ -744,6 +766,24 @@ impl SystemForwarder { let search_domains = cfg.search().to_vec(); let name_servers = cfg.name_servers().to_vec(); + Self::from_parts( + per_pod, + cluster_domain, + opts, + domain, + search_domains, + name_servers, + ) + } + + fn from_parts( + per_pod: bool, + cluster_domain: String, + opts: ResolverOpts, + domain: Option, + search_domains: Vec, + name_servers: Vec, + ) -> Result { // Remove the search list before passing to the resolver. The local resolver that // sends the original request will already have search domains applied. We want // this resolver to simply use the request host rather than re-adding search domains. @@ -753,6 +793,13 @@ impl SystemForwarder { let resolver = Arc::new( dns::forwarder::Forwarder::new(cfg, opts).map_err(|e| Error::Generic(Box::new(e)))?, ); + let search_domains = if per_pod { + // Standard Kubernetes search is 'istio-system.svc.cluster.local svc.cluster.local cluster.local' + // But we need a *per-pod* one, or we will search for the wrong thing + SearchDomains::Dynamic(cluster_domain) + } else { + SearchDomains::Static(search_domains) + }; Ok(Self { search_domains, @@ -763,8 +810,17 @@ impl SystemForwarder { #[async_trait::async_trait] impl Forwarder for SystemForwarder { - fn search_domains(&self, _: &Workload) -> Vec { - self.search_domains.clone() + fn search_domains(&self, wl: &Workload) -> Vec { + // TODO: https://github.com/istio/ztunnel/issues/555 really get this from pods + // Today we hardcode the default search assumptions + match &self.search_domains { + SearchDomains::Static(s) => s.clone(), + SearchDomains::Dynamic(cluster_domain) => vec![ + Name::from_utf8(format!("{}.svc.{cluster_domain}", wl.namespace)).unwrap(), + Name::from_utf8(format!("svc.{cluster_domain}")).unwrap(), + Name::from_utf8(cluster_domain).unwrap(), + ], + } } async fn forward( @@ -787,7 +843,6 @@ mod tests { use prometheus_client::registry::Registry; use super::*; - use crate::metrics; use crate::strng; use crate::test_helpers::dns::{ a, aaaa, cname, ip, ipv4, ipv6, n, new_message, new_tcp_client, new_udp_client, run_dns, @@ -800,6 +855,7 @@ mod tests { use crate::xds::istio::workload::PortList as XdsPortList; use crate::xds::istio::workload::Service as XdsService; use crate::xds::istio::workload::Workload as XdsWorkload; + use crate::{metrics, test_helpers}; const NS1: &str = "ns1"; const NS2: &str = "ns2"; @@ -937,8 +993,6 @@ mod tests { assert_eq!(expected, actual); } - // TODO(nmittler): Test headless services (https://github.com/istio/ztunnel/issues/554). - // TODO(nmittler): Test truncation once fixed (https://github.com/bluejekyll/trust-dns/issues/1973). #[tokio::test] async fn lookup() { initialize_telemetry(); @@ -1425,6 +1479,39 @@ mod tests { assert_eq!(75, resp.answers().len(), "expected UDP to be truncated"); } + #[test] + fn search_domains() { + let opts = ResolverOpts::default(); + let search = vec![ + as_name("istio-system.svc.cluster.local"), + as_name("svc.cluster.local"), + as_name("cluster.local"), + ]; + let f = SystemForwarder::from_parts( + true, + "cluster.local".to_string(), + opts, + None, + search, + vec![], + ) + .unwrap(); + let make_workload = |ns: &str| Workload { + name: "something".into(), + namespace: ns.into(), + ..test_helpers::test_default_workload() + }; + + assert_eq!( + f.search_domains(&make_workload("default-ns")), + vec![ + as_name("default-ns.svc.cluster.local"), + as_name("svc.cluster.local"), + as_name("cluster.local"), + ] + ); + } + /// Sort the IP records so that we can directly compare them to the expected. The resulting /// list will contain CNAME first, followed by A, and then by AAAA. Within each record type, /// records will be sorted in ascending order by their string representation.