Skip to content

Commit

Permalink
chore(policy): polish policy controller logging
Browse files Browse the repository at this point in the history
There are a few things about the policy controller logging that can be cleaned
up for consistency and clarity:

* We frequently log ERROR messages when processing resources with unexpected
  values. These messages are more appropriately emitted at WARN--we want to
  surface these situations, but they are not really exceptional.
* The leadership status of the status controller is not logged at INFO level, so
  it's not possible to know about status changes without DEBUG logging.
* We generally use Sentence-cased log messages when emitting user-facing
  messages. There are a few situations where we are not consistent.
* The status controller reconciliation logging is somewhat noisy and misleading.
* The status controller does not log any messages when patching resources.

```
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder has changed
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
DEBUG status::Index: linkerd_policy_controller_k8s_status::index: Lease holder reconciling cluster index.name=linkerd-destination-74d7fdc45d-xfb8l
```

The "Lease holder has changed" message actually indicates that the _lease_ has
changed, though the holder may be unchanged.

To improve logging clarity, this change does the following:

* Adds an INFO level log when the leadership status of the controller changes.
* Adds an INFO level log when the status controller patches resources.
* Adds DEBUG level logs when the status controller patches resources.
* Reconciliation housekeeping logging is moved to TRACE level.
* Consistently uses sentence capitalization in user-facing log messages
* Reduces ERROR messages to WARN when handling invalid user-provided data
  (including cluster resources). This ensures that ERRORs are reserved for
  exceptional policy controller states.
  • Loading branch information
olix0r committed Nov 24, 2024
1 parent 3c188d6 commit cbf6550
Show file tree
Hide file tree
Showing 15 changed files with 112 additions and 79 deletions.
2 changes: 1 addition & 1 deletion policy-controller/grpc/src/inbound.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn to_server(srv: &InboundServer, cluster_networks: &[IpNet]) -> proto::Server {
kind: match srv.protocol {
ProxyProtocol::Detect { timeout } => Some(proto::proxy_protocol::Kind::Detect(
proto::proxy_protocol::Detect {
timeout: timeout.try_into().map_err(|error| tracing::warn!(%error, "failed to convert protocol detect timeout to protobuf")).ok(),
timeout: timeout.try_into().map_err(|error| tracing::warn!(%error, "Failed to convert protocol detect timeout to protobuf")).ok(),
http_routes: http::to_route_list(&srv.http_routes, cluster_networks),
http_local_rate_limit: srv.ratelimit.as_ref().map(to_rate_limit),
},
Expand Down
59 changes: 34 additions & 25 deletions policy-controller/grpc/src/outbound.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,40 +531,49 @@ fn default_backend(policy: &OutboundPolicy, original_dst: Option<SocketAddr>) ->
},
)),
},

ParentInfo::EgressNetwork {
namespace, name, ..
} => match original_dst {
Some(original_dst) => outbound::Backend {
metadata: Some(Metadata {
kind: Some(metadata::Kind::Resource(api::meta::Resource {
group: "policy.linkerd.io".to_string(),
kind: "EgressNetwork".to_string(),
name,
namespace,
section: Default::default(),
port: u16::from(policy.port).into(),
})),
}),
} => {
debug_assert!(
original_dst.is_some(),
"Must not serve EgressNetwork for named lookups; IP:PORT required"
);
let metadata = Some(Metadata {
kind: Some(metadata::Kind::Resource(api::meta::Resource {
group: "policy.linkerd.io".to_string(),
kind: "EgressNetwork".to_string(),
name,
namespace,
section: Default::default(),
port: u16::from(policy.port).into(),
})),
});

let Some(addr) = original_dst else {
tracing::error!(
?metadata,
"Unexpected state: EgressNetworks should only be returned when lookup is by IP:PORT; synthesizing invalid backend"
);
return outbound::Backend {
metadata,
queue: None,
kind: None,
};
};

outbound::Backend {
metadata,
queue: Some(default_queue_config()),
kind: Some(outbound::backend::Kind::Forward(
destination::WeightedAddr {
addr: Some(original_dst.into()),
addr: Some(addr.into()),
weight: 1,
..Default::default()
},
)),
},
None => {
tracing::error!("no original_dst for Egresspolicy");
outbound::Backend {
metadata: Some(Metadata {
kind: Some(metadata::Kind::Default("invalid".to_string())),
}),
queue: None,
kind: None,
}
}
},
}
}
}

Expand Down Expand Up @@ -632,7 +641,7 @@ pub(crate) fn convert_duration(
duration
.try_into()
.map_err(|error| {
tracing::error!(%error, "Invalid {name} duration");
tracing::warn!(%error, "Invalid {name} duration");
})
.ok()
}
Expand Down
4 changes: 2 additions & 2 deletions policy-controller/grpc/src/outbound/grpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,11 +454,11 @@ fn convert_to_filter(filter: Filter) -> outbound::grpc_route::Filter {
convert_request_header_modifier_filter(filter),
)),
Filter::RequestRedirect(filter) => {
tracing::warn!(filter = ?filter, "declining to convert invalid filter type for GrpcRoute");
tracing::warn!(filter = ?filter, "Declining to convert invalid filter type for GrpcRoute");
None
}
Filter::ResponseHeaderModifier(filter) => {
tracing::warn!(filter = ?filter, "declining to convert invalid filter type for GrpcRoute");
tracing::warn!(filter = ?filter, "Declining to convert invalid filter type for GrpcRoute");
None
}
},
Expand Down
2 changes: 1 addition & 1 deletion policy-controller/grpc/src/workload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl FromStr for Workload {
fn from_str(s: &str) -> Result<Self, tonic::Status> {
if s.starts_with('{') {
return serde_json::from_str(s).map_err(|error| {
tracing::error!(%error, "Invalid {s} workload string");
tracing::warn!(%error, "Invalid {s} workload string");
tonic::Status::invalid_argument(format!("Invalid workload: {}", s))
});
}
Expand Down
2 changes: 1 addition & 1 deletion policy-controller/k8s/api/src/labels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ impl Expression {
(Operator::Exists, key, None) => labels.contains_key(key),
(Operator::DoesNotExist, key, None) => !labels.contains_key(key),
(operator, key, values) => {
tracing::warn!(?operator, %key, ?values, "illegal match expression");
tracing::warn!(?operator, %key, ?values, "Illegal match expression");
false
}
}
Expand Down
12 changes: 6 additions & 6 deletions policy-controller/k8s/index/src/inbound/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ impl kubert::index::IndexNamespacedResource<k8s::Pod> for Index {
Ok(None) => {}
Ok(Some(pod)) => pod.reindex_servers(&ns.policy, &self.authentications),
Err(error) => {
tracing::error!(%error, "Illegal pod update");
tracing::warn!(%error, "Illegal pod update");
}
}
}
Expand Down Expand Up @@ -523,7 +523,7 @@ impl kubert::index::IndexNamespacedResource<k8s::external_workload::ExternalWork
// Update, so re-index
Ok(Some(workload)) => workload.reindex_servers(&ns.policy, &self.authentications),
Err(error) => {
tracing::error!(%error, "Illegal external workload update");
tracing::warn!(%error, "Illegal external workload update");
}
}
}
Expand Down Expand Up @@ -626,7 +626,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::ServerAuthorization> fo
Ok(meta) => self.ns_or_default_with_reindex(ns, move |ns| {
ns.policy.update_server_authz(name, meta)
}),
Err(error) => tracing::error!(%error, "Illegal server authorization update"),
Err(error) => tracing::warn!(%error, "Illegal server authorization update"),
}
}

Expand Down Expand Up @@ -660,7 +660,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::ServerAuthorization> fo
.added
.push((name, saz)),
Err(error) => {
tracing::error!(ns = %namespace, %name, %error, "Illegal server authorization update")
tracing::warn!(ns = %namespace, %name, %error, "Illegal server authorization update")
}
}
}
Expand Down Expand Up @@ -746,7 +746,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::AuthorizationPolicy> fo
.added
.push((name, spec)),
Err(error) => {
tracing::error!(ns = %namespace, %name, %error, "Illegal server authorization update")
tracing::warn!(ns = %namespace, %name, %error, "Illegal server authorization update")
}
}
}
Expand Down Expand Up @@ -985,7 +985,7 @@ impl kubert::index::IndexNamespacedResource<k8s::policy::HttpLocalRateLimitPolic
.added
.push((name, spec)),
Err(error) => {
tracing::error!(ns = %namespace, %name, %error, "Illegal server ratelimit update")
tracing::warn!(ns = %namespace, %name, %error, "Illegal server ratelimit update")
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions policy-controller/k8s/index/src/inbound/ratelimit_policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ impl TryFrom<k8s::policy::HttpLocalRateLimitPolicy> for Spec {
let type_ = match condition.type_.as_ref() {
"Accepted" => ConditionType::Accepted,
condition_type => {
tracing::error!(%status.target_ref.name, %condition_type, "Unexpected condition type found in status");
tracing::warn!(%status.target_ref.name, %condition_type, "Unexpected condition type found in status");
return None;
}
};
let status = match condition.status.as_ref() {
"True" => true,
"False" => false,
condition_status => {
tracing::error!(%status.target_ref.name, %type_, %condition_status, "Unexpected condition status found in status");
tracing::warn!(%status.target_ref.name, %type_, %condition_status, "Unexpected condition status found in status");
return None
},
};
Expand Down
4 changes: 2 additions & 2 deletions policy-controller/k8s/index/src/inbound/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,15 @@ impl Status {
let type_ = match condition.type_.as_ref() {
"Accepted" => ConditionType::Accepted,
condition_type => {
tracing::error!(%status.parent_ref.name, %condition_type, "Unexpected condition type found in parent status");
tracing::warn!(%status.parent_ref.name, %condition_type, "Unexpected condition type found in parent status");
return None;
}
};
let status = match condition.status.as_ref() {
"True" => true,
"False" => false,
condition_status => {
tracing::error!(%status.parent_ref.name, %type_, %condition_status, "Unexpected condition status found in parent status");
tracing::warn!(%status.parent_ref.name, %type_, %condition_status, "Unexpected condition status found in parent status");
return None
},
};
Expand Down
4 changes: 2 additions & 2 deletions policy-controller/k8s/index/src/inbound/workload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ fn container_http_probe_paths(
match http::Uri::try_from(path) {
Ok(uri) => Some((port, uri.path().to_string())),
Err(error) => {
tracing::warn!(%error, path, "invalid probe path");
tracing::warn!(%error, path, "Invalid probe path");
None
}
}
Expand Down Expand Up @@ -161,7 +161,7 @@ impl Settings {
};

let default_policy = default_policy(anns).unwrap_or_else(|error| {
tracing::warn!(%error, "invalid default policy annotation value");
tracing::warn!(%error, "Invalid default policy annotation value");
None
});

Expand Down
26 changes: 13 additions & 13 deletions policy-controller/k8s/index/src/outbound/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,21 +216,21 @@ impl kubert::index::IndexNamespacedResource<Service> for Index {
let ns = service.namespace().expect("Service must have a namespace");
tracing::debug!(name, ns, "indexing service");
let accrual = parse_accrual_config(service.annotations())
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse accrual config"))
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse accrual config"))
.unwrap_or_default();
let opaque_ports =
ports_annotation(service.annotations(), "config.linkerd.io/opaque-ports")
.unwrap_or_else(|| self.namespaces.cluster_info.default_opaque_ports.clone());

let timeouts = parse_timeouts(service.annotations())
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse timeouts"))
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse timeouts"))
.unwrap_or_default();

let http_retry = http::parse_http_retry(service.annotations()).map_err(|error| {
tracing::error!(%error, service=name, namespace=ns, "failed to parse http retry")
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse http retry")
}).unwrap_or_default();
let grpc_retry = grpc::parse_grpc_retry(service.annotations()).map_err(|error| {
tracing::error!(%error, service=name, namespace=ns, "failed to parse grpc retry")
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse grpc retry")
}).unwrap_or_default();

if let Some(cluster_ips) = service
Expand All @@ -252,7 +252,7 @@ impl kubert::index::IndexNamespacedResource<Service> for Index {
self.services_by_ip.insert(addr, service_ref);
}
Err(error) => {
tracing::error!(%error, service=name, cluster_ip, "invalid cluster ip");
tracing::warn!(%error, service=name, cluster_ip, "Invalid cluster ip");
}
}
}
Expand Down Expand Up @@ -318,7 +318,7 @@ impl kubert::index::IndexNamespacedResource<linkerd_k8s_api::EgressNetwork> for
.expect("EgressNetwork must have a namespace");
tracing::debug!(name, ns, "indexing EgressNetwork");
let accrual = parse_accrual_config(egress_network.annotations())
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse accrual config"))
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse accrual config"))
.unwrap_or_default();
let opaque_ports = ports_annotation(
egress_network.annotations(),
Expand All @@ -327,14 +327,14 @@ impl kubert::index::IndexNamespacedResource<linkerd_k8s_api::EgressNetwork> for
.unwrap_or_else(|| self.namespaces.cluster_info.default_opaque_ports.clone());

let timeouts = parse_timeouts(egress_network.annotations())
.map_err(|error| tracing::error!(%error, service=name, namespace=ns, "failed to parse timeouts"))
.map_err(|error| tracing::warn!(%error, service=name, namespace=ns, "Failed to parse timeouts"))
.unwrap_or_default();

let http_retry = http::parse_http_retry(egress_network.annotations()).map_err(|error| {
tracing::error!(%error, service=name, namespace=ns, "failed to parse http retry")
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse http retry")
}).unwrap_or_default();
let grpc_retry = grpc::parse_grpc_retry(egress_network.annotations()).map_err(|error| {
tracing::error!(%error, service=name, namespace=ns, "failed to parse grpc retry")
tracing::warn!(%error, service=name, namespace=ns, "Failed to parse grpc retry")
}).unwrap_or_default();

let egress_net_ref = ResourceRef {
Expand Down Expand Up @@ -678,7 +678,7 @@ impl Namespace {
) {
Ok(route) => route,
Err(error) => {
tracing::error!(%error, "failed to convert route");
tracing::warn!(%error, "Failed to convert route");
return;
}
};
Expand Down Expand Up @@ -773,7 +773,7 @@ impl Namespace {
) {
Ok(route) => route,
Err(error) => {
tracing::error!(%error, "failed to convert route");
tracing::warn!(%error, "Failed to convert route");
return;
}
};
Expand Down Expand Up @@ -868,7 +868,7 @@ impl Namespace {
match tls::convert_route(&self.namespace, route.clone(), cluster_info, resource_info) {
Ok(route) => route,
Err(error) => {
tracing::error!(%error, "failed to convert route");
tracing::warn!(%error, "Failed to convert route");
return;
}
};
Expand Down Expand Up @@ -964,7 +964,7 @@ impl Namespace {
match tcp::convert_route(&self.namespace, route.clone(), cluster_info, resource_info) {
Ok(route) => route,
Err(error) => {
tracing::error!(%error, "failed to convert route");
tracing::warn!(%error, "Failed to convert route");
return;
}
};
Expand Down
Loading

0 comments on commit cbf6550

Please sign in to comment.