From ff9cb81d4d5df4716f356461e3e4bd8207d62e10 Mon Sep 17 00:00:00 2001 From: jaellio Date: Sat, 3 Jul 2021 12:20:37 -0700 Subject: [PATCH] pkg/envoy/ads: add error codes Adds error codes for errors related to Envoy ADS. Part of #2866 Signed-off-by: jaellio --- pkg/envoy/ads/cache_stream.go | 4 +- pkg/envoy/ads/grpc.go | 4 +- pkg/envoy/ads/response.go | 13 ++- pkg/envoy/ads/secrets.go | 4 +- pkg/envoy/ads/server.go | 4 +- pkg/envoy/ads/stream.go | 23 +++-- pkg/envoy/xdsutil.go | 17 ++-- pkg/errcode/errcode.go | 166 ++++++++++++++++++++++++++++++++++ 8 files changed, 213 insertions(+), 22 deletions(-) diff --git a/pkg/envoy/ads/cache_stream.go b/pkg/envoy/ads/cache_stream.go index 15e15fa4f8..250a14c350 100644 --- a/pkg/envoy/ads/cache_stream.go +++ b/pkg/envoy/ads/cache_stream.go @@ -14,6 +14,7 @@ import ( "github.com/openservicemesh/osm/pkg/certificate" "github.com/openservicemesh/osm/pkg/constants" "github.com/openservicemesh/osm/pkg/envoy" + "github.com/openservicemesh/osm/pkg/errcode" "github.com/openservicemesh/osm/pkg/k8s/events" ) @@ -33,7 +34,8 @@ func (s *Server) allPodUpdater() { for _, pod := range allpods { proxy, err := GetProxyFromPod(pod) if err != nil { - log.Error().Err(err).Msgf("Could not get proxy from pod %s/%s", pod.Namespace, pod.Name) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrGettingProxyFromPod.String()). + Msgf("Could not get proxy from pod %s/%s", pod.Namespace, pod.Name) continue } diff --git a/pkg/envoy/ads/grpc.go b/pkg/envoy/ads/grpc.go index d31d39b400..5226b1a62d 100644 --- a/pkg/envoy/ads/grpc.go +++ b/pkg/envoy/ads/grpc.go @@ -10,6 +10,7 @@ import ( "github.com/openservicemesh/osm/pkg/envoy" "github.com/openservicemesh/osm/pkg/envoy/registry" + "github.com/openservicemesh/osm/pkg/errcode" ) func receive(requests chan xds_discovery.DiscoveryRequest, server *xds_discovery.AggregatedDiscoveryService_StreamAggregatedResourcesServer, proxy *envoy.Proxy, quit chan struct{}, proxyRegistry *registry.ProxyRegistry) { @@ -23,7 +24,8 @@ func receive(requests chan xds_discovery.DiscoveryRequest, server *xds_discovery log.Debug().Err(recvErr).Msgf("[grpc] Connection terminated") return } - log.Error().Err(recvErr).Msgf("[grpc] Connection error") + log.Error().Err(recvErr).Str(errcode.Kind, errcode.ErrGRPCConnectionFailed.String()). + Msgf("[grpc] Connection error") return } log.Trace().Msgf("[grpc] Received DiscoveryRequest from Envoy with certificate SerialNumber %s", proxy.GetCertificateSerialNumber()) diff --git a/pkg/envoy/ads/response.go b/pkg/envoy/ads/response.go index 899f7ee1a2..3bb283edc7 100644 --- a/pkg/envoy/ads/response.go +++ b/pkg/envoy/ads/response.go @@ -12,6 +12,7 @@ import ( "github.com/openservicemesh/osm/pkg/configurator" "github.com/openservicemesh/osm/pkg/envoy" + "github.com/openservicemesh/osm/pkg/errcode" ) // getTypeResource invokes the XDS handler (LDS, CDS etc.) to respond to the XDS request containing the requests' type and associated resources @@ -73,7 +74,8 @@ func (s *Server) sendResponse(proxy *envoy.Proxy, server *xds_discovery.Aggregat // Generate the resources for this request resources, err := s.getTypeResources(proxy, finalReq) if err != nil { - log.Error().Err(err).Msgf("Creating %s update for Proxy %s", typeURI.Short(), proxy.GetCertificateCommonName()) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrGeneratingReqResource.String()). + Msgf("Error generating response for typeURI: %s, proxy %s", typeURI.Short(), proxy.String()) thereWereErrors = true continue } @@ -93,7 +95,8 @@ func (s *Server) sendResponse(proxy *envoy.Proxy, server *xds_discovery.Aggregat if s.cacheEnabled { // Store the aggregated resources as a full snapshot if err := s.RecordFullSnapshot(proxy, cacheResourceMap); err != nil { - log.Error().Err(err).Msgf("Failed to record snapshot for proxy %s: %v", proxy.GetCertificateCommonName(), err) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrRecordingSnapshot.String()). + Msgf("Failed to record snapshot for proxy %s: %v", proxy.GetCertificateCommonName(), err) thereWereErrors = true } } @@ -122,7 +125,8 @@ func (s *Server) SendDiscoveryResponse(proxy *envoy.Proxy, request *xds_discover for _, res := range resourcesToSend { proto, err := ptypes.MarshalAny(res) if err != nil { - log.Error().Err(err).Msgf("Error marshalling resource %s for proxy %s", typeURI, proxy.GetCertificateSerialNumber()) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrMarshallingXDSResource.String()). + Msgf("Error marshalling resource %s for proxy %s", typeURI, proxy.GetCertificateSerialNumber()) continue } response.Resources = append(response.Resources, proto) @@ -137,7 +141,8 @@ func (s *Server) SendDiscoveryResponse(proxy *envoy.Proxy, request *xds_discover // Send the response if err := (*server).Send(response); err != nil { - log.Error().Err(err).Msgf("Error sending response for type %s to proxy %s", typeURI.Short(), proxy.String()) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrSendingDiscoveryResponse.String()). + Msgf("Error sending response for type %s to proxy %s", typeURI.Short(), proxy.String()) return err } diff --git a/pkg/envoy/ads/secrets.go b/pkg/envoy/ads/secrets.go index 064c8e973d..a0a2b3ea0c 100644 --- a/pkg/envoy/ads/secrets.go +++ b/pkg/envoy/ads/secrets.go @@ -6,6 +6,7 @@ import ( "github.com/openservicemesh/osm/pkg/catalog" "github.com/openservicemesh/osm/pkg/envoy" "github.com/openservicemesh/osm/pkg/envoy/secrets" + "github.com/openservicemesh/osm/pkg/errcode" ) // makeRequestForAllSecrets constructs an SDS DiscoveryRequest as if an Envoy proxy sent it. @@ -25,7 +26,8 @@ func makeRequestForAllSecrets(proxy *envoy.Proxy, meshCatalog catalog.MeshCatalo // TODO(draychev): The proxy Certificate should revolve around ServiceIdentity, not specific to ServiceAccount [https://github.com/openservicemesh/osm/issues/3186] proxyIdentity, err := envoy.GetServiceIdentityFromProxyCertificate(proxy.GetCertificateCommonName()) if err != nil { - log.Error().Err(err).Msgf("Error looking up proxy identity for proxy %s", proxy.String()) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrGettingServiceIdentity.String()). + Msgf("Error looking up proxy identity for proxy %s", proxy.String()) return nil } diff --git a/pkg/envoy/ads/server.go b/pkg/envoy/ads/server.go index 82bf963b6f..20aaa41c24 100644 --- a/pkg/envoy/ads/server.go +++ b/pkg/envoy/ads/server.go @@ -20,6 +20,7 @@ import ( "github.com/openservicemesh/osm/pkg/envoy/rds" "github.com/openservicemesh/osm/pkg/envoy/registry" "github.com/openservicemesh/osm/pkg/envoy/sds" + "github.com/openservicemesh/osm/pkg/errcode" "github.com/openservicemesh/osm/pkg/k8s" "github.com/openservicemesh/osm/pkg/utils" "github.com/openservicemesh/osm/pkg/workerpool" @@ -71,7 +72,8 @@ func (s *Server) withXdsLogMutex(f func()) { func (s *Server) Start(ctx context.Context, cancel context.CancelFunc, port int, adsCert certificate.Certificater) error { grpcServer, lis, err := utils.NewGrpc(ServerType, port, adsCert.GetCertificateChain(), adsCert.GetPrivateKey(), adsCert.GetIssuingCA()) if err != nil { - log.Error().Err(err).Msg("Error starting ADS server") + log.Error().Err(err).Str(errcode.Kind, errcode.ErrStartingADSServer.String()). + Msg("Error starting ADS server") return err } diff --git a/pkg/envoy/ads/stream.go b/pkg/envoy/ads/stream.go index 55dca746eb..db236e2dfc 100644 --- a/pkg/envoy/ads/stream.go +++ b/pkg/envoy/ads/stream.go @@ -13,6 +13,7 @@ import ( "github.com/openservicemesh/osm/pkg/certificate" "github.com/openservicemesh/osm/pkg/constants" "github.com/openservicemesh/osm/pkg/envoy" + "github.com/openservicemesh/osm/pkg/errcode" "github.com/openservicemesh/osm/pkg/identity" "github.com/openservicemesh/osm/pkg/k8s/events" "github.com/openservicemesh/osm/pkg/metricsstore" @@ -43,7 +44,8 @@ func (s *Server) StreamAggregatedResources(server xds_discovery.AggregatedDiscov // When this arrives we will call RegisterProxy() a second time - this time with Pod context! proxy, err := envoy.NewProxy(certCommonName, certSerialNumber, utils.GetIPFromContext(server.Context())) if err != nil { - log.Error().Err(err).Msgf("Error initializing proxy with certificate SerialNumber=%s", certSerialNumber) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrInitializingProxy.String()). + Msgf("Error initializing proxy with certificate SerialNumber=%s", certSerialNumber) return err } @@ -97,7 +99,8 @@ func (s *Server) StreamAggregatedResources(server xds_discovery.AggregatedDiscov case discoveryRequest, ok := <-requests: if !ok { - log.Error().Msgf("gRPC stream closed by proxy %s!", proxy.String()) + log.Error().Str(errcode.Kind, errcode.ErrGRPCStreamClosedByProxy.String()). + Msgf("gRPC stream closed by proxy %s!", proxy.String()) metricsstore.DefaultMetricsStore.ProxyConnectCount.Dec() return errGrpcClosed } @@ -148,8 +151,9 @@ func shouldPushUpdate(proxy *envoy.Proxy) bool { // In ADS, CDS and LDS will come first in all cases. Only allow an control-plane-push update push if // we have sent either to the proxy already. if proxy.GetLastSentNonce(envoy.TypeLDS) == "" && proxy.GetLastSentNonce(envoy.TypeCDS) == "" { - log.Error().Msgf("Proxy %s: LDS and CDS unrequested yet, waiting for first request for this proxy to be responded to", - proxy.String()) + log.Error().Str(errcode.Kind, errcode.ErrUnexpectedXDSRequest.String()). + Msgf("Proxy %s: LDS and CDS unrequested yet, waiting for first request for this proxy to be responded to", + proxy.String()) return false } return true @@ -181,8 +185,9 @@ func respondToRequest(proxy *envoy.Proxy, discoveryRequest *xds_discovery.Discov // Parse TypeURL of the request typeURL, ok := envoy.ValidURI[discoveryRequest.TypeUrl] if !ok { - log.Error().Msgf("Proxy %s: Unknown/Unsupported URI: %s", - proxy.String(), discoveryRequest.TypeUrl) + log.Error().Str(errcode.Kind, errcode.ErrInvalidXDSTypeURI.String()). + Msgf("Proxy %s: Unknown/Unsupported URI: %s", + proxy.String(), discoveryRequest.TypeUrl) return false } @@ -195,7 +200,8 @@ func respondToRequest(proxy *envoy.Proxy, discoveryRequest *xds_discovery.Discov // Parse ACK'd verion on the proxy for this given resource requestVersion, err = parseRequestVersion(discoveryRequest) if err != nil { - log.Error().Err(err).Msgf("Proxy %s: Error parsing version %s for type %s", proxy.String(), discoveryRequest.VersionInfo, typeURL) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrParsingDiscoveryReqVersion.String()). + Msgf("Proxy %s: Error parsing version %s for type %s", proxy.String(), discoveryRequest.VersionInfo, typeURL) return false } @@ -348,7 +354,8 @@ func (s *Server) recordPodMetadata(p *envoy.Proxy) error { } if certSA.ToK8sServiceAccount() != p.PodMetadata.ServiceAccount { - log.Error().Msgf("Service Account referenced in NodeID (%s) does not match Service Account in Certificate (%s). This proxy is not allowed to join the mesh.", p.PodMetadata.ServiceAccount, certSA) + log.Error().Str(errcode.Kind, errcode.ErrMismatchedServiceAccount.String()). + Msgf("Service Account referenced in NodeID (%s) does not match Service Account in Certificate (%s). This proxy is not allowed to join the mesh.", p.PodMetadata.ServiceAccount, certSA) return errServiceAccountMismatch } diff --git a/pkg/envoy/xdsutil.go b/pkg/envoy/xdsutil.go index b4590a9c2c..bb286e9aaa 100644 --- a/pkg/envoy/xdsutil.go +++ b/pkg/envoy/xdsutil.go @@ -20,6 +20,7 @@ import ( "github.com/openservicemesh/osm/pkg/certificate" "github.com/openservicemesh/osm/pkg/constants" "github.com/openservicemesh/osm/pkg/envoy/secrets" + "github.com/openservicemesh/osm/pkg/errcode" "github.com/openservicemesh/osm/pkg/identity" "github.com/openservicemesh/osm/pkg/k8s" "github.com/openservicemesh/osm/pkg/service" @@ -84,7 +85,8 @@ func GetTLSParams() *xds_auth.TlsParameters { func GetAccessLog() []*xds_accesslog_filter.AccessLog { accessLog, err := ptypes.MarshalAny(getStdoutAccessLog()) if err != nil { - log.Error().Err(err).Msg("Error marshalling AccessLog object") + log.Error().Err(err).Str(errcode.Kind, errcode.ErrMarshallingXDSResource.String()). + Msgf("Error marshalling AccessLog object") return nil } return []*xds_accesslog_filter.AccessLog{{ @@ -317,7 +319,8 @@ func getCertificateCommonNameMeta(cn certificate.CommonName) (*certificateCommon } proxyUUID, err := uuid.Parse(chunks[0]) if err != nil { - log.Error().Err(err).Msgf("Error parsing %s into uuid.UUID", chunks[0]) + log.Error().Err(err).Str(errcode.Kind, errcode.ErrParsingXDSCertCN.String()). + Msgf("Error parsing %s into uuid.UUID", chunks[0]) return nil, err } @@ -348,8 +351,9 @@ func GetPodFromCertificate(cn certificate.CommonName, kubecontroller k8s.Control } if len(pods) == 0 { - log.Error().Msgf("Did not find Pod with label %s = %s in namespace %s", - constants.EnvoyUniqueIDLabelName, cnMeta.ProxyUUID, cnMeta.ServiceIdentity.ToK8sServiceAccount().Namespace) + log.Error().Str(errcode.Kind, errcode.ErrFetchingPodFromCert.String()). + Msgf("Did not find Pod with label %s = %s in namespace %s", + constants.EnvoyUniqueIDLabelName, cnMeta.ProxyUUID, cnMeta.ServiceIdentity.ToK8sServiceAccount().Namespace) return nil, ErrDidNotFindPodForCertificate } @@ -358,8 +362,9 @@ func GetPodFromCertificate(cn certificate.CommonName, kubecontroller k8s.Control // This is a limitation we set in place in order to make the mesh easy to understand and reason about. // When a pod belongs to more than one service XDS will not program the Envoy proxy, leaving it out of the mesh. if len(pods) > 1 { - log.Error().Msgf("Found more than one pod with label %s = %s in namespace %s. There can be only one!", - constants.EnvoyUniqueIDLabelName, cnMeta.ProxyUUID, cnMeta.ServiceIdentity.ToK8sServiceAccount().Namespace) + log.Error().Str(errcode.Kind, errcode.ErrPodBelongsToMultipleServices.String()). + Msgf("Found more than one pod with label %s = %s in namespace %s. There can be only one!", + constants.EnvoyUniqueIDLabelName, cnMeta.ProxyUUID, cnMeta.ServiceIdentity.ToK8sServiceAccount().Namespace) return nil, ErrMoreThanOnePodForCertificate } diff --git a/pkg/errcode/errcode.go b/pkg/errcode/errcode.go index 9e92d947bf..1c2c493a62 100644 --- a/pkg/errcode/errcode.go +++ b/pkg/errcode/errcode.go @@ -160,6 +160,66 @@ const ( ErrMeshConfigMarshaling ) +// Range 5000-5500 reserved for errors related to Envoy XDS control plane +const ( + // ErrMarshallingXDSResource indicates an XDS resource could not be marshalled + ErrMarshallingXDSResource ErrCode = iota + 5000 + + // ErrParsingXDSCertCN indicates the configured XDS certificate common name could not be parsed + ErrParsingXDSCertCN + + // ErrFetchingPodFromCert indicates the proxy UUID obtained from a certificate's common name metadata was not + // found as a osm-proxy-uuid label value for any pod + ErrFetchingPodFromCert + + // ErrPodBelongsToMultipleServices indicates a pod in the mesh belongs to more than one service + ErrPodBelongsToMultipleServices + + // ErrGettingProxyFromPod indicates the proxy data structure could not be obtained from the osm-proxy-uuid + // label value on a pods + ErrGettingProxyFromPod + + // ErrGRPCConnectionFailed indicates discovery requests cannot be received by ADS due to a GRPC connection failure + ErrGRPCConnectionFailed + + // ErrSendingDiscoveryResponse indicates the configured discovery response could not be sent + ErrSendingDiscoveryResponse + + // ErrGeneratingReqResource indicates the resources for the discovery response could not be generated + ErrGeneratingReqResource + + // ErrRecordingSnapshot indicates the aggregated resources generate for a discovery response could not be created + ErrRecordingSnapshot + + // ErrGettingServiceIdentity indicates the ServiceIdentity name encoded in the XDS certificate CN could not be + // obtained + ErrGettingServiceIdentity + + // ErrStartingADSServer indicates the gPRC service failed to start + ErrStartingADSServer + + // ERRInitializingProxy indicates an instance of the Envoy proxy that connected to the XDS server could not be + // initialized + ErrInitializingProxy + + // ErrMismatchedServiceAccount inicates the ServiceAccount referenced in the NodeID does not match the + // ServiceAccount specified in the proxy certificate + ErrMismatchedServiceAccount + + // ErrGRPCStreamClosedByProxy indicates the gRPC stream was closed by the proxy + ErrGRPCStreamClosedByProxy + + // ErrUnexpectedXDSRequest indicates that a proxy has not completed its init phase and is not ready to + // receive updates + ErrUnexpectedXDSRequest + + // ErrInvalidXDSTypeURI indicates the TypeURL of the discovery request is invalid + ErrInvalidXDSTypeURI + + // ErrParsingDiscoveryReqVersion indicates the discovery request response version could not be parsed + ErrParsingDiscoveryReqVersion +) + // String returns the error code as a string, ex. E1000 func (e ErrCode) String() string { return fmt.Sprintf("E%d", e) @@ -296,59 +356,76 @@ belong to the service account. ErrFetchingCertSecret: ` The Kubernetes secret containing the certificate could not be retrieved by the system. `, + ErrObtainingCertFromSecret: ` The certificate specified by name could not be obtained by key from the secret's data. `, + ErrObtainingPrivateKeyFromSecret: ` The private key specified by name could not be obtained by key from the secret's data. `, + ErrObtainingCertExpirationFromSecret: ` The certificate expiration specified by name could not be obtained by key from the secret's data. `, + ErrParsingCertExpiration: ` The certificate expiration obtained from the secret's data by name could not be parsed. `, + ErrCreatingCertSecret: ` The secret containing a certificate could not be created by the system. `, + ErrGeneratingPrivateKey: ` A private key failed to be generated. `, + ErrEncodingKeyDERtoPEM: ` The specified private key could be be could not be converted from a DER encoded key to a PEM encoded key. `, + ErrCreatingCertReq: ` The certificate request fails to be created when attempting to issue a certificate. `, + ErrDeletingCertReq: ` The certificate request could not be deleted. `, + ErrCreatingRootCert: ` When creating a new certificate authority, the root certificate could not be obtained by the system. `, + ErrEncodingCertDERtoPEM: ` The specified certificate could not be converted from a DER encoded certificate to a PEM encoded certificate. `, + ErrDecodingPEMCert: ` The specified PEM encoded certificate could not be decoded. `, + ErrDecodingPEMPrivateKey: ` The specified PEM privateKey for the certificate authority's root certificate could not be decoded. `, + ErrIssuingCert: ` An unspecified error occurred when issuing a certificate from the certificate manager. `, + ErrCreatingCert: ` An error occurred when creating a certificate to issue from the certificate manager. `, + ErrInvalidCA: ` The certificate authority privided when issuing a certificate was invalid. `, + ErrRotatingCert: ` The specified certificate could not be rotated. `, @@ -374,5 +451,94 @@ Failed to fetch MeshConfig from cache with specific key. `, ErrMeshConfigMarshaling: ` Failed to marshal MeshConfig into other format. +`, + + // + // Range 5000-5500 + // + ErrMarshallingXDSResource: ` +A XDS resource could not be marshalled. +`, + + ErrParsingXDSCertCN: ` +The XDS certificate common name could not be parsed. The CN should be of the form +... +`, + + ErrFetchingPodFromCert: ` +The proxy UUID obtained from parsing the XDS certificate's common name did not match +the osm-proxy-uuid label value for any pod. The pod associated with the specified Envoy +proxy could not be found. +`, + + ErrPodBelongsToMultipleServices: ` +A pod in the mesh belongs to more than one service. By Open Service Mesh convention +the number of services a pod can belong to is 1. This is a limitation we set in place +in order to make the mesh easy to understand and reason about. When a pod belongs to +more than one service XDS will not program the Envoy proxy, leaving it out of the mesh. +`, + + ErrGettingProxyFromPod: ` +The Envoy proxy data structure created by ADS to reference an Envoy proxy sidecar from +a pod's osm-proxy-uuid label could not be configured. +`, + + ErrGRPCConnectionFailed: ` +A GRPC connection failure occurred and the ADS is no longer able to receive +DiscoveryRequests. +`, + + ErrSendingDiscoveryResponse: ` +The DiscoveryResponse configured by ADS failed to send to the Envoy proxy. +`, + + ErrGeneratingReqResource: ` +The resources to be included in the DiscoveryResponse could not be generated. +`, + + ErrRecordingSnapshot: ` +The aggregated resources generated for a DiscoveryResponse failed to be configured as +a new snapshot in the Envoy xDS Aggregate Discovery Services cache. +`, + + ErrGettingServiceIdentity: ` +The ServiceIdentity specified in the XDS certificate CN could not be obtained when +creating SDS DiscoveryRequests corresponding to all types of secrets associated with +the proxy. +`, + + ErrStartingADSServer: ` +The Aggregate Discovery Server (ADS) created by the OSM controller failed to start. +`, + + ErrInitializingProxy: ` +An Envoy proxy data structure representing a newly connected envoy proxy to the XDS +server could not be initialized. +`, + + ErrMismatchedServiceAccount: ` +The ServiceAccount referenced in the NodeID does not match the ServiceAccount +specified in the proxy certificate. In this case, the proxy is not allowed to be a +part of the mesh. +`, + + ErrGRPCStreamClosedByProxy: ` +The gRPC stream is closed by the proxy and no DiscoveryRequests can be received. +The Stream Agreggated Resource server is terminated for the specified proxy +`, + + ErrUnexpectedXDSRequest: ` +The envoy proxy has not completed the initialization phase and it is not ready +to receive broadcast updates from control plane related changes. New versions +should not be pushed if the first request has not be received. +The broadcast update is ignored for that proxy. +`, + + ErrInvalidXDSTypeURI: ` +The TypeURL of the resource being requested in the DiscoveryRequest is invalid. +`, + + ErrParsingDiscoveryReqVersion: ` +The version of the DiscoveryRequest could not be parsed by ADS. `, }