diff --git a/.changelog/2412.txt b/.changelog/2412.txt
new file mode 100644
index 00000000000..ffbf4ff7d8b
--- /dev/null
+++ b/.changelog/2412.txt
@@ -0,0 +1,8 @@
+```release-note:improvement
+cli/serverinstall/k8s: Fix a problem where deployments would be marked as "Degraded" when they were actually fine.
+```
+
+```release-note:improvement
+cli/serverinstall/k8s: Add a new cluster role and binding to allow nodeport services to work
+```
+
diff --git a/builtin/k8s/platform.go b/builtin/k8s/platform.go
index aba8e886a16..4db0b91a97f 100644
--- a/builtin/k8s/platform.go
+++ b/builtin/k8s/platform.go
@@ -185,17 +185,11 @@ func (p *Platform) resourceDeploymentStatus(
 		}
 	}
 
-	var deployHealth sdk.StatusReport_Health
-	switch mostRecentCondition.Type {
-	case v1.DeploymentAvailable:
-		deployHealth = sdk.StatusReport_READY
-	case v1.DeploymentProgressing:
-		deployHealth = sdk.StatusReport_ALIVE
-	case v1.DeploymentReplicaFailure:
-		deployHealth = sdk.StatusReport_DOWN
-	default:
-		deployHealth = sdk.StatusReport_UNKNOWN
-	}
+	// The most recently updated condition isn't always the most pertinent - a healthy deployment
+	// can have a "Progressing" most recently updated condition at steady-state.
+	// If the deployment exists, we'll mark it as "Ready", and rely on our pod status checks
+	// to give more detailed status.
+	deployHealth := sdk.StatusReport_READY
 
 	// Redact env vars from containers - they can contain secrets
 	for i := 0; i < len(deployResp.Spec.Template.Spec.Containers); i++ {
diff --git a/builtin/k8s/releaser.go b/builtin/k8s/releaser.go
index 44da89ec090..fbcb42af208 100644
--- a/builtin/k8s/releaser.go
+++ b/builtin/k8s/releaser.go
@@ -27,7 +27,7 @@ import (
 	"github.com/hashicorp/waypoint-plugin-sdk/terminal"
 )
 
-// The port that a service will forward to the pod(s)
+// DefaultPort is the port that a service will forward to the pod(s)
const DefaultPort = 80
 
 // Releaser is the ReleaseManager implementation for Kubernetes.
@@ -320,11 +320,15 @@ func (r *Releaser) resourceServiceCreate(
 		nodeclient := clientSet.CoreV1().Nodes()
 		nodes, err := nodeclient.List(ctx, metav1.ListOptions{})
 		if err != nil {
-			return err
+			// Rather than fail the whole release, report the error and then complete.
+			// Print in a standalone step so the output won't get overwritten if we add more step output later.
+			errStep := sg.Add("Cannot determine release URL for nodeport service due to failure to list nodes: %s", err)
+			errStep.Status(terminal.StatusError)
+			errStep.Done()
+		} else {
+			nodeIP := nodes.Items[0].Status.Addresses[0].Address
+			result.Url = fmt.Sprintf("http://%s:%d", nodeIP, service.Spec.Ports[0].NodePort)
 		}
-
-		nodeIP := nodes.Items[0].Status.Addresses[0].Address
-		result.Url = fmt.Sprintf("http://%s:%d", nodeIP, service.Spec.Ports[0].NodePort)
 	} else {
 		result.Url = fmt.Sprintf("http://%s:%d", service.Spec.ClusterIP, service.Spec.Ports[0].Port)
 	}
diff --git a/builtin/k8s/task.go b/builtin/k8s/task.go
index ab3be7ffea0..a4acccc9a60 100644
--- a/builtin/k8s/task.go
+++ b/builtin/k8s/task.go
@@ -125,7 +125,7 @@ task {
 	return doc, nil
 }
 
-// TaskLauncher implements Configurable
+// Config implements Configurable
 func (p *TaskLauncher) Config() (interface{}, error) {
 	return &p.config, nil
 }
diff --git a/internal/cli/runner_agent.go b/internal/cli/runner_agent.go
index 312920c1f36..1ea9ed29ab3 100644
--- a/internal/cli/runner_agent.go
+++ b/internal/cli/runner_agent.go
@@ -42,7 +42,7 @@ type RunnerAgentCommand struct {
 	flagODR bool
 }
 
-// This is how long a runner in ODR mode will wait for it's job assignment before
+// This is how long a runner in ODR mode will wait for its job assignment before
 // timing out.
 var defaultRunnerODRAcceptTimeout = 60 * time.Second
diff --git a/internal/serverinstall/k8s.go b/internal/serverinstall/k8s.go
index e657b851395..efbc8f9e5a0 100644
--- a/internal/serverinstall/k8s.go
+++ b/internal/serverinstall/k8s.go
@@ -11,6 +11,8 @@ import (
 	"time"
 
 	"github.com/ghodss/yaml"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
 	appsv1 "k8s.io/api/apps/v1"
 	apiv1 "k8s.io/api/core/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
@@ -309,8 +311,12 @@ func (i *K8sInstaller) Install(
 			// Ensure the service is ready to use before returning
 			_, err = net.DialTimeout("tcp", httpAddr, 1*time.Second)
 			if err != nil {
+				// Depending on the platform, this can take a long time. On EKS, it's by far the longest step, so an explicit message helps.
+				s.Update("Service %q exists and is configured, but isn't yet accepting incoming connections. Waiting...", serviceName)
 				return false, nil
 			}
+
+			s.Update("Service %q is ready", serviceName)
 			log.Info("http server ready", "httpAddr", addr)
 
 			// Set our advertise address
@@ -1358,6 +1364,43 @@ func newServiceAccount(c k8sConfig) (*apiv1.ServiceAccount, error) {
 	}, nil
 }
 
+// newServiceAccountClusterRoleWithBinding creates the cluster role and binding necessary to create and verify
+// nodeport type services.
+func newServiceAccountClusterRoleWithBinding(c k8sConfig) (*rbacv1.ClusterRole, *rbacv1.ClusterRoleBinding, error) {
+	roleName := "waypoint-runner"
+	return &rbacv1.ClusterRole{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: roleName,
+		},
+		Rules: []rbacv1.PolicyRule{{
+			APIGroups: []string{""},
+			Resources: []string{"nodes"},
+			Verbs:     []string{"get", "list"},
+		}},
+	}, &rbacv1.ClusterRoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: roleName,
+		},
+
+		// Bind the cluster role above to the on-demand runner's service
+		// account, so that deployments run by the runner can list nodes
+		// when creating and verifying nodeport type services.
+		RoleRef: rbacv1.RoleRef{
+			APIGroup: "rbac.authorization.k8s.io",
+			Kind:     "ClusterRole",
+			Name:     roleName,
+		},
+
+		Subjects: []rbacv1.Subject{
+			{
+				Kind:      "ServiceAccount",
+				Name:      c.odrServiceAccount,
+				Namespace: c.namespace,
+			},
+		},
+	}, nil
+}
+
 // newServiceAccountRoleBinding creates the role binding necessary to
 // map the ODR role to the service account.
 func newServiceAccountRoleBinding(c k8sConfig) (*rbacv1.RoleBinding, error) {
@@ -1671,7 +1714,7 @@ func (i *K8sInstaller) initServiceAccount(
 	}
 
 	// Setup the role binding
-	s.Update("Initializing role binding for on-demand runner...")
+	s.Update("Initializing role bindings for on-demand runner...")
 	rbClient := clientset.RbacV1().RoleBindings(i.config.namespace)
 	rb, err := newServiceAccountRoleBinding(i.config)
 	if err != nil {
@@ -1690,6 +1733,31 @@
 		return err
 	}
 
+	cr, crb, err := newServiceAccountClusterRoleWithBinding(i.config)
+	if err != nil {
+		return status.Errorf(codes.Internal, "Failed to get definition for runner service account's cluster role and binding: %q", err)
+	}
+	if cr != nil {
+		crClient := clientset.RbacV1().ClusterRoles()
+		_, err = crClient.Get(ctx, cr.Name, metav1.GetOptions{})
+		if err != nil && !errors.IsNotFound(err) {
+			return status.Errorf(codes.Internal, "Failed to get cluster role %q: %q", cr.Name, err)
+		}
+		if _, err := crClient.Create(ctx, cr, metav1.CreateOptions{}); err != nil && !errors.IsAlreadyExists(err) {
+			return status.Errorf(codes.Internal, "Failed to create cluster role %q: %q", cr.Name, err)
+		}
+	}
+	if crb != nil {
+		crbClient := clientset.RbacV1().ClusterRoleBindings()
+		_, err = crbClient.Get(ctx, crb.Name, metav1.GetOptions{})
+		if err != nil && !errors.IsNotFound(err) {
+			return status.Errorf(codes.Internal, "Failed to get cluster role binding %q: %q", crb.Name, err)
+		}
+		if _, err := crbClient.Create(ctx, crb, metav1.CreateOptions{}); err != nil && !errors.IsAlreadyExists(err) {
+			return status.Errorf(codes.Internal, "Failed to create cluster role binding %q: %q", crb.Name, err)
+		}
+	}
+
 	s.Update("Service account for on-demand runner initialized!")
 	s.Done()
 	return nil
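
To sanity-check that the new ClusterRole and ClusterRoleBinding grant what the releaser needs, here is a minimal standalone sketch (not part of this change) that asks the API server whether the on-demand runner's service account may list nodes. The namespace `default` and service account name `waypoint-runner-odr` are placeholders; substitute whatever `c.namespace` and `c.odrServiceAccount` resolve to in your install.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"path/filepath"

	authorizationv1 "k8s.io/api/authorization/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/homedir"
)

func main() {
	// Placeholder values - use the namespace and ODR service account from your install.
	const namespace = "default"
	const serviceAccount = "waypoint-runner-odr"

	kubeconfig := filepath.Join(homedir.HomeDir(), ".kube", "config")
	cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		log.Fatal(err)
	}
	clientset, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		log.Fatal(err)
	}

	// SubjectAccessReview asks the API server whether a given subject may perform an action.
	// "list nodes" is exactly what the releaser needs in order to build a nodeport URL.
	sar := &authorizationv1.SubjectAccessReview{
		Spec: authorizationv1.SubjectAccessReviewSpec{
			User: fmt.Sprintf("system:serviceaccount:%s:%s", namespace, serviceAccount),
			ResourceAttributes: &authorizationv1.ResourceAttributes{
				Verb:     "list",
				Resource: "nodes",
			},
		},
	}
	resp, err := clientset.AuthorizationV1().SubjectAccessReviews().Create(context.Background(), sar, metav1.CreateOptions{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("list nodes allowed: %v (reason: %s)\n", resp.Status.Allowed, resp.Status.Reason)
}
```

The equivalent one-off check from a shell is `kubectl auth can-i list nodes --as=system:serviceaccount:<namespace>:<service-account>`.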