Skip to content

Commit

Permalink
fix: wait for /var to be mounted in kubelet service controller
Browse files Browse the repository at this point in the history
This is a cosmetic fix: when `KubeletServiceController` tries to write
files to `/etc/kubernetes` before `/var` is mounted, it fails.
The controller is then restarted, but every restart involves a backoff
that gets longer with each successive restart.

On the first boot, or when EPHEMERAL is encrypted, mounting might take
considerable time (seconds), so during that time the controller might enter
such a long backoff timeout that it delays the whole boot sequence - the
boot sequence won't finish before `kubelet` is started.

By waiting for `EPHEMERAL` to be mounted before starting the controller
we eliminate long backoff cycles.

Also fix a bug where the `StartAllServices` task might start the kubelet early
(before `KubeletServiceController` is actually going to start it).

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
(cherry picked from commit c1aed62)
  • Loading branch information
smira committed Jun 6, 2022
1 parent 4712e73 commit 6229eef
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 6 deletions.
26 changes: 24 additions & 2 deletions internal/app/machined/pkg/controllers/k8s/kubelet_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ import (
"k8s.io/apimachinery/pkg/runtime/serializer/json"
kubeletconfig "k8s.io/kubelet/config/v1beta1"

runtimetalos "github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/system"
"github.com/talos-systems/talos/internal/app/machined/pkg/system/services"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/machinery/resources/files"
"github.com/talos-systems/talos/pkg/machinery/resources/k8s"
runtimeres "github.com/talos-systems/talos/pkg/machinery/resources/runtime"
"github.com/talos-systems/talos/pkg/machinery/resources/secrets"
)

Expand All @@ -42,6 +44,7 @@ type ServiceManager interface {
// KubeletServiceController renders kubelet configuration files and controls kubelet service lifecycle.
type KubeletServiceController struct {
// V1Alpha1Services is the service manager the controller is given at construction time.
// NOTE(review): presumably used to start/stop the kubelet service; the ServiceManager
// interface body is not visible here — confirm against its definition.
V1Alpha1Services ServiceManager
// V1Alpha1Mode is the runtime mode of the platform. Run compares it against
// runtimetalos.ModeContainer: in container mode EPHEMERAL is treated as always mounted,
// so a missing EPHEMERAL mount status does not make the controller keep waiting.
V1Alpha1Mode runtimetalos.Mode
}

// Name implements controller.Controller interface.
Expand All @@ -63,14 +66,20 @@ func (ctrl *KubeletServiceController) Outputs() []controller.Output {
//
//nolint:gocyclo,cyclop
func (ctrl *KubeletServiceController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
// initially, wait for the machine-id to be generated
// initially, wait for the machine-id to be generated and /var to be mounted
if err := r.UpdateInputs([]controller.Input{
{
Namespace: files.NamespaceName,
Type: files.EtcFileStatusType,
ID: pointer.To("machine-id"),
Kind: controller.InputWeak,
},
{
Namespace: runtimeres.NamespaceName,
Type: runtimeres.MountStatusType,
ID: pointer.To(constants.EphemeralPartitionLabel),
Kind: controller.InputWeak,
},
}); err != nil {
return err
}
Expand All @@ -91,10 +100,23 @@ func (ctrl *KubeletServiceController) Run(ctx context.Context, r controller.Runt
return fmt.Errorf("error getting etc file status: %w", err)
}

_, err = r.Get(ctx, resource.NewMetadata(runtimeres.NamespaceName, runtimeres.MountStatusType, constants.EphemeralPartitionLabel, resource.VersionUndefined))
if err != nil {
if state.IsNotFoundError(err) {
// in container mode EPHEMERAL is always mounted
if ctrl.V1Alpha1Mode != runtimetalos.ModeContainer {
// wait for the EPHEMERAL to be mounted
continue
}
} else {
return fmt.Errorf("error getting ephemeral mount status: %w", err)
}
}

break
}

// normal reconcile loop, ignore cri state
// normal reconcile loop
if err := r.UpdateInputs([]controller.Input{
{
Namespace: k8s.NamespaceName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -731,19 +731,24 @@ func StartAllServices(seq runtime.Sequence, data interface{}) (runtime.TaskExecu

svcs := system.Services(r)

// load the kubelet service, but don't start it;
// KubeletServiceController will start it once it's ready.
svcs.Load(
&services.CRI{},
&services.Kubelet{},
)

serviceList := []system.Service{
&services.CRI{},
}

switch t := r.Config().Machine().Type(); t {
case machine.TypeInit:
svcs.Load(
serviceList = append(serviceList,
&services.Trustd{},
&services.Etcd{Bootstrap: true},
)
case machine.TypeControlPlane:
svcs.Load(
serviceList = append(serviceList,
&services.Trustd{},
&services.Etcd{},
)
Expand All @@ -755,7 +760,7 @@ func StartAllServices(seq runtime.Sequence, data interface{}) (runtime.TaskExecu
panic(fmt.Sprintf("unexpected machine type %v", t))
}

system.Services(r).StartAll()
svcs.LoadAndStart(serviceList...)

all := []conditions.Condition{}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
&k8s.KubeletConfigController{},
&k8s.KubeletServiceController{
V1Alpha1Services: system.Services(ctrl.v1alpha1Runtime),
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&k8s.KubeletSpecController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
Expand Down

0 comments on commit 6229eef

Please sign in to comment.