From 74861573a793f9e143d7d2638990f37ec639aa88 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov
Date: Wed, 2 Oct 2024 14:38:00 +0400
Subject: [PATCH] fix: multiple fixes for LVM activation

Two fixes landed in pkgs/lvm2:

* https://github.com/siderolabs/pkgs/pull/1041
* https://github.com/siderolabs/pkgs/pull/1042

Other fixes in this PR:

* adjust the controller logic: drop the wait for the EPHEMERAL mount
  status, rework the seen-volume bookkeeping, parse pvscan output as
  key-value pairs, and accumulate per-VG activation errors instead of
  failing fast
* make the Rook test use a more complicated, encrypted setup which
  exercises LVM
* adjust the LVM test to handle the case when there is more than one
  worker, by pinning the test pods to a single node

Signed-off-by: Andrey Smirnov
---
 Makefile                                           |  2 +-
 .../app/machined/pkg/controllers/block/lvm.go      | 59 ++++++++-----------
 internal/integration/api/volumes.go                | 23 ++++++--
 internal/integration/base/k8s.go                   |  7 +++
 .../testdata/rook-ceph-cluster-values.yaml         |  5 ++
 pkg/machinery/gendata/data/pkgs                    |  2 +-
 6 files changed, 56 insertions(+), 42 deletions(-)
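
Note on the controller change below: activation failures are now
accumulated with hashicorp/go-multierror instead of aborting the
reconcile loop on the first error, so one broken VG no longer blocks
activation of the remaining ones. A minimal sketch of that pattern;
the activate helper and the VG names are hypothetical stand-ins for
the real activation command:

package main

import (
	"fmt"

	"github.com/hashicorp/go-multierror"
)

// activate is a hypothetical stand-in for the LVM activation command.
func activate(vgName string) error {
	if vgName == "vg-bad" {
		return fmt.Errorf("device missing")
	}

	return nil
}

func main() {
	var multiErr error

	// one failing VG no longer aborts activation of the others
	for _, vgName := range []string{"vg0", "vg-bad", "vg1"} {
		if err := activate(vgName); err != nil {
			multiErr = multierror.Append(multiErr, fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err))
		}
	}

	fmt.Println(multiErr) // reports only the vg-bad failure
}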
diff --git a/Makefile b/Makefile
index dadcb0998e..fdf05031b0 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ ARTIFACTS := _out
 TOOLS ?= ghcr.io/siderolabs/tools:v1.9.0-alpha.0-2-g9f2189b
 
 PKGS_PREFIX ?= ghcr.io/siderolabs
-PKGS ?= v1.9.0-alpha.0-11-g232a153
+PKGS ?= v1.9.0-alpha.0-13-ge2a561f
 EXTRAS ?= v1.9.0-alpha.0
 
 KRES_IMAGE ?= ghcr.io/siderolabs/kres:latest
diff --git a/internal/app/machined/pkg/controllers/block/lvm.go b/internal/app/machined/pkg/controllers/block/lvm.go
index 8ce08dd64b..cfd5967962 100644
--- a/internal/app/machined/pkg/controllers/block/lvm.go
+++ b/internal/app/machined/pkg/controllers/block/lvm.go
@@ -11,16 +11,11 @@ import (
 
 	"github.com/cosi-project/runtime/pkg/controller"
 	"github.com/cosi-project/runtime/pkg/safe"
-	"github.com/cosi-project/runtime/pkg/state"
 	"github.com/hashicorp/go-multierror"
-	"github.com/siderolabs/gen/optional"
 	"github.com/siderolabs/go-cmd/pkg/cmd"
 	"go.uber.org/zap"
 
-	"github.com/siderolabs/talos/pkg/machinery/constants"
 	"github.com/siderolabs/talos/pkg/machinery/resources/block"
-	runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
-	"github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
 )
 
 // LVMActivationController activates LVM volumes when they are discovered by the block.DiscoveryController.
@@ -37,12 +32,6 @@ func (ctrl *LVMActivationController) Name() string {
 // Inputs implements controller.Controller interface.
 func (ctrl *LVMActivationController) Inputs() []controller.Input {
 	return []controller.Input{
-		{
-			Namespace: v1alpha1.NamespaceName,
-			Type:      runtimeres.MountStatusType,
-			ID:        optional.Some(constants.EphemeralPartitionLabel),
-			Kind:      controller.InputWeak,
-		},
 		{
 			Namespace: block.NamespaceName,
 			Type:      block.DiscoveredVolumeType,
@@ -75,15 +64,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
 		case <-r.EventCh():
 		}
 
-		if _, err := safe.ReaderGetByID[*runtimeres.MountStatus](ctx, r, constants.EphemeralPartitionLabel); err != nil {
-			if state.IsNotFoundError(err) {
-				// wait for the mount status to be available
-				continue
-			}
-
-			return fmt.Errorf("failed to get mount status: %w", err)
-		}
-
 		discoveredVolumes, err := safe.ReaderListAll[*block.DiscoveredVolume](ctx, r)
 		if err != nil {
 			return fmt.Errorf("failed to list discovered volumes: %w", err)
@@ -92,17 +72,19 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
 		var multiErr error
 
 		for iterator := discoveredVolumes.Iterator(); iterator.Next(); {
-			if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
-				continue
-			}
-
 			if iterator.Value().TypedSpec().Name != "lvm2-pv" {
+				// if the volume is not an LVM volume the moment we saw it, we can skip it
+				// we need to activate the volumes only on reboot, not when they are first formatted
 				ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}
 
 				continue
 			}
 
-			logger.Info("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))
+			if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
+				continue
+			}
+
+			logger.Debug("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))
 
 			vgName, err := ctrl.checkVGNeedsActivation(ctx, iterator.Value().TypedSpec().DevPath)
 			if err != nil {
@@ -112,8 +94,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
 			}
 
 			if vgName == "" {
-				ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}
-
 				continue
 			}
@@ -132,10 +112,10 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
 				"event",
 				vgName,
 			); err != nil {
-				return fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err)
+				multiErr = multierror.Append(multiErr, fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err))
+			} else {
+				ctrl.activatedVGs[vgName] = struct{}{}
 			}
-
-			ctrl.activatedVGs[vgName] = struct{}{}
 		}
 
 		if multiErr != nil {
@@ -153,7 +133,6 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
 		"/sbin/lvm",
 		"pvscan",
 		"--cache",
-		"--verbose",
 		"--listvg",
 		"--checkcomplete",
 		"--vgonline",
@@ -166,11 +145,19 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
 		return "", fmt.Errorf("failed to check if LVM volume backed by device %s needs activation: %w", devicePath, err)
 	}
 
-	if strings.HasPrefix(stdOut, "LVM_VG_NAME_INCOMPLETE") {
-		return "", nil
-	}
+	// parse the key-value pairs from the udev output
+	for _, line := range strings.Split(stdOut, "\n") {
+		key, value, ok := strings.Cut(line, "=")
+		if !ok {
+			continue
+		}
 
-	vgName := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSuffix(stdOut, "\n"), "LVM_VG_NAME_COMPLETE='"), "'")
+		value = strings.Trim(value, "'\"")
+
+		if key == "LVM_VG_NAME_COMPLETE" {
+			return value, nil
+		}
+	}
 
-	return vgName, nil
+	return "", nil
 }
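
For reference, pvscan invoked with --udevoutput (as in
checkVGNeedsActivation above) prints key=value pairs with the values
quoted for udev; the VG name appears under LVM_VG_NAME_COMPLETE once
the volume group is complete. A self-contained sketch of the new
parsing loop; the sample output lines and the "vg0" name are
illustrative assumptions, not captured output:

package main

import (
	"fmt"
	"strings"
)

// parseVGName mirrors the loop added in lvm.go: scan key=value lines,
// strip the udev quoting, and return the complete VG name if present.
func parseVGName(stdOut string) string {
	for _, line := range strings.Split(stdOut, "\n") {
		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}

		// values are quoted for udev, e.g. LVM_VG_NAME_COMPLETE='vg0'
		value = strings.Trim(value, "'\"")

		if key == "LVM_VG_NAME_COMPLETE" {
			return value
		}
	}

	return ""
}

func main() {
	fmt.Println(parseVGName("LVM_VG_NAME_COMPLETE='vg0'\n"))   // vg0
	fmt.Println(parseVGName("LVM_VG_NAME_INCOMPLETE='vg0'\n")) // empty: VG not complete yet
}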
diff --git a/internal/integration/api/volumes.go b/internal/integration/api/volumes.go
index e801c971b9..b6531158ba 100644
--- a/internal/integration/api/volumes.go
+++ b/internal/integration/api/volumes.go
@@ -191,6 +191,13 @@ func (suite *VolumesSuite) TestLVMActivation() {
 
 	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
 
+	k8sNode, err := suite.GetK8sNodeByInternalIP(suite.ctx, node)
+	suite.Require().NoError(err)
+
+	nodeName := k8sNode.Name
+
+	suite.T().Logf("creating LVM volume group on node %s/%s", node, nodeName)
+
 	userDisks, err := suite.UserDisks(suite.ctx, node)
 	suite.Require().NoError(err)
 
@@ -201,6 +208,8 @@ func (suite *VolumesSuite) TestLVMActivation() {
 	podDef, err := suite.NewPrivilegedPod("pv-create")
 	suite.Require().NoError(err)
 
+	podDef = podDef.WithNodeName(nodeName)
+
 	suite.Require().NoError(podDef.Create(suite.ctx, 5*time.Minute))
 
 	defer podDef.Delete(suite.ctx) //nolint:errcheck
@@ -230,9 +239,13 @@ func (suite *VolumesSuite) TestLVMActivation() {
 	suite.Require().Contains(stdout, "Logical volume \"lv1\" created.")
 
 	defer func() {
+		suite.T().Logf("removing LVM volumes %s/%s", node, nodeName)
+
 		deletePodDef, err := suite.NewPrivilegedPod("pv-destroy")
 		suite.Require().NoError(err)
 
+		deletePodDef = deletePodDef.WithNodeName(nodeName)
+
 		suite.Require().NoError(deletePodDef.Create(suite.ctx, 5*time.Minute))
 
 		defer deletePodDef.Delete(suite.ctx) //nolint:errcheck
@@ -252,6 +265,8 @@ func (suite *VolumesSuite) TestLVMActivation() {
 		}
 	}()
 
+	suite.T().Logf("rebooting node %s/%s", node, nodeName)
+
 	// now we want to reboot the node and make sure the array is still mounted
 	suite.AssertRebooted(
 		suite.ctx, node, func(nodeCtx context.Context) error {
@@ -259,14 +274,14 @@ func (suite *VolumesSuite) TestLVMActivation() {
 		}, 5*time.Minute,
 	)
 
+	suite.T().Logf("verifying LVM activation %s/%s", node, nodeName)
+
 	suite.Require().Eventually(func() bool {
-		return suite.lvmVolumeExists()
+		return suite.lvmVolumeExists(node)
 	}, 5*time.Second, 1*time.Second, "LVM volume group was not activated after reboot")
 }
 
-func (suite *VolumesSuite) lvmVolumeExists() bool {
-	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
-
+func (suite *VolumesSuite) lvmVolumeExists(node string) bool {
 	ctx := client.WithNode(suite.ctx, node)
 
 	disks, err := safe.StateListAll[*block.Disk](ctx, suite.Client.COSI)
diff --git a/internal/integration/base/k8s.go b/internal/integration/base/k8s.go
index 43ef7a8cdf..bebb3d68ce 100644
--- a/internal/integration/base/k8s.go
+++ b/internal/integration/base/k8s.go
@@ -200,6 +200,7 @@ func (k8sSuite *K8sSuite) WaitForEventExists(ctx context.Context, ns string, che
 
 type podInfo interface {
 	Name() string
+	WithNodeName(nodeName string) podInfo
 	Create(ctx context.Context, waitTimeout time.Duration) error
 	Delete(ctx context.Context) error
 	Exec(ctx context.Context, command string) (string, string, error)
@@ -217,6 +218,12 @@ func (p *pod) Name() string {
 	return p.name
 }
 
+func (p *pod) WithNodeName(nodeName string) podInfo {
+	p.pod.Spec.NodeName = nodeName
+
+	return p
+}
+
 func (p *pod) Create(ctx context.Context, waitTimeout time.Duration) error {
 	_, err := p.suite.Clientset.CoreV1().Pods(p.namespace).Create(ctx, p.pod, metav1.CreateOptions{})
 	if err != nil {
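
The WithNodeName helper added above works by setting spec.nodeName
directly, which places the pod on the named node without going through
the scheduler; this is what lets the test create the VG, reboot, and
verify against the same worker. A minimal sketch of the effect (the
pod and node names are hypothetical):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// buildPinnedPod pins a pod to a node by setting spec.nodeName,
// bypassing the scheduler, as the new WithNodeName helper does.
func buildPinnedPod(name, nodeName string) *corev1.Pod {
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: name},
		Spec:       corev1.PodSpec{NodeName: nodeName},
	}
}

func main() {
	pod := buildPinnedPod("pv-create", "worker-1")
	fmt.Println(pod.Spec.NodeName) // worker-1
}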
diff --git a/internal/integration/k8s/testdata/rook-ceph-cluster-values.yaml b/internal/integration/k8s/testdata/rook-ceph-cluster-values.yaml
index 79a70584f0..4475e83d6d 100644
--- a/internal/integration/k8s/testdata/rook-ceph-cluster-values.yaml
+++ b/internal/integration/k8s/testdata/rook-ceph-cluster-values.yaml
@@ -15,3 +15,8 @@ cephClusterSpec:
       requests:
         cpu: "500m"
         memory: "1Gi"
+  storage:
+    useAllNodes: true
+    useAllDevices: true
+    config:
+      encryptedDevice: "true"
diff --git a/pkg/machinery/gendata/data/pkgs b/pkg/machinery/gendata/data/pkgs
index 69d2ae8726..72d1c32704 100644
--- a/pkg/machinery/gendata/data/pkgs
+++ b/pkg/machinery/gendata/data/pkgs
@@ -1 +1 @@
-v1.9.0-alpha.0-11-g232a153
\ No newline at end of file
+v1.9.0-alpha.0-13-ge2a561f
\ No newline at end of file