Skip to content

Commit

Permalink
fix: multiple fixes for LVM activation
Browse files Browse the repository at this point in the history
Two fixes were in pkgs/lvm2:

* siderolabs/pkgs#1041
* siderolabs/pkgs#1042

Other fixes in this PR:

* drop the dm-raid patch, `lvm2` loads the required kernel modules on
  its own
* adjust the controller a bit for some interactions
* make Rook test use more complicated, encrypted setup which uses LVM
* adjust LVM test to handle a case when there's more than one worker

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Oct 2, 2024
1 parent 0a4df4e commit 37ec64f
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 61 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-23T13:32:15Z by kres 8be5fa7.
# Generated on 2024-10-02T15:06:23Z by kres 34e72ac.

name: default
concurrency:
Expand Down Expand Up @@ -1464,7 +1464,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_MEMORY_WORKERS: "4096"
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down Expand Up @@ -2774,7 +2774,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down Expand Up @@ -3089,7 +3089,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
WITH_DISK_ENCRYPTION: "true"
WITH_KUBESPAN: "true"
WITH_VIRTUAL_IP: "true"
Expand Down Expand Up @@ -3197,7 +3197,7 @@ jobs:
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
TAG_SUFFIX: -race
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/integration-extensions-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-20T17:49:19Z by kres 8be5fa7.
# Generated on 2024-10-02T15:06:23Z by kres 34e72ac.

name: integration-extensions-cron
concurrency:
Expand Down Expand Up @@ -113,7 +113,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_MEMORY_WORKERS: "4096"
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/integration-qemu-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-20T16:31:33Z by kres 8be5fa7.
# Generated on 2024-10-02T15:06:23Z by kres 34e72ac.

name: integration-qemu-cron
concurrency:
Expand Down Expand Up @@ -84,7 +84,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/integration-qemu-encrypted-vip-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-20T17:49:19Z by kres 8be5fa7.
# Generated on 2024-10-02T15:06:23Z by kres 34e72ac.

name: integration-qemu-encrypted-vip-cron
concurrency:
Expand Down Expand Up @@ -84,7 +84,7 @@ jobs:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
WITH_DISK_ENCRYPTION: "true"
WITH_KUBESPAN: "true"
WITH_VIRTUAL_IP: "true"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/integration-qemu-race-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-21T05:02:59Z by kres 8be5fa7.
# Generated on 2024-10-02T15:06:23Z by kres 34e72ac.

name: integration-qemu-race-cron
concurrency:
Expand Down Expand Up @@ -94,7 +94,7 @@ jobs:
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
TAG_SUFFIX: -race
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml'
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
run: |
sudo -E make e2e-qemu
- name: save artifacts
Expand Down
8 changes: 4 additions & 4 deletions .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ spec:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_EXTRA_DISKS_DRIVERS: "ide,nvme"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml"
- name: save-talos-logs
conditions:
- always
Expand Down Expand Up @@ -1017,7 +1017,7 @@ spec:
withSudo: true
environment:
QEMU_MEMORY_WORKERS: 4096
WITH_CONFIG_PATCH_WORKER: "@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml:@hack/test/patches/dm-raid-module.yaml"
WITH_CONFIG_PATCH_WORKER: "@_out/installer-extensions-patch.yaml:@hack/test/patches/extensions.yaml"
QEMU_EXTRA_DISKS: 3
SHORT_INTEGRATION_TEST: yes
EXTRA_TEST_ARGS: -talos.extensions.qemu
Expand Down Expand Up @@ -1156,7 +1156,7 @@ spec:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_EXTRA_DISKS_DRIVERS: "ide,nvme"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml"
- name: save-talos-logs
conditions:
- always
Expand Down Expand Up @@ -1216,7 +1216,7 @@ spec:
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_EXTRA_DISKS_DRIVERS: "ide,nvme"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml:@hack/test/patches/dm-raid-module.yaml"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml"
TAG_SUFFIX: -race
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: save-talos-logs
Expand Down
4 changes: 0 additions & 4 deletions hack/test/patches/dm-raid-module.yaml

This file was deleted.

59 changes: 23 additions & 36 deletions internal/app/machined/pkg/controllers/block/lvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@ import (

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/hashicorp/go-multierror"
"github.com/siderolabs/gen/optional"
"github.com/siderolabs/go-cmd/pkg/cmd"
"go.uber.org/zap"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
"github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
)

// LVMActivationController activates LVM volumes when they are discovered by the block.DiscoveryController.
Expand All @@ -37,12 +32,6 @@ func (ctrl *LVMActivationController) Name() string {
// Inputs implements controller.Controller interface.
func (ctrl *LVMActivationController) Inputs() []controller.Input {
return []controller.Input{
{
Namespace: v1alpha1.NamespaceName,
Type: runtimeres.MountStatusType,
ID: optional.Some(constants.EphemeralPartitionLabel),
Kind: controller.InputWeak,
},
{
Namespace: block.NamespaceName,
Type: block.DiscoveredVolumeType,
Expand Down Expand Up @@ -75,15 +64,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
case <-r.EventCh():
}

if _, err := safe.ReaderGetByID[*runtimeres.MountStatus](ctx, r, constants.EphemeralPartitionLabel); err != nil {
if state.IsNotFoundError(err) {
// wait for the mount status to be available
continue
}

return fmt.Errorf("failed to get mount status: %w", err)
}

discoveredVolumes, err := safe.ReaderListAll[*block.DiscoveredVolume](ctx, r)
if err != nil {
return fmt.Errorf("failed to list discovered volumes: %w", err)
Expand All @@ -92,17 +72,19 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
var multiErr error

for iterator := discoveredVolumes.Iterator(); iterator.Next(); {
if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
continue
}

if iterator.Value().TypedSpec().Name != "lvm2-pv" {
// if the volume is not an LVM volume the moment we saw it, we can skip it
// we need to activate the volumes only on reboot, not when they are first formatted
ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}

continue
}

logger.Info("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))
if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
continue
}

logger.Debug("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))

vgName, err := ctrl.checkVGNeedsActivation(ctx, iterator.Value().TypedSpec().DevPath)
if err != nil {
Expand All @@ -112,8 +94,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
}

if vgName == "" {
ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}

continue
}

Expand All @@ -132,10 +112,10 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
"event",
vgName,
); err != nil {
return fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err)
multiErr = multierror.Append(multiErr, fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err))
} else {
ctrl.activatedVGs[vgName] = struct{}{}
}

ctrl.activatedVGs[vgName] = struct{}{}
}

if multiErr != nil {
Expand All @@ -153,7 +133,6 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
"/sbin/lvm",
"pvscan",
"--cache",
"--verbose",
"--listvg",
"--checkcomplete",
"--vgonline",
Expand All @@ -166,11 +145,19 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
return "", fmt.Errorf("failed to check if LVM volume backed by device %s needs activation: %w", devicePath, err)
}

if strings.HasPrefix(stdOut, "LVM_VG_NAME_INCOMPLETE") {
return "", nil
}
// parse the key-value pairs from the udev output
for _, line := range strings.Split(stdOut, "\n") {
key, value, ok := strings.Cut(line, "=")
if !ok {
continue
}

vgName := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSuffix(stdOut, "\n"), "LVM_VG_NAME_COMPLETE='"), "'")
value = strings.Trim(value, "'\"")

if key == "LVM_VG_NAME_COMPLETE" {
return value, nil
}
}

return vgName, nil
return "", nil
}
23 changes: 19 additions & 4 deletions internal/integration/api/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,13 @@ func (suite *VolumesSuite) TestLVMActivation() {

node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

k8sNode, err := suite.GetK8sNodeByInternalIP(suite.ctx, node)
suite.Require().NoError(err)

nodeName := k8sNode.Name

suite.T().Logf("creating LVM volume group on node %s/%s", node, nodeName)

userDisks, err := suite.UserDisks(suite.ctx, node)
suite.Require().NoError(err)

Expand All @@ -201,6 +208,8 @@ func (suite *VolumesSuite) TestLVMActivation() {
podDef, err := suite.NewPrivilegedPod("pv-create")
suite.Require().NoError(err)

podDef = podDef.WithNodeName(nodeName)

suite.Require().NoError(podDef.Create(suite.ctx, 5*time.Minute))

defer podDef.Delete(suite.ctx) //nolint:errcheck
Expand Down Expand Up @@ -230,9 +239,13 @@ func (suite *VolumesSuite) TestLVMActivation() {
suite.Require().Contains(stdout, "Logical volume \"lv1\" created.")

defer func() {
suite.T().Logf("removing LVM volumes %s/%s", node, nodeName)

deletePodDef, err := suite.NewPrivilegedPod("pv-destroy")
suite.Require().NoError(err)

deletePodDef = deletePodDef.WithNodeName(nodeName)

suite.Require().NoError(deletePodDef.Create(suite.ctx, 5*time.Minute))

defer deletePodDef.Delete(suite.ctx) //nolint:errcheck
Expand All @@ -252,21 +265,23 @@ func (suite *VolumesSuite) TestLVMActivation() {
}
}()

suite.T().Logf("rebooting node %s/%s", node, nodeName)

// now we want to reboot the node and make sure the array is still mounted
suite.AssertRebooted(
suite.ctx, node, func(nodeCtx context.Context) error {
return base.IgnoreGRPCUnavailable(suite.Client.Reboot(nodeCtx))
}, 5*time.Minute,
)

suite.T().Logf("verifying LVM activation %s/%s", node, nodeName)

suite.Require().Eventually(func() bool {
return suite.lvmVolumeExists()
return suite.lvmVolumeExists(node)
}, 5*time.Second, 1*time.Second, "LVM volume group was not activated after reboot")
}

func (suite *VolumesSuite) lvmVolumeExists() bool {
node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

func (suite *VolumesSuite) lvmVolumeExists(node string) bool {
ctx := client.WithNode(suite.ctx, node)

disks, err := safe.StateListAll[*block.Disk](ctx, suite.Client.COSI)
Expand Down
7 changes: 7 additions & 0 deletions internal/integration/base/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func (k8sSuite *K8sSuite) WaitForEventExists(ctx context.Context, ns string, che

type podInfo interface {
Name() string
WithNodeName(nodeName string) podInfo
Create(ctx context.Context, waitTimeout time.Duration) error
Delete(ctx context.Context) error
Exec(ctx context.Context, command string) (string, string, error)
Expand All @@ -217,6 +218,12 @@ func (p *pod) Name() string {
return p.name
}

func (p *pod) WithNodeName(nodeName string) podInfo {
p.pod.Spec.NodeName = nodeName

return p
}

func (p *pod) Create(ctx context.Context, waitTimeout time.Duration) error {
_, err := p.suite.Clientset.CoreV1().Pods(p.namespace).Create(ctx, p.pod, metav1.CreateOptions{})
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ cephClusterSpec:
requests:
cpu: "500m"
memory: "1Gi"
storage:
useAllNodes: true
useAllDevices: true
config:
encryptedDevice: "true"

0 comments on commit 37ec64f

Please sign in to comment.