Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ISSUE-1051] Handle Kubelet's Wrong CSI Call Inconsistent with Real Volume Status for OBS Release 1.3 #1053

Open
wants to merge 18 commits into
base: rel/objectscale-1.3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 56 additions & 6 deletions pkg/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,15 @@ func (s *CSINodeService) NodeUnstageVolume(ctx context.Context, req *csi.NodeUns
}

currStatus := volumeCR.Spec.CSIStatus
if currStatus == apiV1.Created {
switch currStatus {
case apiV1.Failed:
ll.Warningf("Volume status: %s. Need to retry.", currStatus)
case apiV1.Created:
ll.Info("Volume has been already unstaged")
return &csi.NodeUnstageVolumeResponse{}, nil
} else if currStatus != apiV1.VolumeReady {
case apiV1.VolumeReady:
ll.Infof("Expected volume status: %s", currStatus)
default:
msg := fmt.Sprintf("current volume CR status - %s, expected to be in [%s, %s]",
currStatus, apiV1.Created, apiV1.VolumeReady)
ll.Error(msg)
Expand Down Expand Up @@ -468,8 +473,9 @@ func (s *CSINodeService) NodePublishVolume(ctx context.Context, req *csi.NodePub
}

currStatus := volumeCR.Spec.CSIStatus
// if currStatus not in [VolumeReady, Published]
if currStatus != apiV1.VolumeReady && currStatus != apiV1.Published {
if currStatus == apiV1.Failed {
ll.Warningf("Volume status: %s. Need to retry.", currStatus)
} else if currStatus != apiV1.VolumeReady && currStatus != apiV1.Published {
msg := fmt.Sprintf("current volume CR status - %s, expected to be in [%s, %s]",
currStatus, apiV1.VolumeReady, apiV1.Published)
ll.Error(msg)
Expand All @@ -488,6 +494,46 @@ func (s *CSINodeService) NodePublishVolume(ctx context.Context, req *csi.NodePub
resp, errToReturn = nil, fmt.Errorf("failed to publish volume: fake attach error %s", err.Error())
}
} else {
// will check whether srcPath is mounted, if not, need to redo NodeStageVolume
srcMounted, err := s.fsOps.IsMounted(srcPath)
if err != nil {
errMsg := fmt.Sprintf("execute IsMounted on %s with error: %s", srcPath, err.Error())
ll.Error(errMsg)
return nil, fmt.Errorf("failed to publish volume: %s", errMsg)
}
if !srcMounted {
ll.Warnf("staging path %s is not mounted! need to redo NodeStageVolume!", srcPath)
nodeStageReq := &csi.NodeStageVolumeRequest{
VolumeId: volumeID,
StagingTargetPath: req.GetStagingTargetPath(),
VolumeCapability: req.GetVolumeCapability(),
}

// unlock volume to redo NodeStageVolume
err := s.volMu.UnlockKey(req.GetVolumeId())
if err != nil {
errMsg := fmt.Sprintf("unlock volume %s to redo NodeStageVolume with error: %s", volumeID, err.Error())
ll.Error(errMsg)
return nil, fmt.Errorf("failed to publish volume: %s", errMsg)
}
nodeStageResp, err := s.NodeStageVolume(ctx, nodeStageReq)

// re-lock the volume to proceed NodePublishVolume
s.volMu.LockKey(req.GetVolumeId())

if nodeStageResp == nil && err != nil {
errMsg := fmt.Sprintf("redo NodeStageVolume on volume %s with error: %s", volumeID, err.Error())
ll.Error(errMsg)
return nil, fmt.Errorf("failed to publish volume: %s", errMsg)
}

// update the content of volume
volumeCR, err = s.crHelper.GetVolumeByID(volumeID)
if err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("unable to get updated volume %s", volumeID))
}
}

_, isBlock := req.GetVolumeCapability().GetAccessType().(*csi.VolumeCapability_Block)
if err := s.fsOps.PrepareAndPerformMount(srcPath, dstPath, isBlock, !isBlock, mountOptions...); err != nil {
ll.Errorf("Unable to mount volume: %v", err)
Expand Down Expand Up @@ -554,8 +600,9 @@ func (s *CSINodeService) NodeUnpublishVolume(ctx context.Context, req *csi.NodeU
}

currStatus := volumeCR.Spec.CSIStatus
// if currStatus not in [VolumeReady, Published]
if currStatus != apiV1.VolumeReady && currStatus != apiV1.Published {
if currStatus == apiV1.Failed {
ll.Warningf("Volume status: %s. Need to retry.", currStatus)
} else if currStatus != apiV1.VolumeReady && currStatus != apiV1.Published {
msg := fmt.Sprintf("current volume CR status - %s, expected to be in [%s, %s]",
currStatus, apiV1.VolumeReady, apiV1.Published)
ll.Error(msg)
Expand Down Expand Up @@ -602,6 +649,9 @@ func (s *CSINodeService) NodeUnpublishVolume(ctx context.Context, req *csi.NodeU
volumeCR.Spec.Owners = owners
if len(volumeCR.Spec.Owners) == 0 {
volumeCR.Spec.CSIStatus = apiV1.VolumeReady
} else {
// ensure the Published status of volume in the successful processing
volumeCR.Spec.CSIStatus = apiV1.Published
}
if updateErr := s.k8sClient.UpdateCR(ctxWithID, volumeCR); updateErr != nil {
ll.Errorf("Unable to set volume CR status to VolumeReady: %v", updateErr)
Expand Down
38 changes: 33 additions & 5 deletions pkg/node/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ var _ = Describe("CSINodeService NodePublish()", func() {
Expect(err).To(BeNil())
Expect(volumeCR.Spec.Owners[0]).To(Equal(testPod1Name))

// publish again such volume
// publish again such volume in Failed Status
volumeCR.Spec.CSIStatus = apiV1.Failed
err = node.k8sClient.UpdateCR(testCtx, volumeCR)
resp, err = node.NodePublishVolume(testCtx, req)
Expect(resp).NotTo(BeNil())
Expect(err).To(BeNil())
Expand All @@ -111,6 +113,7 @@ var _ = Describe("CSINodeService NodePublish()", func() {
err = node.k8sClient.ReadCR(testCtx, testV1ID, "", volumeCR)
Expect(err).To(BeNil())
Expect(len(volumeCR.Spec.Owners)).To(Equal(1))
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.Published))
})
})

Expand Down Expand Up @@ -157,12 +160,12 @@ var _ = Describe("CSINodeService NodePublish()", func() {
Expect(err).NotTo(BeNil())
Expect(err.Error()).To(ContainSubstring("Staging Path missing in request"))
})
It("Should fail, because Volume has failed status", func() {
It("Should fail, because Volume has unexpected status", func() {
req := getNodePublishRequest(testV1ID, targetPath, *testVolumeCap)
vol1 := &vcrd.Volume{}
err := node.k8sClient.ReadCR(testCtx, testVolume1.Id, testNs, vol1)
Expect(err).To(BeNil())
vol1.Spec.CSIStatus = apiV1.Failed
vol1.Spec.CSIStatus = apiV1.Creating
err = node.k8sClient.UpdateCR(testCtx, vol1)
Expect(err).To(BeNil())

Expand Down Expand Up @@ -364,6 +367,19 @@ var _ = Describe("CSINodeService NodeUnPublish()", func() {
Expect(err).To(BeNil())
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.VolumeReady))
Expect(volumeCR.Spec.Owners).To(BeNil())

// unpublish again on failed volume
volumeCR.Spec.CSIStatus = apiV1.Failed
err = node.k8sClient.UpdateCR(testCtx, volumeCR)
Expect(err).To(BeNil())

resp, err = node.NodeUnpublishVolume(testCtx, req)
Expect(resp).NotTo(BeNil())
Expect(err).To(BeNil())

err = node.k8sClient.ReadCR(testCtx, testV1ID, "", volumeCR)
Expect(err).To(BeNil())
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.VolumeReady))
})
It("Should unpublish volume and don't change volume CR status", func() {
req := getNodeUnpublishRequest(testV1ID, targetPath1)
Expand Down Expand Up @@ -506,6 +522,18 @@ var _ = Describe("CSINodeService NodeUnStage()", func() {
err = node.k8sClient.ReadCR(testCtx, testV1ID, "", volumeCR)
Expect(err).To(BeNil())
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.Created))

// retry unstage on failed volume
volumeCR.Spec.CSIStatus = apiV1.Failed
err = node.k8sClient.UpdateCR(testCtx, volumeCR)
Expect(err).To(BeNil())
resp, err = node.NodeUnstageVolume(testCtx, req)
Expect(resp).NotTo(BeNil())
Expect(err).To(BeNil())

err = node.k8sClient.ReadCR(testCtx, testV1ID, "", volumeCR)
Expect(err).To(BeNil())
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.Created))
})
})

Expand Down Expand Up @@ -564,12 +592,12 @@ var _ = Describe("CSINodeService NodeUnStage()", func() {
Expect(volumeCR.Spec.CSIStatus).To(Equal(apiV1.Failed))
})

It("Should failed, because Volume has failed status", func() {
It("Should failed, because Volume has unexpected status", func() {
req := getNodeUnstageRequest(testV1ID, targetPath)
vol1 := &vcrd.Volume{}
err := node.k8sClient.ReadCR(testCtx, testVolume1.Id, testNs, vol1)
Expect(err).To(BeNil())
vol1.Spec.CSIStatus = apiV1.Failed
vol1.Spec.CSIStatus = apiV1.Creating
err = node.k8sClient.UpdateCR(testCtx, vol1)
Expect(err).To(BeNil())

Expand Down