doc: stage/publish flow

ydb-platform · Oct 11, 2024 · 57f3167 · 57f3167
1 parent 76587ea
commit 57f3167
Show file tree

Hide file tree

Showing 2 changed files with 71 additions and 0 deletions.
diff --git a/cloud/blockstore/tools/csi_driver/internal/driver/node.go b/cloud/blockstore/tools/csi_driver/internal/driver/node.go
@@ -616,6 +616,8 @@ func (s *nodeService) nodePublishDiskAsFilesystem(
  ctx context.Context,
  req *csi.NodePublishVolumeRequest) error {
 
+ // Fallback to previous implementation for already mounted volumes
+ // Must be removed after migration of all endpoints to the new format
  mounted, _ := s.mounter.IsMountPoint(req.StagingTargetPath)
  if !mounted {
  return s.nodePublishDiskAsFilesystemDeprecated(ctx, req)
@@ -900,6 +902,9 @@ func (s *nodeService) nodeUnstageVolume(
 
  mounted, _ := s.mounter.IsMountPoint(req.StagingTargetPath)
  if !mounted {
+ // Fallback to the previous implementation for already mounted volumes:
+ // their endpoint is stopped in nodeUnpublishVolume.
+ // Must be removed after migration of all endpoints to the new format.
  return nil
  }
 
@@ -951,6 +956,9 @@ func (s *nodeService) nodeUnpublishVolume(
  // because the endpoint's backend service is unknown here.
  // When we miss we get S_FALSE/S_ALREADY code (err == nil).
 
+ // Fallback to the previous implementation for already mounted volumes:
+ // stop the endpoint in nodeUnpublishVolume.
+ // Must be removed after migration of all endpoints to the new format.
  if s.nbsClient != nil {
  _, err := s.nbsClient.StopEndpoint(ctx, &nbsapi.TStopEndpointRequest{
  UnixSocketPath: filepath.Join(endpointDir, nbsSocketName),
@@ -1366,6 +1374,8 @@ func (s *nodeService) NodeExpandVolume(
  nbdDevicePath := ""
  unixSocketPath := ""
  for _, endpoint := range listEndpointsResp.Endpoints {
+ // Fallback to the previous implementation for already mounted volumes.
+ // Must be removed after migration of all endpoints to the new format.
  if endpoint.UnixSocketPath == unixSocketPathOld {
  nbdDevicePath = endpoint.GetNbdDeviceFile()
  unixSocketPath = unixSocketPathOld

diff --git a/cloud/blockstore/tools/csi_driver/stage-publish-unpublish-unstage-flow.md b/cloud/blockstore/tools/csi_driver/stage-publish-unpublish-unstage-flow.md
@@ -0,0 +1,61 @@
+The current CSI Driver implementation violates the CSI specification with respect to staging/publishing/unpublishing/unstaging volumes.
+At the moment the StageVolume step is completely ignored, and both starting the endpoint and mounting the volume happen at the PublishVolume step.
+As a result, the CSI Driver doesn't support the ReadWriteOnce access mode correctly: only one pod on a node can mount the volume,
+whereas it should be possible to mount the same volume into multiple pods on the same node.
+
+According to the CSI specification:
+
+NodeStageVolume should mount the volume to the staging path
+NodePublishVolume should mount the volume to the target path
+NodeUnpublishVolume should unmount the volume from the target path
+NodeUnstageVolume should unmount the volume from the staging path
+Since the current implementation of the CSI Driver is already running in production clusters, we need to handle migration
+from the existing implementation of mounting volumes (only NodePublishVolume/NodeUnpublishVolume is implemented)
+to the new implementation.
+
+The tricky part here is using different UnixSocketPath/InstanceId/ClientId
+for already bound volumes and "new" volumes.
+
+Current format of UnixSocketPath: socketsDir/podId/volumeId
+New format of UnixSocketPath: socketsDir/nodeId/volumeId
+
+Current format of InstanceId: podId
+New format of InstanceId: nodeId
+
+Current format of ClientId: clientID-podId
+New format of ClientId: clientID-nodeId
+
+Possible scenarios:
+
+--------
+1. Volume was staged and published
+2. CSI Driver was updated
+3. Volume was unpublished and unstaged <- here we should handle unpublish with old unix socket path
+--------
+1. Volume was staged and published
+2. CSI Driver was updated
+3. Kubelet was restarted
+4. CSI Driver received stage and publish for the same volume again <- here we should handle stage/publish with old unix socket path
+--------
+1. CSI Driver was updated
+2. Volume was staged and published
+3. Endpoint should be started with the new unix socket path
+4. Volume was unpublished and unstaged
+5. UnstageVolume should stop endpoint with new unix socket path
+--------
+1. CSI Driver was updated
+2. Volume was staged and published on the node #1 with RWO access mode
+3. Staging volume on the node #2
+4. StageVolume on the node #2 should return error
+
+
+Migration is split across different modes:
+VM mode: https://github.com/ydb-platform/nbs/pull/1982
+Mount mode: https://github.com/ydb-platform/nbs/pull/2195
+Block mode: not implemented yet
+
+After migration of all volumes to the new endpoints we can remove backward compatibility
+with old format of endpoints.
+
+External links/Documentation:
+https://github.com/container-storage-interface/spec/blob/master/spec.md#node-service-rpc