Skip to content

Commit

Permalink
Reduced time ANF driver waits for some operations
Browse files Browse the repository at this point in the history
  • Loading branch information
clintonk authored Apr 4, 2024
1 parent e991189 commit f1215ce
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 68 deletions.
13 changes: 10 additions & 3 deletions storage_drivers/azure/api/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ import (
)

const (
VolumeCreateTimeout = 10 * time.Second
VolumeCreateTimeout = 2 * time.Second
SubvolumeCreateTimeout = 10 * time.Second
SnapshotTimeout = 240 * time.Second // Snapshotter sidecar has a timeout of 5 minutes. Stay under that!
DefaultTimeout = 120 * time.Second
MaxLabelLength = 256
Expand Down Expand Up @@ -1008,6 +1009,9 @@ func (c Client) WaitForVolumeState(
volumeState = StateDeleted
return nil
}
if errors.Is(err, context.Canceled) {
return backoff.Permanent(err)
}

volumeState = ""
return fmt.Errorf("could not get volume status; %v", err)
Expand Down Expand Up @@ -1349,7 +1353,7 @@ func (c Client) DeleteVolume(ctx context.Context, filesystem *FileSystem) error
return err
}

Logc(ctx).WithFields(logFields).Debug("Volume deleted.")
Logc(ctx).WithFields(logFields).Debug("Volume deletion started.")

return nil
}
Expand Down Expand Up @@ -1488,6 +1492,9 @@ func (c Client) WaitForSnapshotState(
Logc(ctx).Debugf("Implied deletion for snapshot %s.", snapshot.Name)
return nil
}
if errors.Is(err, context.Canceled) {
return backoff.Permanent(err)
}
return fmt.Errorf("could not get snapshot status; %v", err)
}

Expand Down Expand Up @@ -1634,7 +1641,7 @@ func (c Client) DeleteSnapshot(ctx context.Context, filesystem *FileSystem, snap
return err
}

Logc(ctx).WithFields(logFields).Debug("Snapshot deleted.")
Logc(ctx).WithFields(logFields).Debug("Snapshot deletion started.")

return nil
}
Expand Down
60 changes: 34 additions & 26 deletions storage_drivers/azure/azure_anf.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,10 +582,11 @@ func (d *NASStorageDriver) initializeAzureSDKClient(

// Azure workload identity
// If cloud identity is provided and cloud provider is set to 'Azure' during the installation,
// we can use AZURE_CLIENT_ID,AZURE_TENANT_ID,AZURE_FEDERATED_TOKEN_FILE and AZURE_AUTHORITY_HOST environment variables
// injected by workload identity webhook for initialization of ANF driver.
// we can use AZURE_CLIENT_ID,AZURE_TENANT_ID,AZURE_FEDERATED_TOKEN_FILE and AZURE_AUTHORITY_HOST
// environment variables injected by workload identity webhook for initialization of ANF driver.

if os.Getenv("AZURE_CLIENT_ID") != "" && os.Getenv("AZURE_TENANT_ID") != "" && os.Getenv("AZURE_FEDERATED_TOKEN_FILE") != "" && os.Getenv("AZURE_AUTHORITY_HOST") != "" {
if os.Getenv("AZURE_CLIENT_ID") != "" && os.Getenv("AZURE_TENANT_ID") != "" &&
os.Getenv("AZURE_FEDERATED_TOKEN_FILE") != "" && os.Getenv("AZURE_AUTHORITY_HOST") != "" {
Logc(ctx).Info("Using Azure workload identity.")
} else {
// Azure managed identity
Expand Down Expand Up @@ -1336,7 +1337,8 @@ func (d *NASStorageDriver) Import(ctx context.Context, volConfig *storage.Volume
}
}

if err = d.SDK.ModifyVolume(ctx, volume, labels, &unixPermissions, &snapshotDirAccess, &modifiedExportRule); err != nil {
err = d.SDK.ModifyVolume(ctx, volume, labels, &unixPermissions, &snapshotDirAccess, &modifiedExportRule)
if err != nil {
Logc(ctx).WithField("originalName", originalName).WithError(err).Error(
"Could not import volume, volume modify failed.")
return fmt.Errorf("could not import volume %s, volume modify failed; %v", originalName, err)
Expand Down Expand Up @@ -1424,22 +1426,18 @@ func (d *NASStorageDriver) waitForVolumeCreate(ctx context.Context, volume *api.
return errors.VolumeCreatingError(err.Error())

case api.StateDeleting:
// Wait for deletion to complete
_, errDelete := d.SDK.WaitForVolumeState(
ctx, volume, api.StateDeleted, []string{api.StateError}, d.defaultTimeout(), operation)
if errDelete != nil {
Logc(ctx).WithFields(logFields).WithError(errDelete).Error(
"Volume could not be cleaned up and must be manually deleted.")
}
// Don't wait if volume is already being deleted
Logc(ctx).WithFields(logFields).WithError(err).Error(
"Volume is being cleaned up and should be recreated later.")

case api.StateError:
// Delete a failed volume
errDelete := d.SDK.DeleteVolume(ctx, volume)
if errDelete != nil {
if errDelete := d.SDK.DeleteVolume(ctx, volume); errDelete != nil {
Logc(ctx).WithFields(logFields).WithError(errDelete).Error(
"Volume could not be cleaned up and must be manually deleted.")
return multierr.Combine(err, errDelete)
} else {
Logc(ctx).WithField("volume", volume.Name).Info("Volume deleted.")
Logc(ctx).WithField("volume", volume.Name).Info("Cleanup of failed volume started.")
}

Logc(ctx).WithFields(logFields).Debugf("Volume is in %s state.", state)
Expand All @@ -1453,7 +1451,7 @@ func (d *NASStorageDriver) waitForVolumeCreate(ctx context.Context, volume *api.
}
}

return nil
return err
}

// Destroy deletes a volume.
Expand Down Expand Up @@ -1482,10 +1480,13 @@ func (d *NASStorageDriver) Destroy(ctx context.Context, volConfig *storage.Volum
Logc(ctx).WithField("volume", name).Warn("Volume already deleted.")
return nil
} else if extantVolume.ProvisioningState == api.StateDeleting {
// This is a retry, so give it more time before giving up again.
_, err = d.SDK.WaitForVolumeState(
ctx, extantVolume, api.StateDeleted, []string{api.StateError}, d.volumeCreateTimeout, api.Delete)
return err
// This is a retry, so give Docker more time before giving up again. Don't wait in other contexts.
if d.Config.DriverContext == tridentconfig.ContextDocker {
_, err = d.SDK.WaitForVolumeState(
ctx, extantVolume, api.StateDeleted, []string{api.StateError}, d.defaultTimeout(), api.Delete)
return err
}
return nil
}

// Delete the volume
Expand All @@ -1495,9 +1496,13 @@ func (d *NASStorageDriver) Destroy(ctx context.Context, volConfig *storage.Volum

Logc(ctx).WithField("volume", extantVolume.Name).Info("Volume deleted.")

// Wait for deletion to complete
_, err = d.SDK.WaitForVolumeState(ctx, extantVolume, api.StateDeleted, []string{api.StateError}, d.defaultTimeout(), api.Delete)
return err
// If Docker, wait for deletion to complete. Don't wait in other contexts.
if d.Config.DriverContext == tridentconfig.ContextDocker {
_, err = d.SDK.WaitForVolumeState(
ctx, extantVolume, api.StateDeleted, []string{api.StateError}, d.defaultTimeout(), api.Delete)
return err
}
return nil
}

// Publish the volume to the host specified in publishInfo. This method may or may not be running on the host
Expand Down Expand Up @@ -1834,10 +1839,13 @@ func (d *NASStorageDriver) DeleteSnapshot(
return err
}

// Wait for snapshot deletion to complete
return d.SDK.WaitForSnapshotState(
ctx, snapshot, extantVolume, api.StateDeleted, []string{api.StateError}, api.SnapshotTimeout,
)
// If Docker, wait for deletion to complete. Don't wait in other contexts.
if d.Config.DriverContext == tridentconfig.ContextDocker {
return d.SDK.WaitForSnapshotState(
ctx, snapshot, extantVolume, api.StateDeleted, []string{api.StateError}, api.SnapshotTimeout,
)
}
return nil
}

// List returns the list of volumes associated with this backend.
Expand Down
2 changes: 1 addition & 1 deletion storage_drivers/azure/azure_anf_subvolume.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func (d *NASBlockStorageDriver) defaultCreateTimeout() time.Duration {
case tridentconfig.ContextDocker:
return tridentconfig.DockerCreateTimeout
default:
return api.VolumeCreateTimeout
return api.SubvolumeCreateTimeout
}
}

Expand Down
4 changes: 2 additions & 2 deletions storage_drivers/azure/azure_anf_subvolume_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ func TestSubvolumeDefaultCreateTimeout(t *testing.T) {
Expected time.Duration
}{
{tridentconfig.ContextDocker, tridentconfig.DockerCreateTimeout},
{tridentconfig.ContextCSI, api.VolumeCreateTimeout},
{"", api.VolumeCreateTimeout},
{tridentconfig.ContextCSI, api.SubvolumeCreateTimeout},
{"", api.SubvolumeCreateTimeout},
}
for _, test := range tests {
t.Run(string(test.Context), func(t *testing.T) {
Expand Down
Loading

0 comments on commit f1215ce

Please sign in to comment.