Skip to content

Commit

Permalink
Support drive anti-affinity for volumes
Browse files Browse the repository at this point in the history
Some optimal setups require volumes to be allocated on unique disks,
i.e., not more than one volume per disk.

By default, the volume scheduling algorithm chooses the drive based on most
free capacity. This can end up allocating more than one volume per disk.

This PR provides a way for such optimal setups, by using the storage class
parameters. Using a storage class with `directpv.min.io/unique-alloc-id: XXX`
parameter enables unique allocation for PVCs. This unique allocation id enables
the one-to-one cardinality for the drives and volumes.
  • Loading branch information
Praveenrajmani committed Sep 12, 2023
1 parent c26a1c1 commit 5d35d88
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 20 deletions.
68 changes: 50 additions & 18 deletions docs/volume-scheduling.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ DirectPV CSI controller selects suitable drive for `CreateVolume` request like b
3. As no `DirectPVDrive` CRD object has the requested volume, each drive is selected by
a. By requested capacity
b. By access-tier if requested
c. By topology constraints if requested
c. By unique allocation id if requested (a drive already allocated with the same id is skipped)
d. By topology constraints if requested
4. In the process of step (3), if more than one drive is selected, the maximum free capacity drive is picked.
5. If step (4) picks up more than one drive, a drive is randomly selected.
6. Finally the selected drive is updated with requested volume information.
Expand Down Expand Up @@ -42,23 +43,25 @@ DirectPV CSI controller selects suitable drive for `CreateVolume` request like b
│ drive │ │ matched? │ | No │ Match by │
└─────^─────┘ ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ |<----│ access-tier │
| | Yes | │ if requested? │
| ┌───────V───────┐ | ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
| │ Filter drives │ | | Yes
| │ by maximum │ | ╭╌╌╌╌╌╌╌V╌╌╌╌╌╌╌╮
| │ free capacity │ | │ Match by │
| └───────────────┘ | No │ topology │
| | |<----│ constraints │
| ╭╌╌╌╌╌╌╌V╌╌╌╌╌╌╌╮ | │ if requested? │
| No │ Is more than │ | ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
+-----------│ one drive │ | | Yes
│ matched? │ | ┌───────V───────┐
╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ | │ Append to │
| Yes +<----│ matched drives│
┌───────V───────┐ └───────────────┘
│ Return │
│ Randomly │
│ selected drive│
└───────────────┘
| ┌───────V───────┐ | ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
| │ Filter drives │ | |
| │ by maximum │ | ╭╌╌╌╌╌╌╌V╌╌╌╌╌╌╌╌╌╮
| │ free capacity │ | Yes │ Match by │
| └───────────────┘ |<----│ unique-alloc-id │
| | | │ if requested? │
| ╭╌╌╌╌╌╌╌V╌╌╌╌╌╌╌╮ | ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
| No │ Is more than │ | | No
+-----------│ one drive │ | ╭╌╌╌╌╌╌╌V╌╌╌╌╌╌╌╮
│ matched? │ | │ Match by │
╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ | No │ topology │
| Yes |<----│ constraints │
┌───────V───────┐ | │ if requested? │
│ Return │ | ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯
│ Randomly │ | | Yes
│ selected drive│ | ┌───────V───────┐
└───────────────┘ | │ Append to │
+<----│ matched drives│
└───────────────┘
```

## Customizing drive selection
Expand Down Expand Up @@ -92,3 +95,32 @@ spec:
storage: 8Mi
EOF
```

## Unique allocation of drives for volumes

By default, DirectPV allocates drives for volumes based on the free capacity present on the drives, so the drive with the most free capacity gets selected for a volume provisioning request. For setups that require unique drive allocation, where no drive may host more than one volume provisioned with the same allocation id, use the steps below to enable unique allocation of drives.

* Create new storage class with `directpv.min.io/unique-alloc-id: <any-uid>` using [create-storage-class.sh script](../tools/create-storage-class.sh). Below is an example:
```sh
# NOTE: The allocation id must be 47 characters or less and must be a valid label value.
#
# Create new storage class 'directpv-optimal' with the label 'directpv.min.io/unique-alloc-id: 1234'
$ create-storage-class.sh directpv-optimal 'directpv.min.io/unique-alloc-id: 1234'
```

* Use newly created storage class in [volume provisioning](./volume-provisioning.md). Below is an example:
```sh
$ kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: sleep-pvc
spec:
volumeMode: Filesystem
storageClassName: directpv-optimal
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 8Mi
EOF
```
6 changes: 6 additions & 0 deletions pkg/apis/directpv.min.io/types/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ const (

// SuspendLabelKey denotes if the volume is suspended.
SuspendLabelKey LabelKey = consts.GroupName + "/suspend"

// UniqueAllocIDLabelKey label key to denote the unique allocation of drives for volumes
UniqueAllocIDLabelKey LabelKey = consts.GroupName + "/unique-alloc-id"

// UniqueAllocIDLabelKeyPrefix label key prefix for unique allocation id to be set on the drive
UniqueAllocIDLabelKeyPrefix = consts.GroupName + "/unique-alloc-id-"
)

// LabelValue is a type definition for label value
Expand Down
19 changes: 19 additions & 0 deletions pkg/apis/directpv.min.io/v1beta1/drive.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package v1beta1

import (
"strconv"
"strings"

"github.com/minio/directpv/pkg/apis/directpv.min.io/types"
Expand Down Expand Up @@ -221,6 +222,24 @@ func (drive DirectPVDrive) GetNodeID() types.NodeID {
return types.NodeID(drive.getLabel(types.NodeLabelKey))
}

// HasUniqueAllocID reports whether this drive carries the unique allocation
// label for allocID, i.e. the label whose key is
// types.UniqueAllocIDLabelKeyPrefix followed by allocID and whose value is "true".
func (drive *DirectPVDrive) HasUniqueAllocID(allocID string) bool {
	// A missing label yields the empty string, which never equals "true".
	labelValue := drive.GetLabels()[types.UniqueAllocIDLabelKeyPrefix+allocID]
	return labelValue == strconv.FormatBool(true)
}

// SetUniqueAllocID marks this drive as allocated for allocID by setting the
// label types.UniqueAllocIDLabelKeyPrefix+allocID to "true".
func (drive *DirectPVDrive) SetUniqueAllocID(allocID string) {
	key := types.LabelKey(types.UniqueAllocIDLabelKeyPrefix + allocID)
	value := types.LabelValue(strconv.FormatBool(true))
	drive.SetLabel(key, value)
}

// RemoveUniqueAllocID drops the unique allocation label for allocID
// (key types.UniqueAllocIDLabelKeyPrefix+allocID) from this drive.
func (drive *DirectPVDrive) RemoveUniqueAllocID(allocID string) {
	key := types.LabelKey(types.UniqueAllocIDLabelKeyPrefix + allocID)
	drive.RemoveLabel(key)
}

// SetLabel sets label to this drive.
func (drive *DirectPVDrive) SetLabel(key types.LabelKey, value types.LabelValue) bool {
values := drive.GetLabels()
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/directpv.min.io/v1beta1/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,16 @@ func (volume DirectPVVolume) IsSuspended() bool {
return string(volume.getLabel(types.SuspendLabelKey)) == strconv.FormatBool(true)
}

// SetUniqueAllocID records allocID on this volume as the value of the
// types.UniqueAllocIDLabelKey label.
func (volume *DirectPVVolume) SetUniqueAllocID(allocID string) {
	value := types.LabelValue(allocID)
	volume.SetLabel(types.UniqueAllocIDLabelKey, value)
}

// GetUniqueAllocID returns the allocation id stored on this volume under the
// types.UniqueAllocIDLabelKey label, converted to a plain string.
// NOTE(review): presumably empty when the label is unset — confirm against getLabel.
func (volume *DirectPVVolume) GetUniqueAllocID() string {
	allocID := volume.getLabel(types.UniqueAllocIDLabelKey)
	return string(allocID)
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// DirectPVVolumeList denotes list of volumes.
Expand Down
15 changes: 14 additions & 1 deletion pkg/csi/controller/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,18 @@ func (c *Server) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
return nil, status.Errorf(codes.InvalidArgument, "unsupported filesystem type %v for volume %v", req.GetVolumeCapabilities()[0].GetMount().GetFsType(), name)
}

var uniqueAllocID string
for key, value := range req.GetParameters() {
if key == string(directpvtypes.AccessTierLabelKey) {
switch key {
case string(directpvtypes.AccessTierLabelKey):
if _, err := directpvtypes.StringsToAccessTiers(value); err != nil {
return nil, status.Errorf(codes.InvalidArgument, "unknown access-tier %v for volume %v; %v", value, name, err)
}
case string(directpvtypes.UniqueAllocIDLabelKey):
if err := validAllocID(value); err != nil {
return nil, status.Errorf(codes.InvalidArgument, "invalid %v value; %v", directpvtypes.UniqueAllocIDLabelKey, err)
}
uniqueAllocID = value
}
}

Expand Down Expand Up @@ -177,6 +184,9 @@ func (c *Server) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
drive.GetDriveName(),
size,
)
if uniqueAllocID != "" {
newVolume.SetUniqueAllocID(uniqueAllocID)
}

if _, err := client.VolumeClient().Create(ctx, newVolume, metav1.CreateOptions{}); err != nil {
if !errors.IsAlreadyExists(err) {
Expand Down Expand Up @@ -206,6 +216,9 @@ func (c *Server) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
}

if drive.AddVolumeFinalizer(req.GetName()) {
if uniqueAllocID != "" {
drive.SetUniqueAllocID(uniqueAllocID)
}
drive.Status.FreeCapacity -= size
drive.Status.AllocatedCapacity += size

Expand Down
13 changes: 13 additions & 0 deletions pkg/csi/controller/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ func matchDrive(drive *types.Drive, req *csi.CreateVolumeRequest) bool {
if len(accessTiers) > 0 && drive.GetAccessTier() != accessTiers[0] {
return false
}
case string(directpvtypes.UniqueAllocIDLabelKey):
if value != "" && drive.HasUniqueAllocID(value) {
// Do not allocate another volume with this unique alloc id
return false
}
default:
if labels[key] != value {
return false
Expand Down Expand Up @@ -159,3 +164,11 @@ func selectDrive(ctx context.Context, req *csi.CreateVolumeRequest) (*types.Driv

return &maxFreeCapacityDrives[n.Int64()], nil
}

// validAllocID checks that allocID can be used both as the suffix of a drive
// label key (directpvtypes.UniqueAllocIDLabelKeyPrefix+allocID) and as a label
// value on its own. It returns the first validation error, or nil if both
// checks pass.
func validAllocID(allocID string) error {
	// The id must form a valid label key when appended to the drive prefix.
	if _, keyErr := directpvtypes.NewLabelKey(directpvtypes.UniqueAllocIDLabelKeyPrefix + allocID); keyErr != nil {
		return keyErr
	}

	// The id is also stored verbatim as a label value on the volume.
	_, valueErr := directpvtypes.NewLabelValue(allocID)
	return valueErr
}
36 changes: 36 additions & 0 deletions pkg/csi/controller/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,41 @@ func TestGetFilteredDrives(t *testing.T) {
Parameters: map[string]string{consts.GroupName + "/access-type": "hot"},
}

case15Objects := []runtime.Object{
types.NewDrive(
"drive-1",
types.DriveStatus{Status: directpvtypes.DriveStatusReady},
"node-1",
directpvtypes.DriveName("sda"),
directpvtypes.AccessTierDefault,
),
newDriveWithLabels(
"drive-2",
types.DriveStatus{
Status: directpvtypes.DriveStatusReady,
Topology: map[string]string{"node": "node1", "rack": "rack1", "zone": "zone1", "region": "region1"},
},
"node-1",
directpvtypes.DriveName("sdd"),
map[directpvtypes.LabelKey]directpvtypes.LabelValue{
consts.GroupName + "/unique-alloc-id-xxx": "true",
},
),
}
case15Request := &csi.CreateVolumeRequest{
Name: "volume-1",
Parameters: map[string]string{consts.GroupName + "/unique-alloc-id": "xxx"},
}
case15Result := []types.Drive{
*types.NewDrive(
"drive-1",
types.DriveStatus{Status: directpvtypes.DriveStatusReady},
"node-1",
directpvtypes.DriveName("sda"),
directpvtypes.AccessTierDefault,
),
}

testCases := []struct {
objects []runtime.Object
request *csi.CreateVolumeRequest
Expand All @@ -476,6 +511,7 @@ func TestGetFilteredDrives(t *testing.T) {
{case12Objects, case12Request, case12Result},
{case13Objects, case13Request, case13Result},
{case14Objects, case14Request, case14Result},
{case15Objects, case15Request, case15Result},
}

for i, testCase := range testCases {
Expand Down
4 changes: 3 additions & 1 deletion pkg/volume/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ func (handler *volumeEventHandler) releaseVolume(ctx context.Context, volume *ty

drive.Status.FreeCapacity += volume.Status.TotalCapacity
drive.Status.AllocatedCapacity = drive.Status.TotalCapacity - drive.Status.FreeCapacity

if uniqueAllocID := volume.GetUniqueAllocID(); uniqueAllocID != "" {
drive.RemoveUniqueAllocID(uniqueAllocID)
}
_, err = client.DriveClient().Update(
ctx, drive, metav1.UpdateOptions{TypeMeta: types.NewDriveTypeMeta()},
)
Expand Down

0 comments on commit 5d35d88

Please sign in to comment.