From 7a013a5e2c383e0751431bcb3ab420556ce4595a Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Tue, 5 Apr 2022 23:49:15 +0000 Subject: [PATCH 01/10] KEP-3294: Add proposal for provisioning volumes from cross-namespace snapshots --- keps/prod-readiness/sig-storage/3294.yaml | 6 + .../README.md | 1024 +++++++++++++++++ .../kep.yaml | 56 + 3 files changed, 1086 insertions(+) create mode 100644 keps/prod-readiness/sig-storage/3294.yaml create mode 100644 keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md create mode 100644 keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml diff --git a/keps/prod-readiness/sig-storage/3294.yaml b/keps/prod-readiness/sig-storage/3294.yaml new file mode 100644 index 00000000000..14c671e9baf --- /dev/null +++ b/keps/prod-readiness/sig-storage/3294.yaml @@ -0,0 +1,6 @@ +# The KEP must have an approver from the +# "prod-readiness-approvers" group +# of http://git.k8s.io/enhancements/OWNERS_ALIASES +kep-number: 3294 +alpha: + approver: @wojtek-t diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md new file mode 100644 index 00000000000..1d1edb09bf0 --- /dev/null +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -0,0 +1,1024 @@ + +# KEP-3294: Provision volumes from cross-namespace snapshots + + + + + + +- [Release Signoff Checklist](#release-signoff-checklist) +- [Summary](#summary) +- [Motivation](#motivation) + - [Goals](#goals) + - [Non-Goals](#non-goals) +- [Proposal](#proposal) + - [User Stories (Optional)](#user-stories-optional) + - [Story 1](#story-1) + - [Story 2](#story-2) + - [Notes/Constraints/Caveats (Optional)](#notesconstraintscaveats-optional) + - [Provisioning PVs from cross-namespace PVs](#provisioning-pvs-from-cross-namespace-pvs) + - [Risks and Mitigations](#risks-and-mitigations) + - [Secret Handling](#secret-handling) + - [Security](#security) +- [Design Details](#design-details) + - [Example flow of how this proposal works](#example-flow-of-how-this-proposal-works) + - [API](#api) + - [Populator implementation](#populator-implementation) + - [(a) inside the existing CSI external-provisioner](#a-inside-the-existing-csi-external-provisioner) + - [(b) as a separate populator](#b-as-a-separate-populator) + - [(1) Populate data from snapshot to provisioned PV](#1-populate-data-from-snapshot-to-provisioned-pv) + - [(2) Provision PV with data via CSI call](#2-provision-pv-with-data-via-csi-call) + - [Test Plan](#test-plan) + - [Graduation Criteria](#graduation-criteria) + - [Alpha](#alpha) + - [Beta](#beta) + - [GA](#ga) + - [Upgrade / Downgrade Strategy](#upgrade--downgrade-strategy) + - [Version Skew Strategy](#version-skew-strategy) +- [Production Readiness Review Questionnaire](#production-readiness-review-questionnaire) + - [Feature Enablement and Rollback](#feature-enablement-and-rollback) + - [Rollout, Upgrade and Rollback Planning](#rollout-upgrade-and-rollback-planning) + - [Monitoring Requirements](#monitoring-requirements) + - [Dependencies](#dependencies) + - [Scalability](#scalability) + - [Troubleshooting](#troubleshooting) +- [Implementation History](#implementation-history) +- [Drawbacks](#drawbacks) +- [Alternatives](#alternatives) +- [Infrastructure Needed (Optional)](#infrastructure-needed-optional) + + +## Release Signoff Checklist + + + +Items marked with (R) are required *prior to targeting to a milestone / release*. + +- [ ] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR) +- [ ] (R) KEP approvers have approved the KEP status as `implementable` +- [ ] (R) Design details are appropriately documented +- [ ] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors) + - [ ] e2e Tests for all Beta API Operations (endpoints) + - [ ] (R) Ensure GA e2e tests for meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) + - [ ] (R) Minimum Two Week Window for GA e2e tests to prove flake free +- [ ] (R) Graduation criteria is in place + - [ ] (R) [all GA Endpoints](https://github.com/kubernetes/community/pull/1806) must be hit by [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) +- [ ] (R) Production readiness review completed +- [ ] (R) Production readiness review approved +- [ ] "Implementation History" section is up-to-date for milestone +- [ ] User-facing documentation has been created in [kubernetes/website], for publication to [kubernetes.io] +- [ ] Supporting documentation—e.g., additional design documents, links to mailing list discussions/SIG meetings, relevant PRs/issues, release notes + + + +[kubernetes.io]: https://kubernetes.io/ +[kubernetes/enhancements]: https://git.k8s.io/enhancements +[kubernetes/kubernetes]: https://git.k8s.io/kubernetes +[kubernetes/website]: https://git.k8s.io/website + +## Summary + + + +This KEP proposes a method for provisioning volumes from cross-namespace snapshots. + +## Motivation + + + +By using [volume snapshots feature](https://kubernetes.io/docs/concepts/storage/volume-snapshots/), users can provision volumes from snapshots. +However, it only works for the `VolumeSnapshot` in the same namespace, +therefore users can't provision a volume in one namespace from a `VolumeSnapshot` in the other namespace. +On the other hand, as discussed in other KEP PRs (https://github.com/kubernetes/enhancements/pull/643, +https://github.com/kubernetes/enhancements/pull/1112, and https://github.com/kubernetes/enhancements/pull/2849), there are use cases that require to share the `VolumeSnapshot` across namespaces. +For such use cases, this KEP proposes a method for provisioning volumes from cross-namespace snapshots. + +### Goals + + +- Provision of PVs from `VolumeSnapshot`s in other namespaces + +### Non-Goals + + +- Provision of PVs from PVCs in other namespaces (Please also see [Provisioning PVs from cross-namespace PVs](#provisioning-pvs-from-cross-namespace-pvs)) +- Copy or move of `VolumeSnapshot`s to other namespaces (Please also see [Alternatives](#alternatives)) +- Clone of `VolumeSnapshotContent`s + +## Proposal + + + +Define an API to specify a cross-namespace `VolumeSnapshot` as a `DataSourceRef` of a PVC and implement a generic populator for the API. + +- To specify a non-standard API as a `DataSourceRef` of a PVC, [AnyVolumeDataSource feature](https://kubernetes.io/blog/2021/08/30/volume-populators-redesigned/) is used, +- To specify a cross-namespace `VolumeSnapshot`, a new `VolumeSnapshotLink` CRD is introduced (Please also see [API](#api)), +- To restrict only allowed `VolumeSnapshot` to be consumed from other namespaces, [`ReferencePolicy` CRD](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.ReferencePolicy) is used, +- To actually populate a PV from a `VolumeSnapshot` referenced from `VolumeSnapshotLink` CRD, a populator for each CSI driver is used, +- As a reference populator implementation, [CSI external provisioner](https://github.com/kubernetes-csi/external-provisioner) is extended to handle the `VolumeSnapshotLink` CRD (Please also see [Populator implementation](#populator-implementation)). + +An initial discussion of this idea can be found [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-949929595) and PoC implementation can be found [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-958208039). + +### User Stories (Optional) + + + +#### Story 1 + +`VolumeSnapshot`s for PVCs in prod namespace are taken on a regular basis, PVCs are created from the `VolumeSnapshot`s in other test namespaces for testing. + +#### Story 2 + +The same `VolumeSnapshot`s are expected to be consumed as golden images from multiple namespaces. Using PVs as [VM images for KubeVirt](https://kubevirt.io/2020/KubeVirt-VM-Image-Usage-Patterns.html) is one of the examples of this use case. + +### Notes/Constraints/Caveats (Optional) + + + +#### Provisioning PVs from cross-namespace PVs + +The conclusion of the original discussion ([here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.nj4e1ocn8b23) and [here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.h1eqongxseo)) on transfer feature was that we should avoid implementing transfer of PVCs, because there will be more race conditions for PVs than snapshots. +However, we might have a room to reconsider if this cross-namespace-provision approach can solve the issue of race for PVCs, although transfer approach can't seems to resolve the issue easily. + +### Risks and Mitigations + + + +#### Secret Handling + +TBD: Unlike transfer feature, this idea doesn't require transfer of secert, but it may need to be discussed +(We would need to go back to discussion around https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202). + +#### Security + +TBD: Use of `ReferencePolicy` should remove the risk, but it may need to be discussed +(We would need to check again for [the original use case of Gateway APIs](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307), and review if there are any security risks). + +## Design Details + + + +### Example flow of how this proposal works + +Let's use [Story 1](#story-1) as an example and let's assume: +- There are two namespaces, prod and test, +- Alice manages the prod namesapce and Bob manages the test namespace, +- Alice would like to allow `VolumeSnapshot` foo-backup in the prod namespace to be consumed in the test namespace for testing, +- Bob would like to create a PV for PVC foo-testing in the test namespace from the `VolumeSnapshot` foo-backup in the prod namespace. + +Once this proposal is implemented, it can be achieved by doing the following steps: + +1. In the prod namespace, Alice creates a `ReferencePolicy` bar that allows referencing to the `VolumeSnapshot` foo-backup in the prod namespace from any `VolumeSnapshotLinks` in the test namespace, + ```yaml + apiVersion: gateway.networking.k8s.io/v1alpha2 + kind: ReferencePolicy + metadata: + name: bar + namespace: prod + spec: + from: + - group: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + namespace: test + to: + - group: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshot + name: foo-backup + ``` +2. In the test namespace, Bob creates a `VolumeSnapshotLink` foo-link that references the `VolumeSnapshot` foo-backup in the prod namespace as a source, + ```yaml + apiVersion: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + metadata: + name: foo-link + namespace: test + spec: + source: + name: foo-backup + namespace: prod + ``` +3. In the test namespace, Bob creates a `PersistentVolumeClaim` foo-testing that references the `VolumeSnapshotLink` foo-link as a data source, + ```yaml + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: foo-testing + namespace: test + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Mi + dataSourceRef: + apiGroup: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + name: foo-link + volumeMode: Filesystem + ``` +4. Once the populator finds a `VolumeSnapshotLink` is specified as `dataSourceRef`, it checks all `ReferencePolicys` in `VolumeSnapshotLink.spec.source.namespace` to see if populating the `VolumeSnapshotLink.spec.source` is allowed. If it is allowed, the populator populates the volume. + +### API + +A new `VolumeSnapshotLink` CRD is introduced in `snapshot.storage.k8s.io` API group: + +```golang +type VolumeSnapshotLink struct { + metav1.TypeMeta `json:",inline"` + // +optional + metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + Spec VolumeSnapshotLinkSpec `json:"spec" protobuf:"bytes,2,opt,name=spec"` +} + +type VolumeSnapshotLinkList struct { + metav1.TypeMeta `json:",inline"` + // +optional + metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + Items []VolumeSnapshotLink `json:"items" protobuf:"bytes,2,rep,name=items"` +} + +// VolumeSnapshotLinkSpec describes attributes of a volume snapshot link. +type VolumeSnapshotLinkSpec struct { + // This field is immutable after creation. + Source VolumeSnapshotLinkSource `json:"source" protobuf:"bytes,1,opt,name=source"` +} + +// VolumeSnapshotLinkSource specifies a reference to VolumeSnapshot. +type VolumeSnapshotLinkSource struct { + Name string `json:"name" protobuf:"bytes,1,opt,name=name"` + Namespace string `json:"namespace" protobuf:"bytes,2,opt,name=namespace"` +} +``` + +### Populator implementation + +The populator logic can be implemented either [(a) inside the existing CSI external-provisioner](#a-inside-the-existing-csi-external-provisioner) or [(b) as a separate populator](#b-as-a-separate-populator). +Cluster admins can choose which implementation to be used per CSI driver basis. +As a reference implementation, only (a) will be implemented in the community. + +Regardless of the implementation, +- `VolumeSnapshotLink` CRD and `ReferencePolicy` CRD must exist in the cluster before the populator is deployed. +- `VolumePopulator` CRD to allow popluating from `VolumeSnapshotLink` CRD needs to be created to enable this feature, as AnyVolumeDataSource feature defines. The `VolumePopulator` CRD needed for this feature will be as follows: +```yaml +kind: VolumePopulator +apiVersion: populator.storage.k8s.io/v1beta1 +metadata: + name: volumesnapshotlink +sourceKind: + group: snapshot.storage.k8s.io + kind: VolumeSnapshotLink +``` + +#### (a) inside the existing CSI external-provisioner + +Once populator is implemented inside the existing CSI external-provisioner, the CSI external provisioner: +- Handles `VolumeSnapshotLink` CRD and `ReferencePolicy` CRD, +- Checks if `VolumeSnapshotLink` is specified as `DataSourceRef`: + - If specified, check if the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferencePolicy`s: + - If allowed, use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. + +To enable this feature in CSI external provisioner, `--cross-namespace-snapshot=true` +command line flag needs to be passed to the provisioner for each CSI plugin. + +#### (b) as a separate populator + +There will be two approaches to implement as a separate populator: +- [(1) Populate data from snapshot to provisioned PV](#1-populate-data-from-snapshot-to-provisioned-pv) +- [(2) Provision PV with data via CSI call](#2-provision-pv-with-data-via-csi-call) + +##### (1) Populate data from snapshot to provisioned PV + +This is a straightforward implementation that AnyVolumeDataSource feature defines. +Developers will be able to utilize lib-volume-populator to implement this way. +One of the challenges to achieve it will be how to actually copy the data from a snapshot in one namespace to an already provisioned PV that will need to be bound to a PVC in the other namespace. + +A naive implementation will be: +1. Create another PV from the snapshot in the snapshot's namespace, +2. Copy the data from the PV to somewhere accessible from any namespaces, +3. Copy the data in step 2 to the originally intended PV, +4. Delete the temporary data in step 1 and step 2. + +If the naive implementation is used, unintended transient states, for example a temporary PVC in the snapshot namespace, may be visible to users. +Also, there may be performance issues depending on where and how data is copied. + +On the other hand, althoguh it completely depends on the implementation, this approach can have advantages, like the ability to populate volumes from snapshot across different CSI drivers or the ability to efficiently copy data by using CSI driver specific way. + +There will be no generic way to implement by using this approach, because the implementations rely too much on backup tools or CSI drivers. +Therefore no community implementation of this approach will be provided. + +##### (2) Provision PV with data via CSI call + +Current CSI external provisioners provision volume regardless of the data source, therefore populators need to populate data to already provisioned PVs. +However, this behavior may be changed and provisioners may offload provisioning to populators for PV with `VolumeSnapshotLink` CRD data source. + +The implementation of provisioner and populator of this approach will be as follows: + +- Provisioner: + - Handles `VolumeSnapshotLink` CRD, + - Checks if `VolumeSnapshotLink` is specified as `DataSourceRef`: + - If specified, skip provisioning the volume + +- Populator: + - Handles `VolumeSnapshotLink` CRD and `ReferencePolicy` CRD, + - Checks if `VolumeSnapshotLink` is specified as `DataSourceRef`: + - If specified, check if the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferencePolicy`s: + - If allowed, use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. + +The above implementation is just separating the logics in approach (a) to two components, and it won't help improve efficiency nor simplify implementations. +Therefore, the description in this section is just for discussion purpose and won't be implemented. + +### Test Plan + + + +- For Alpha, unit tests and e2e tests for provisioning PV from `VolumeSnapshotLink` are added. + - unit tests: + - feature enabeld case: + - Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy + - Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy + - feature disabled case: + - Verify that provisioning from VolumeSnapshotLink returns error if the feature is disabled + - e2e tests: + - Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy + - Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy +- For Beta, scalability tests are added to exercise this feature. +- For GA, the introduced e2e tests will be promoted to conformance. + +### Graduation Criteria + + + +#### Alpha + +- Feature implemented behind a non-default command line flag of CSI external-provisioner +- Initial e2e tests completed and enabled + +#### Beta + +- Gather feedback from developers and surveys +- Additional tests are in Testgrid and linked in KEP + +#### GA + +- Allowing time for feedback + +**Note:** Generally we also wait at least two releases between beta and +GA/stable, because there's no opportunity for user feedback, or even bug reports, +in back-to-back releases. + +**For non-optional features moving to GA, the graduation criteria must include +[conformance tests].** + +[conformance tests]: https://git.k8s.io/community/contributors/devel/sig-architecture/conformance-tests.md + +### Upgrade / Downgrade Strategy + + + +- Upgrade: + - Method: Do both of the below operations: + - Specify `--cross-namespace-snapshot=true` command line flag of CSI external-provisioner + - Create `VolumePopulator` CRD to allow popluating from `VolumeSnapshotLink` CRD + - Behavior: + - Provisioning volumes from snapshots in other namespaces is enabled. +- Downgrade: + - Method: Do both of the below operations: + - Specify `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner + - Delete `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD + - Behavior: + - Provisioning volumes from snapshots in other namespaces is disabled. + +### Version Skew Strategy + + + +## Production Readiness Review Questionnaire + + + +### Feature Enablement and Rollback + + + +###### How can this feature be enabled / disabled in a live cluster? + + + +- [x] Other + - Describe the mechanism: + - Will enabling / disabling the feature require downtime of the control + plane? No. + - Will enabling / disabling the feature require downtime or reprovisioning + of a node? (Do not assume `Dynamic Kubelet Config` feature is enabled). No. + +###### Does enabling the feature change any default behavior? + + + +Yes, `VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for provisioning PV. + +###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? + + + +Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner, and deleting `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD. + +###### What happens if we reenable the feature if it was previously rolled back? + +`VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for provisioning PV, again. + +###### Are there any tests for feature enablement/disablement? + + + +Yes, unit tests cover scenarios where the feature is disabled or enabled. + +### Rollout, Upgrade and Rollback Planning + + + +###### How can a rollout or rollback fail? Can it impact already running workloads? + + + +###### What specific metrics should inform a rollback? + + + +###### Were upgrade and rollback tested? Was the upgrade->downgrade->upgrade path tested? + + + +###### Is the rollout accompanied by any deprecations and/or removals of features, APIs, fields of API types, flags, etc.? + + + +### Monitoring Requirements + + + +###### How can an operator determine if the feature is in use by workloads? + + + +###### How can someone using this feature know that it is working for their instance? + + + +- [ ] Events + - Event Reason: +- [x] API .status + - Condition name: `Bound` for a PV that is provisioned from a PVC referencing `VolumeSnapshotLink` + - Other field: +- [x] Other (treat as last resort) + - Details: Check if a `VolumePopulator` CRD to allow popluating from `VolumeSnapshotLink` CRD exists. + +###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? + + + +###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? + + + +- [x] Metrics + - Metric name: TBD (Need to discuss if existing metrics for "claims" queue is sufficient). + - [Optional] Aggregation method: prometheus + - Components exposing the metric: CSI external-provisioner for each CSI plugin + +###### Are there any missing metrics that would be useful to have to improve observability of this feature? + + + +Existing metrics only provides number of claims remains in the queue and number of retries for the claims. +To identify what kind of data source was specified for the claim with errors, per data source type metrics may be needed. + +### Dependencies + + + +###### Does this feature depend on any specific services running in the cluster? + + + +- Features: + - CSI feature ([GA](https://kubernetes.io/blog/2019/01/15/container-storage-interface-ga/) in v1.13) + - AnyVolumeDataSource feature ([Beta](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#api-change) in v1.24) + +- Services: + - CSI external provisioner and CSI plugins + - Usage description: + - Impact of its outage on the feature: The PV isn't provisioned. + - Impact of its degraded performance or high-error rates on the feature: Provision of PV becomes slow or error. + - volume-data-source-validator + - Usage description: + - Impact of its outage on the feature: The PV isn't populated. + - Impact of its degraded performance or high-error rates on the feature: Populating PV becomes slow or error. + +### Scalability + + + +###### Will enabling / using this feature result in any new API calls? + + +- GET `VolumeSnapshotLink` API call: + - originating component(s): CSI external-provisioner + - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. +- GET(LIST) `ReferencePolicy` API call: + - originating component(s): CSI external-provisioner + - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. + +###### Will enabling / using this feature result in introducing new API types? + + + +- API type: `VolumeSnapshotLink` CRD +- Supported number of objects per namespace (for namespace-scoped objects): TBD +(Estimated maximum number is the number of `VolumeSnapshot`s that should be shared across namespace or +the number of PVs per namespace). + +###### Will enabling / using this feature result in any new calls to the cloud provider? + + + +No. + +###### Will enabling / using this feature result in increasing size or count of the existing API objects? + + + +No. + +###### Will enabling / using this feature result in increasing time taken by any operations covered by existing SLIs/SLOs? + + + +No. + +Currently, no SLIs/SLOs are defined for PV provisioning, but no performance change is expected for existing PV provisioning by this feature. +For a new provisioning from cross-namespace snapshot, it may take more time than existing PV provisioning due to the extra API calls. + +###### Will enabling / using this feature result in non-negligible increase of resource usage (CPU, RAM, disk, IO, ...) in any components? + + + +No. + +### Troubleshooting + + + +###### How does this feature react if the API server and/or etcd is unavailable? + +Existing PV provisioning also fails. + +###### What are other known failure modes? + + + +###### What steps should be taken if SLOs are not being met to determine the problem? + +## Implementation History + + + +## Drawbacks + + + +## Alternatives + + + +- Implement transfer feature for `VolumeSnapshot`: + - Pros: + - Can have more control over the transferred `VolumeSnapshot`, like modifying and deleting + - Can potentially be used to directly clone the `VolumeSnapshot` to other namespaces for [backup use case](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-957693334) + - Cons: + - Need to handle [race conditions](https://github.com/kubernetes/enhancements/pull/2849#discussion_r682057570) + - Need to consider [referenced Secrets](https://github.com/kubernetes/enhancements/pull/2849#discussion_r692459041) from snapshot after transferred + +## Infrastructure Needed (Optional) + + diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml new file mode 100644 index 00000000000..c94244be3e5 --- /dev/null +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml @@ -0,0 +1,56 @@ +title: Provision volumes from cross-namespace snapshots +kep-number: 3294 +authors: + - "@Elbehery" + - "@mkimuram" +owning-sig: sig-storage +participating-sigs: + - sig-storage +status: provisional +creation-date: 2022-04-06 +reviewers: + - "@xing-yang" + - "@mhenriks" + - "@jsafrane" + - "@bswartz" +approvers: + - "@xing-yang" + - "@jsafrane" + - "@saad-ali" + +##### WARNING !!! ###### +# prr-approvers has been moved to its own location +# You should create your own in keps/prod-readiness +# Please make a copy of keps/prod-readiness/template/nnnn.yaml +# to keps/prod-readiness/sig-xxxxx/00000.yaml (replace with kep number) +#prr-approvers: + +see-also: + - "/keps/sig-storage/1495-volume-populators" + +# The target maturity stage in the current dev cycle for this KEP. +stage: alpha + +# The most recent milestone for which work toward delivery of this KEP has been +# done. This can be the current (upcoming) milestone, if it is being actively +# worked on. +latest-milestone: "v1.25" + +# The milestone at which this feature was, or is targeted to be, at each stage. +milestone: + alpha: "v1.25" + beta: "v1.26" + stable: "v1.28" + +# The following PRR answers are required at alpha release +# List the feature gate name and the components for which it must be enabled +#feature-gates: +# - name: MyFeature +# components: +# - kube-apiserver +# - kube-controller-manager +#disable-supported: true + +# The following PRR answers are required at beta release +#metrics: +# - my_feature_metric From 2ea852ee32bbc735592cf899aaef9c1bff477c33 Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Tue, 31 May 2022 19:40:07 +0000 Subject: [PATCH 02/10] Addressed review comments and change PRR approver --- keps/prod-readiness/sig-storage/3294.yaml | 2 +- .../README.md | 117 ++++++++++++------ .../kep.yaml | 4 +- 3 files changed, 82 insertions(+), 41 deletions(-) diff --git a/keps/prod-readiness/sig-storage/3294.yaml b/keps/prod-readiness/sig-storage/3294.yaml index 14c671e9baf..70dd5d5cbd7 100644 --- a/keps/prod-readiness/sig-storage/3294.yaml +++ b/keps/prod-readiness/sig-storage/3294.yaml @@ -3,4 +3,4 @@ # of http://git.k8s.io/enhancements/OWNERS_ALIASES kep-number: 3294 alpha: - approver: @wojtek-t + approver: "@johnbelamaric" diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md index 1d1edb09bf0..a26ef09a9f7 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -87,7 +87,7 @@ tags, and then generate with `hack/update-toc.sh`. - [Story 1](#story-1) - [Story 2](#story-2) - [Notes/Constraints/Caveats (Optional)](#notesconstraintscaveats-optional) - - [Provisioning PVs from cross-namespace PVs](#provisioning-pvs-from-cross-namespace-pvs) + - [Provisioning PVCs from cross-namespace PVCs](#provisioning-pvcs-from-cross-namespace-pvcs) - [Risks and Mitigations](#risks-and-mitigations) - [Secret Handling](#secret-handling) - [Security](#security) @@ -100,6 +100,10 @@ tags, and then generate with `hack/update-toc.sh`. - [(1) Populate data from snapshot to provisioned PV](#1-populate-data-from-snapshot-to-provisioned-pv) - [(2) Provision PV with data via CSI call](#2-provision-pv-with-data-via-csi-call) - [Test Plan](#test-plan) + - [Prerequisite testing updates](#prerequisite-testing-updates) + - [Unit tests](#unit-tests) + - [Integration tests](#integration-tests) + - [e2e tests](#e2e-tests) - [Graduation Criteria](#graduation-criteria) - [Alpha](#alpha) - [Beta](#beta) @@ -197,7 +201,7 @@ demonstrate the interest in a KEP within the wider Kubernetes community. By using [volume snapshots feature](https://kubernetes.io/docs/concepts/storage/volume-snapshots/), users can provision volumes from snapshots. However, it only works for the `VolumeSnapshot` in the same namespace, -therefore users can't provision a volume in one namespace from a `VolumeSnapshot` in the other namespace. +therefore users can't provision a persistent volume claim in one namespace from a `VolumeSnapshot` in the other namespace. On the other hand, as discussed in other KEP PRs (https://github.com/kubernetes/enhancements/pull/643, https://github.com/kubernetes/enhancements/pull/1112, and https://github.com/kubernetes/enhancements/pull/2849), there are use cases that require to share the `VolumeSnapshot` across namespaces. For such use cases, this KEP proposes a method for provisioning volumes from cross-namespace snapshots. @@ -208,7 +212,7 @@ For such use cases, this KEP proposes a method for provisioning volumes from cro List the specific goals of the KEP. What is it trying to achieve? How will we know that this has succeeded? --> -- Provision of PVs from `VolumeSnapshot`s in other namespaces +- Provision of PVCs from `VolumeSnapshot`s in other namespaces ### Non-Goals @@ -216,7 +220,7 @@ know that this has succeeded? What is out of scope for this KEP? Listing non-goals helps to focus discussion and make progress. --> -- Provision of PVs from PVCs in other namespaces (Please also see [Provisioning PVs from cross-namespace PVs](#provisioning-pvs-from-cross-namespace-pvs)) +- Provision of PVCs from PVCs in other namespaces (Please also see [Provisioning PVCs from cross-namespace PVCs](#provisioning-pvcs-from-cross-namespace-pvcs)) - Copy or move of `VolumeSnapshot`s to other namespaces (Please also see [Alternatives](#alternatives)) - Clone of `VolumeSnapshotContent`s @@ -267,10 +271,10 @@ Go in to as much detail as necessary here. This might be a good place to talk about core concepts and how they relate. --> -#### Provisioning PVs from cross-namespace PVs +#### Provisioning PVCs from cross-namespace PVCs -The conclusion of the original discussion ([here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.nj4e1ocn8b23) and [here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.h1eqongxseo)) on transfer feature was that we should avoid implementing transfer of PVCs, because there will be more race conditions for PVs than snapshots. -However, we might have a room to reconsider if this cross-namespace-provision approach can solve the issue of race for PVCs, although transfer approach can't seems to resolve the issue easily. +The conclusion of the original discussion ([here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.nj4e1ocn8b23) and [here](https://docs.google.com/document/d/17H1k4lqdtJwZSjNRaQue-FhMhyk14JA_MoURpoxha5Q/edit#bookmark=id.h1eqongxseo)) on transfer feature was that we should avoid implementing transfer of PVCs, because there will be more race conditions for PVCs than snapshots. +However, we might have a room to reconsider if this cross-namespace-provision approach can solve the issue of race for PVCs, although transfer approach can't seem to resolve the issue easily. ### Risks and Mitigations @@ -288,13 +292,13 @@ Consider including folks who also work outside the SIG or subproject. #### Secret Handling -TBD: Unlike transfer feature, this idea doesn't require transfer of secert, but it may need to be discussed -(We would need to go back to discussion around https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202). +Unlike transfer feature, this idea doesn't need to involve any transfers of Secert, therefore there will be no issue on Secret handling. +From a populator, Secrets are only referenced through snapshots that exist in the same namespace (As commented [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202), depending on the driver implementation, there may be very little chance that some CSI drivers won't work well in a very rare situation. However, such drivers can avoid this issue separately, by turning off this feature, implementing their own populator, and so on). #### Security -TBD: Use of `ReferencePolicy` should remove the risk, but it may need to be discussed -(We would need to check again for [the original use case of Gateway APIs](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307), and review if there are any security risks). +By using [`ReferencePolicy`](https://gateway-api.sigs.k8s.io/concepts/security-model/#2-referencepolicy), only allowed snapshots can be accessed beyond the namespace boundary (Please also see [original discussion on security](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307)). +Therefore, no malicious user will be able to access to prohibited snapshots. ## Design Details @@ -307,9 +311,9 @@ proposal will be implemented, this is the place to discuss them. ### Example flow of how this proposal works -Let's use [Story 1](#story-1) as an example and let's assume: +Let's use [Story 1](#story-1) as an example and let's assume the following: - There are two namespaces, prod and test, -- Alice manages the prod namesapce and Bob manages the test namespace, +- Alice manages the prod namespace and Bob manages the test namespace, - Alice would like to allow `VolumeSnapshot` foo-backup in the prod namespace to be consumed in the test namespace for testing, - Bob would like to create a PV for PVC foo-testing in the test namespace from the `VolumeSnapshot` foo-backup in the prod namespace. @@ -317,7 +321,7 @@ Once this proposal is implemented, it can be achieved by doing the following ste 1. In the prod namespace, Alice creates a `ReferencePolicy` bar that allows referencing to the `VolumeSnapshot` foo-backup in the prod namespace from any `VolumeSnapshotLinks` in the test namespace, ```yaml - apiVersion: gateway.networking.k8s.io/v1alpha2 + apiVersion: gateway.networking.k8s.io/v1alpha2 kind: ReferencePolicy metadata: name: bar @@ -346,7 +350,7 @@ Once this proposal is implemented, it can be achieved by doing the following ste ``` 3. In the test namespace, Bob creates a `PersistentVolumeClaim` foo-testing that references the `VolumeSnapshotLink` foo-link as a data source, ```yaml - apiVersion: v1 + apiVersion: v1 kind: PersistentVolumeClaim metadata: name: foo-testing @@ -480,14 +484,7 @@ Therefore, the description in this section is just for discussion purpose and wo -- For Alpha, unit tests and e2e tests for provisioning PV from `VolumeSnapshotLink` are added. - - unit tests: - - feature enabeld case: - - Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy - - Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy - - feature disabled case: - - Verify that provisioning from VolumeSnapshotLink returns error if the feature is disabled - - e2e tests: - - Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy - - Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy -- For Beta, scalability tests are added to exercise this feature. -- For GA, the introduced e2e tests will be promoted to conformance. +[x] I/we understand the owners of the involved components may require updates to +existing tests to make this code solid enough prior to committing the changes necessary +to implement this enhancement. + +##### Prerequisite testing updates + + + +##### Unit tests + + + + + +- external-provisioner/pkg/controller/: 2022/5/31 - 81.1% + +##### Integration tests + + + +- No integration tests for csi external provisioner. + +##### e2e tests + + + +- Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy: +- Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy: ### Graduation Criteria @@ -649,7 +690,7 @@ well as the [existing list] of feature gates. - [x] Other - Describe the mechanism: - Will enabling / disabling the feature require downtime of the control - plane? No. + plane? No, it won't require downtime of the entire control plane. However, it will require a downtime of each provisioner whose enablement is being changed. - Will enabling / disabling the feature require downtime or reprovisioning of a node? (Do not assume `Dynamic Kubelet Config` feature is enabled). No. @@ -660,7 +701,7 @@ Any change of default behavior may be surprising to users or break existing automations, so be extremely careful here. --> -Yes, `VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for provisioning PV. +Yes, `VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for PVC. ###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? @@ -679,7 +720,7 @@ Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI e ###### What happens if we reenable the feature if it was previously rolled back? -`VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for provisioning PV, again. +`VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for PVC, again. ###### Are there any tests for feature enablement/disablement? diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml index c94244be3e5..f9a3b428f18 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml @@ -5,8 +5,8 @@ authors: - "@mkimuram" owning-sig: sig-storage participating-sigs: - - sig-storage -status: provisional + - sig-network +status: implementable creation-date: 2022-04-06 reviewers: - "@xing-yang" From fcfd09fb4ffde8052418abae046bc10aaac33d32 Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Thu, 9 Jun 2022 23:14:37 +0000 Subject: [PATCH 03/10] Change from ReferencePolicy to ReferenceGrant and describe details on how it is used --- .../README.md | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md index a26ef09a9f7..9d849d9eb43 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -239,7 +239,7 @@ Define an API to specify a cross-namespace `VolumeSnapshot` as a `DataSourceRef` - To specify a non-standard API as a `DataSourceRef` of a PVC, [AnyVolumeDataSource feature](https://kubernetes.io/blog/2021/08/30/volume-populators-redesigned/) is used, - To specify a cross-namespace `VolumeSnapshot`, a new `VolumeSnapshotLink` CRD is introduced (Please also see [API](#api)), -- To restrict only allowed `VolumeSnapshot` to be consumed from other namespaces, [`ReferencePolicy` CRD](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.ReferencePolicy) is used, +- To restrict only allowed `VolumeSnapshot` to be consumed from other namespaces, [`ReferenceGrant` CRD (formerly `ReferenceGrant`)](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io%2fv1alpha2.ReferenceGrant) is used, - To actually populate a PV from a `VolumeSnapshot` referenced from `VolumeSnapshotLink` CRD, a populator for each CSI driver is used, - As a reference populator implementation, [CSI external provisioner](https://github.com/kubernetes-csi/external-provisioner) is extended to handle the `VolumeSnapshotLink` CRD (Please also see [Populator implementation](#populator-implementation)). @@ -297,9 +297,19 @@ From a populator, Secrets are only referenced through snapshots that exist in th #### Security -By using [`ReferencePolicy`](https://gateway-api.sigs.k8s.io/concepts/security-model/#2-referencepolicy), only allowed snapshots can be accessed beyond the namespace boundary (Please also see [original discussion on security](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307)). +By using [`ReferenceGrant`](https://gateway-api.sigs.k8s.io/concepts/security-model/#2-referencegrant), only allowed snapshots can be accessed beyond the namespace boundary (Please also see [original discussion on security](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307)). Therefore, no malicious user will be able to access to prohibited snapshots. +In addition, there will be cases that `ReferenceGrant` may be created/deleted/re-created while `VolumeSnapshotLink` is being handled, however, no inconsistent behavior will be expected, as described below. + +1. No `ReferenceGrant` for a `VolumeSnapshotLink` exists when the `VolumeSnapshotLink` is handled, and the `ReferenceGrant` is created later + A controller won't use the `VolumeSnapshot` until the `ReferenceGrant` that allows the access is created. + +2. `ReferenceGrant` for a `VolumeSnapshotLink` exists when the `VolumeSnapshotLink` is handled, and the `ReferenceGrant` is deleted later + A controller will use the `VolumeSnapshot` if the `ReferenceGrant` that allows the access exists when it checks. + If all the processes succeed without any error, it succeeds even the `ReferenceGrant` is deleted in the middle of the processes. + If any errors happened in the processes and the controller retries, it may detect that there is no `ReferenceGrant` . Then, it won't use the `VolumeSnapshot` until the `ReferenceGrant` that allows the access is re-created. + ## Design Details -- Verify that PV is provisioned from VS in other namsepace if allowed by ReferencePolicy: -- Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferencePolicy: +- Verify that PV is provisioned from VS in other namsepace if allowed by ReferenceGrant: +- Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferenceGrant: ### Graduation Criteria @@ -918,7 +931,7 @@ Focusing mostly on: - GET `VolumeSnapshotLink` API call: - originating component(s): CSI external-provisioner - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. -- GET(LIST) `ReferencePolicy` API call: +- GET(LIST) `ReferenceGrant` API call: - originating component(s): CSI external-provisioner - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. From 602e9af9d1e478cdb66a4588c4789e279a16ca76 Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Thu, 9 Jun 2022 23:53:30 +0000 Subject: [PATCH 04/10] Add links to PoC in implementation section --- .../README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md index 9d849d9eb43..8eda87893f8 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -446,6 +446,8 @@ Once populator is implemented inside the existing CSI external-provisioner, the To enable this feature in CSI external provisioner, `--cross-namespace-snapshot=true` command line flag needs to be passed to the provisioner for each CSI plugin. +The [initial PoC implementation](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-958208039) uses this approach, but it doesn't implements the command line flag to enable/disable this feature. + #### (b) as a separate populator There will be two approaches to implement as a separate populator: @@ -472,6 +474,8 @@ On the other hand, althoguh it completely depends on the implementation, this ap There will be no generic way to implement by using this approach, because the implementations rely too much on backup tools or CSI drivers. Therefore no community implementation of this approach will be provided. +Note that a PoC implementation for this approach can be found [here](https://github.com/kubernetes-csi/lib-volume-populator/pull/31). It works only for csi-hostpath driver and is intended to be just for discussion purpose. + ##### (2) Provision PV with data via CSI call Current CSI external provisioners provision volume regardless of the data source, therefore populators need to populate data to already provisioned PVs. @@ -493,6 +497,12 @@ The implementation of provisioner and populator of this approach will be as foll The above implementation is just separating the logics in approach (a) to two components, and it won't help improve efficiency nor simplify implementations. Therefore, the description in this section is just for discussion purpose and won't be implemented. +A PoC implementation for this approach, forking exisiting provisioner and modify it to only handle `VolumeSnapshotLink`, can be found [here](https://github.com/mkimuram/external-provisioner/commits/separate-controller). +Note that just to separate the containers for normal provision and provision from `VolumeSnapshotLink`, we don't need to fork the codes, instead we can use a command line option. +Fork is only needed if we need to keep the existing CSI external provisioner codes separated from this feature. + +In addition, there may be an alternative way to populate to an already provisioned volume via CSI call from populator, if CSI spec is modified to allow populating an already provisioned volume. + ### Test Plan -#### Secret Handling +#### `Secret` Handling -Unlike transfer feature, this idea doesn't need to involve any transfers of Secert, therefore there will be no issue on Secret handling. -From a populator, Secrets are only referenced through snapshots that exist in the same namespace (As commented [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202), depending on the driver implementation, there may be very little chance that some CSI drivers won't work well in a very rare situation. However, such drivers can avoid this issue separately, by turning off this feature, implementing their own populator, and so on). +Unlike transfer feature, this idea doesn't need to involve any transfers of `Secret`, therefore there will be no issue on `Secret` handling. +From a populator, `Secret`s are only referenced through snapshots that exist in the same namespace (As commented [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202), depending on the driver implementation, there may be very little chance that some CSI drivers won't work well in a very rare situation. However, such drivers can avoid this issue separately, by turning off this feature, implementing their own populator, and so on). #### Security @@ -313,11 +313,11 @@ In addition, there will be cases that `ReferenceGrant` may be created/deleted/re #### Conflict on installing `VolumePopulator` CR for `VolumeSnapshotLink` across CSI drivers -This feature requires installing VolumePopulator` CR for `VolumeSnapshotLink` and is enabled per CSI driver basis. +This feature requires installing `VolumePopulator` CR for `VolumeSnapshotLink` and is enabled per CSI driver basis. Therefore, on enabling this feature for each CSI driver, it is expected that `VolumePopulator` CR for `VolumeSnapshotLink` is created before each CSI driver installation. As a result, there may be a conflict in creating it for each driver, if there are any differences in their definitions, like alpha API and beta API. -To avoid this issue, it should be avoided to manage VolumePopulator` CR for `VolumeSnapshotLink` in each CSI driver's repository. +To avoid this issue, it should be avoided to manage `VolumePopulator` CR for `VolumeSnapshotLink` in each CSI driver's repository. It should be managed in another single repository and the same CR should be used per cluster basis. ## Design Details @@ -388,7 +388,7 @@ Once this proposal is implemented, it can be achieved by doing the following ste volumeMode: Filesystem ``` 4. Once the populator finds a `VolumeSnapshotLink` is specified as `dataSourceRef`, it checks all `ReferenceGrants` in `VolumeSnapshotLink.spec.source.namespace` to see if populating the `VolumeSnapshotLink.spec.source` is allowed. If it is allowed, the populator populates the volume. - Note that how `ReferenceGrant` is checked depends on the implementation, however controllers that are trying to use the `VolumeSnapshot` in another namespace must check `ReferenceGrant` if the access is allowed, before it actually starts exposing the data and metadata from the `VolumeSnapshot` to the `VolumeSnapshotLink`'s namespace. + Note that how `ReferenceGrant` is checked depends on the implementation, however controllers that are trying to use the `VolumeSnapshot` in another namespace must check `ReferenceGrant` if the access is allowed before it actually starts exposing the data and metadata from the `VolumeSnapshot` to the `VolumeSnapshotLink`'s namespace. ### API @@ -434,7 +434,7 @@ As a reference implementation, only (a) will be implemented in the community. Regardless of the implementation, - `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD must exist in the cluster before the populator is deployed. -- `VolumePopulator` CR to allow popluating from `VolumeSnapshotLink` CRD needs to be created to enable this feature, as AnyVolumeDataSource feature defines. The `VolumePopulator` CR needed for this feature will be as follows: +- `VolumePopulator` CR to allow populating from `VolumeSnapshotLink` CRD needs to be created to enable this feature, as AnyVolumeDataSource feature defines. The `VolumePopulator` CR needed for this feature will be as follows: ```yaml kind: VolumePopulator apiVersion: populator.storage.k8s.io/v1beta1 @@ -468,7 +468,7 @@ There will be two approaches to implement as a separate populator: This is a straightforward implementation that AnyVolumeDataSource feature defines. Developers will be able to utilize lib-volume-populator to implement this way. -One of the challenges to achieve it will be how to actually copy the data from a snapshot in one namespace to an already provisioned PV that will need to be bound to a PVC in the other namespace. +One of the challenges to achieve it will be how to copy the data from a snapshot in one namespace to an already provisioned PV that will need to be bound to a PVC in the other namespace. A naive implementation will be: 1. Create another PV from the snapshot in the snapshot's namespace, @@ -479,10 +479,10 @@ A naive implementation will be: If the naive implementation is used, unintended transient states, for example a temporary PVC in the snapshot namespace, may be visible to users. Also, there may be performance issues depending on where and how data is copied. -On the other hand, althoguh it completely depends on the implementation, this approach can have advantages, like the ability to populate volumes from snapshot across different CSI drivers or the ability to efficiently copy data by using CSI driver specific way. +On the other hand, although it completely depends on the implementation, this approach can have advantages, like the ability to populate volumes from snapshot across different CSI drivers or the ability to efficiently copy data by using CSI driver specific way. There will be no generic way to implement by using this approach, because the implementations rely too much on backup tools or CSI drivers. -Therefore no community implementation of this approach will be provided. +Therefore, no community implementation of this approach will be provided. Note that a PoC implementation for this approach can be found [here](https://github.com/kubernetes-csi/lib-volume-populator/pull/31). It works only for csi-hostpath driver and is intended to be just for discussion purpose. @@ -507,7 +507,7 @@ The implementation of provisioner and populator of this approach will be as foll The above implementation is just separating the logics in approach (a) to two components, and it won't help improve efficiency nor simplify implementations. Therefore, the description in this section is just for discussion purpose and won't be implemented. -A PoC implementation for this approach, forking exisiting provisioner and modify it to only handle `VolumeSnapshotLink`, can be found [here](https://github.com/mkimuram/external-provisioner/commits/separate-controller). +A PoC implementation for this approach, forking existing provisioner and modify it to only handle `VolumeSnapshotLink`, can be found [here](https://github.com/mkimuram/external-provisioner/commits/separate-controller). Note that just to separate the containers for normal provision and provision from `VolumeSnapshotLink`, we don't need to fork the codes, instead we can use a command line option. Fork is only needed if we need to keep the existing CSI external provisioner codes separated from this feature. @@ -580,8 +580,8 @@ https://storage.googleapis.com/k8s-triage/index.html We expect no non-infra related flakes in the last month as a GA graduation criteria. --> -- Verify that PV is provisioned from VS in other namsepace if allowed by ReferenceGrant: -- Verify that PV isn't provisioned from VS in other namsepace if not allowed by ReferenceGrant: +- Verify that PV is provisioned from VS in other namespace and bound to PVC if allowed by ReferenceGrant: +- Verify that PV isn't provisioned from VS in other namespace and isn't bound to PVC if not allowed by ReferenceGrant: ### Graduation Criteria @@ -651,15 +651,15 @@ enhancement: --> - Upgrade: - - Method: Do both of the below operations: + - Method: Do both of the following operations: - Specify `--cross-namespace-snapshot=true` command line flag of CSI external-provisioner - - Create `VolumePopulator` CRD to allow popluating from `VolumeSnapshotLink` CRD + - Create `VolumePopulator` CRD to allow populating from `VolumeSnapshotLink` CRD - Behavior: - Provisioning volumes from snapshots in other namespaces is enabled. - Downgrade: - - Method: Do both of the below operations: + - Method: Do both of the following operations: - Specify `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner - - Delete `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD + - Delete `VolumePopulator` CRD to deny populating from `VolumeSnapshotLink` CRD - Behavior: - Provisioning volumes from snapshots in other namespaces is disabled. @@ -749,7 +749,7 @@ feature. NOTE: Also set `disable-supported` to `true` or `false` in `kep.yaml`. --> -Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner, and deleting `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD. +Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner and deleting `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD. ###### What happens if we reenable the feature if it was previously rolled back? @@ -845,7 +845,7 @@ Recall that end users cannot usually observe component logs or access metrics. - Condition name: `Bound` for a PV that is provisioned from a PVC referencing `VolumeSnapshotLink` - Other field: - [x] Other (treat as last resort) - - Details: Check if a `VolumePopulator` CRD to allow popluating from `VolumeSnapshotLink` CRD exists. + - Details: Check if a `VolumePopulator` CRD to allow populating from `VolumeSnapshotLink` CRD exists. ###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? From dfa580d19f1f68590523d12c09051329eb2c9f55 Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Wed, 22 Jun 2022 14:32:04 +0000 Subject: [PATCH 07/10] Fix PRR section --- .../README.md | 17 ++++++++++++----- .../kep.yaml | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md index 7d97025aa3b..c6c9d955671 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -721,7 +721,7 @@ well as the [existing list] of feature gates. --> - [x] Other - - Describe the mechanism: + - Describe the mechanism: By specifying `--cross-namespace-snapshot=true` command line flag of CSI external-provisioner and creating `VolumePopulator` CR to allow populating from `VolumeSnapshotLink` CR. - Will enabling / disabling the feature require downtime of the control plane? No, it won't require downtime of the entire control plane. However, it will require a downtime of each provisioner whose enablement is being changed. - Will enabling / disabling the feature require downtime or reprovisioning @@ -734,7 +734,8 @@ Any change of default behavior may be surprising to users or break existing automations, so be extremely careful here. --> -Yes, `VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for PVC. +No. Existing behavior of provisioning volumes from PVC and `VolumeSnapshot` remains unchanged. +By enabling the feature, `VolumeSnapshotLink` CR can be used as a `DataSourceRef` for PVC, in addition to the existing data sources. ###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? @@ -749,11 +750,17 @@ feature. NOTE: Also set `disable-supported` to `true` or `false` in `kep.yaml`. --> -Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner and deleting `VolumePopulator` CRD to deny popluating from `VolumeSnapshotLink` CRD. +Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner and deleting `VolumePopulator` CR to deny populating from `VolumeSnapshotLink` CR. + +- After `--cross-namespace-snapshot=false` command line flag is specified: CSI external-provisioner for the csi driver doesn't provision volume from `VolumeSnapshotLink` CR. +- After `VolumePopulator` CR for `VolumeSnapshotLink` CR is deleted: volume-data-source-validator adds an event to PVC that `VolumeSnapshotLink`isn't a valid data source. ###### What happens if we reenable the feature if it was previously rolled back? -`VolumeSnapshotLink` CRD can be used as a `DataSourceRef` for PVC, again. +`VolumeSnapshotLink` CR can be used as a `DataSourceRef` for PVC, again. + +PVCs with `VolumeSnapshotLink` data source may be created while the feature is disabled (and volumes for the PVCs aren't provisioned). +After the feature is reenabled, volumes for the PVCs will be provisioned retrospectively. ###### Are there any tests for feature enablement/disablement? @@ -770,7 +777,7 @@ You can take a look at one potential example of such test in: https://github.com/kubernetes/kubernetes/pull/97058/files#diff-7826f7adbc1996a05ab52e3f5f02429e94b68ce6bce0dc534d1be636154fded3R246-R282 --> -Yes, unit tests cover scenarios where the feature is disabled or enabled. +No. Unit tests that cover scenarios where the feature is disabled or enabled will be added. ### Rollout, Upgrade and Rollback Planning diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml index f9a3b428f18..31dfc560fa7 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml @@ -49,7 +49,7 @@ milestone: # components: # - kube-apiserver # - kube-controller-manager -#disable-supported: true +disable-supported: true # The following PRR answers are required at beta release #metrics: From 0be0ea78d102f6bebf23304a74e7ab4de24e3dd1 Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Mon, 26 Sep 2022 16:28:15 +0900 Subject: [PATCH 08/10] Update milestones --- .../kep.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml index 31dfc560fa7..c5dcd1b7408 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml @@ -34,13 +34,13 @@ stage: alpha # The most recent milestone for which work toward delivery of this KEP has been # done. This can be the current (upcoming) milestone, if it is being actively # worked on. -latest-milestone: "v1.25" +latest-milestone: "v1.26" # The milestone at which this feature was, or is targeted to be, at each stage. milestone: - alpha: "v1.25" - beta: "v1.26" - stable: "v1.28" + alpha: "v1.26" + beta: "v1.27" + stable: "v1.29" # The following PRR answers are required at alpha release # List the feature gate name and the components for which it must be enabled From bb74d140e54e254db107029d449e7eb83e67f84e Mon Sep 17 00:00:00 2001 From: Masaki Kimura Date: Tue, 4 Oct 2022 17:00:57 +0900 Subject: [PATCH 09/10] Addressed review comments - Add description on "ReferenceGrant API change" in "Risks and Mitigations" section and mark it a beta blocker - Allow not specifying namespace in `VolumeSnapshotLink` and provisioning from it without `ReferenceGrant` - Clearly describe that `ReferenceGrant` check continues until it is found - Add description on core API change to allow specifying namespace in "Alternatives" section --- .../README.md | 49 ++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md index c6c9d955671..db27e3b0b58 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/README.md @@ -92,6 +92,7 @@ tags, and then generate with `hack/update-toc.sh`. - [Secret Handling](#-handling) - [Security](#security) - [Conflict on installing VolumePopulator CR for VolumeSnapshotLink across CSI drivers](#conflict-on-installing--cr-for--across-csi-drivers) + - [ReferenceGrant API change](#referencegrant-api-change) - [Design Details](#design-details) - [Example flow of how this proposal works](#example-flow-of-how-this-proposal-works) - [API](#api) @@ -320,6 +321,14 @@ As a result, there may be a conflict in creating it for each driver, if there ar To avoid this issue, it should be avoided to manage `VolumePopulator` CR for `VolumeSnapshotLink` in each CSI driver's repository. It should be managed in another single repository and the same CR should be used per cluster basis. +#### ReferenceGrant API change + +Currently, `ReferenceGrant` exists in `gateway.networking.k8s.io` API, so users may be confused by using networking API for storage purpose. +To avoid it, `ReferenceGrant` is planned to be moved to more generic place. +As a result, `ReferenceGrant` API will be changed in the future, so `VolumeSnapshotLink` API will also need to be changed. + +Changes in `VolumeSnapshotLink` API related to `ReferenceGrant` API changes will be a beta blocker. + ## Design Details @@ -237,13 +241,8 @@ The "Design Details" section below is for the real nitty-gritty. --> -Define an API to specify a cross-namespace `VolumeSnapshot` as a `DataSourceRef` of a PVC and implement a generic populator for the API. - -- To specify a non-standard API as a `DataSourceRef` of a PVC, [AnyVolumeDataSource feature](https://kubernetes.io/blog/2021/08/30/volume-populators-redesigned/) is used, -- To specify a cross-namespace `VolumeSnapshot`, a new `VolumeSnapshotLink` CRD is introduced (Please also see [API](#api)), -- To restrict only allowed `VolumeSnapshot` to be consumed from other namespaces, [`ReferenceGrant` CRD (formerly `ReferencePolicy`)](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io%2fv1alpha2.ReferenceGrant) is used, -- To populate a PV from a `VolumeSnapshot` referenced from `VolumeSnapshotLink` CRD, a populator for each CSI driver is used, -- As a reference populator implementation, [CSI external provisioner](https://github.com/kubernetes-csi/external-provisioner) is extended to handle the `VolumeSnapshotLink` CRD (Please also see [Populator implementation](#populator-implementation)). +Extend `PersistentVolumeClaim` API to handle a cross-namespace `VolumeSnapshot` as a data source and extend [CSI external provisioner](https://github.com/kubernetes-csi/external-provisioner) to handle it. +To restrict only allowed `VolumeSnapshot` to be consumed from other namespaces, [`ReferenceGrant` CRD (formerly `ReferencePolicy`)](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io%2fv1alpha2.ReferenceGrant) is used. An initial discussion of this idea can be found [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-949929595) and PoC implementation can be found [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-958208039). @@ -295,39 +294,33 @@ Consider including folks who also work outside the SIG or subproject. #### `Secret` Handling Unlike transfer feature, this idea doesn't need to involve any transfers of `Secret`, therefore there will be no issue on `Secret` handling. -From a populator, `Secret`s are only referenced through snapshots that exist in the same namespace (As commented [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202), depending on the driver implementation, there may be very little chance that some CSI drivers won't work well in a very rare situation. However, such drivers can avoid this issue separately, by turning off this feature, implementing their own populator, and so on). +From a populator, `Secret`s are only referenced through snapshots that exist in the same namespace (As commented [here](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-962168202), depending on the driver implementation, there may be very little chance that some CSI drivers won't work well in a very rare situation). #### Security By using [`ReferenceGrant`](https://gateway-api.sigs.k8s.io/concepts/security-model/#2-referencegrant), only allowed snapshots can be accessed beyond the namespace boundary (Please also see [original discussion on security](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-919107307)). Therefore, no malicious user will be able to access to prohibited snapshots. -In addition, there will be cases that `ReferenceGrant` may be created/deleted/re-created while `VolumeSnapshotLink` is being handled, however, no inconsistent behavior will be expected, as described below. +In addition, there will be cases that `ReferenceGrant` may be created/deleted/re-created while referencing PVC is being handled, however, no inconsistent behavior will be expected, as described below. -1. No `ReferenceGrant` for a `VolumeSnapshotLink` exists when the `VolumeSnapshotLink` is handled, and the `ReferenceGrant` is created later +1. No `ReferenceGrant` for a referencing PVC exists when the PVC is handled, and the `ReferenceGrant` is created later A controller won't use the `VolumeSnapshot` until the `ReferenceGrant` that allows the access is created. -2. `ReferenceGrant` for a `VolumeSnapshotLink` exists when the `VolumeSnapshotLink` is handled, and the `ReferenceGrant` is deleted later - A controller will use the `VolumeSnapshot` if the `ReferenceGrant` that allows the access exists when it checks. +2. `ReferenceGrant` for a referencing PVC exists when the PVC is handled, and the `ReferenceGrant` is deleted later + A controller will use the PVC if the `ReferenceGrant` that allows the access exists when it checks. If all the processes succeed without any error, it succeeds even the `ReferenceGrant` is deleted in the middle of the processes. - If any errors happened in the processes and the controller retries, it may detect that there is no `ReferenceGrant` . Then, it won't use the `VolumeSnapshot` until the `ReferenceGrant` that allows the access is re-created. - -#### Conflict on installing `VolumePopulator` CR for `VolumeSnapshotLink` across CSI drivers - -This feature requires installing `VolumePopulator` CR for `VolumeSnapshotLink` and is enabled per CSI driver basis. -Therefore, on enabling this feature for each CSI driver, it is expected that `VolumePopulator` CR for `VolumeSnapshotLink` is created before each CSI driver installation. -As a result, there may be a conflict in creating it for each driver, if there are any differences in their definitions, like alpha API and beta API. - -To avoid this issue, it should be avoided to manage `VolumePopulator` CR for `VolumeSnapshotLink` in each CSI driver's repository. -It should be managed in another single repository and the same CR should be used per cluster basis. + If any errors happened in the processes and the controller retries, it may detect that there is no `ReferenceGrant` . Then, it won't use the PVC until the `ReferenceGrant` that allows the access is re-created. #### ReferenceGrant API change Currently, `ReferenceGrant` exists in `gateway.networking.k8s.io` API, so users may be confused by using networking API for storage purpose. To avoid it, `ReferenceGrant` is planned to be moved to more generic place. -As a result, `ReferenceGrant` API will be changed in the future, so `VolumeSnapshotLink` API will also need to be changed. +As a result, `ReferenceGrant` API will be changed in the future, so csi external provisioner implementation will also need to be changed. + +In addition, it will be better that library for checking `ReferenceGrant` is provided in a generic place. +Depending on the implementation of the library code, consuming API may need to be changed. -Changes in `VolumeSnapshotLink` API related to `ReferenceGrant` API changes will be a beta blocker. +Therefore, changes in `VolumeSnapshotLink` API related to `ReferenceGrant` API changes will be a beta blocker. ## Design Details @@ -348,7 +341,7 @@ Let's use [Story 1](#story-1) as an example and let's assume the following: Once this proposal is implemented, it can be achieved by doing the following steps: -1. In the prod namespace, Alice creates a `ReferenceGrant` bar that allows referencing to the `VolumeSnapshot` foo-backup in the prod namespace from any `VolumeSnapshotLinks` in the test namespace, +1. In the prod namespace, Alice creates a `ReferenceGrant` bar that allows referencing to the `VolumeSnapshot` foo-backup in the prod namespace from any `PersistentVolumeClaim` in the test namespace, ```yaml apiVersion: gateway.networking.k8s.io/v1alpha2 kind: ReferenceGrant @@ -357,27 +350,14 @@ Once this proposal is implemented, it can be achieved by doing the following ste namespace: prod spec: from: - - group: snapshot.storage.k8s.io/v1alpha1 - kind: VolumeSnapshotLink + - kind: PersistentVolumeClaim namespace: test to: - group: snapshot.storage.k8s.io/v1 kind: VolumeSnapshot name: foo-backup ``` -2. In the test namespace, Bob creates a `VolumeSnapshotLink` foo-link that references the `VolumeSnapshot` foo-backup in the prod namespace as a source, - ```yaml - apiVersion: snapshot.storage.k8s.io/v1alpha1 - kind: VolumeSnapshotLink - metadata: - name: foo-link - namespace: test - spec: - source: - name: foo-backup - namespace: prod - ``` -3. In the test namespace, Bob creates a `PersistentVolumeClaim` foo-testing that references the `VolumeSnapshotLink` foo-link as a data source, +2. In the test namespace, Bob creates a `PersistentVolumeClaim` foo-testing that references the `VolumeSnapshot` foo-backup in prod namespace as a data source, ```yaml apiVersion: v1 kind: PersistentVolumeClaim @@ -390,148 +370,91 @@ Once this proposal is implemented, it can be achieved by doing the following ste resources: requests: storage: 10Mi - dataSourceRef: - apiGroup: snapshot.storage.k8s.io/v1alpha1 - kind: VolumeSnapshotLink - name: foo-link + dataSourceRef2: + apiGroup: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshot + name: foo-backup + namespace: prod volumeMode: Filesystem ``` -4. Once the populator finds a `VolumeSnapshotLink` is specified as `dataSourceRef`, it checks all `ReferenceGrants` in `VolumeSnapshotLink.spec.source.namespace` to see if populating the `VolumeSnapshotLink.spec.source` is allowed. If it is allowed, the populator populates the volume. +3. Once the csi-provisioner finds a `VolumeSnapshot` is specified with non-empty namespace as `dataSourceRef2`, it checks all `ReferenceGrants` in `PersistentVolumeClaim.spec.dataSourceRef2.namespace` to see if access to the snapshot is allowed. If it is allowed, the csi-provisioner provisions a volume from the snapshot. Note that how `ReferenceGrant` is checked depends on the implementation, however controllers that are trying to use the `VolumeSnapshot` in another namespace must check `ReferenceGrant` if the access is allowed before it actually starts exposing the data and metadata from the `VolumeSnapshot` to the `VolumeSnapshotLink`'s namespace. ### API -A new `VolumeSnapshotLink` CRD is introduced in `snapshot.storage.k8s.io` API group: +A new `DataSourceRef2` field is added to `PersistentVolumeClaimSpec`: ```golang -type VolumeSnapshotLink struct { - metav1.TypeMeta `json:",inline"` - // +optional - metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` - - Spec VolumeSnapshotLinkSpec `json:"spec" protobuf:"bytes,2,opt,name=spec"` -} +type PersistentVolumeClaimSpec struct { -type VolumeSnapshotLinkList struct { - metav1.TypeMeta `json:",inline"` - // +optional - metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + . + . + . - Items []VolumeSnapshotLink `json:"items" protobuf:"bytes,2,rep,name=items"` -} - -// VolumeSnapshotLinkSpec describes attributes of a volume snapshot link. -type VolumeSnapshotLinkSpec struct { - // This field is immutable after creation. - Source VolumeSnapshotLinkSource `json:"source" protobuf:"bytes,1,opt,name=source"` + DataSourceRef2 *TypedCrossNamespaceObjectReference } +``` -// VolumeSnapshotLinkSource specifies a reference to VolumeSnapshot. -type VolumeSnapshotLinkSource struct { - Name string `json:"name" protobuf:"bytes,1,opt,name=name"` - // Namespace is the namespace of the snapshot. When unspecified, the local namespace is inferred. +```golang +type TypedCrossNamespaceObjectReference struct { + // APIGroup is the group for the resource being referenced. + // If APIGroup is not specified, the specified Kind must be in the core API group. + // For any other third-party types, APIGroup is required. + // +optional + APIGroup *string + // Kind is the type of resource being referenced + Kind string + // Namespace is the namespace of resource being referenced // Note that when a namespace is specified, a ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. // +optional - Namespace string `json:"namespace" protobuf:"bytes,2,opt,name=namespace"` + Namespace string + // Name is the name of resource being referenced + Name string } ``` -Note that when a `VolumeSnapshotLink.spec.source.namespace` is specified, a `ReferenceGrant` object is required in the referent namespace to allow that namespace’s owner to accept the reference. See the `ReferenceGrant` documentation for details. - -### Populator implementation - -The populator logic can be implemented either [(a) inside the existing CSI external-provisioner](#a-inside-the-existing-csi-external-provisioner) or [(b) as a separate populator](#b-as-a-separate-populator). -Cluster admins can choose which implementation to be used per CSI driver basis. -As a reference implementation, only (a) will be implemented in the community. - -Regardless of the implementation, -- `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD must exist in the cluster before the populator is deployed. -- `VolumePopulator` CR to allow populating from `VolumeSnapshotLink` CRD needs to be created to enable this feature, as AnyVolumeDataSource feature defines. The `VolumePopulator` CR needed for this feature will be as follows: -```yaml -kind: VolumePopulator -apiVersion: populator.storage.k8s.io/v1beta1 -metadata: - name: volumesnapshotlink -sourceKind: - group: snapshot.storage.k8s.io - kind: VolumeSnapshotLink -``` +### Implementation -#### (a) inside the existing CSI external-provisioner +#### CSI external provisioner -Once populator is implemented inside the existing CSI external-provisioner, the CSI external provisioner: -1. Handles `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD, -2. Datasource is handled in the below reconciler loop: - - If `VolumeSnapshotLink` isn't specified as `DataSourceRef`: Do the existing provisioner behavior. - - Otherwise: - - If namespace isn't specified in `VolumeSnapshotLink`: use the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` as a SnapshotSource to pass to the CSI driver for provision. +- If CrossNamespaceSourceProvisioning feature gate isn't enabled: Do the existing provisioner behavior. +- Otherwise: Datasource is handled in the below reconciler loop: + - If data source is specified through `DataSource` or `DataSourceRef`: Do the existing provisioner behavior for the data source. + - If data source is specified through `DataSourceRef2`: + - If namepace isn't specified: Provision a volume from the snapshot specified through DataSourceRef2`. - Otherwise: - - If the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferenceGrant`s: Use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. + - If the access to the `VolumeSnapshot` referenced by the `PersistentVolumeClaim` is allowed by any `ReferenceGrant`s: Provision a volume from the snapshot specified through DataSourceRef2`. - Otherwise: Do nothing (the `ReferenceGrant` check above will be repeated in the loop). -To enable this feature in CSI external provisioner, `--cross-namespace-snapshot=true` -command line flag needs to be passed to the provisioner for each CSI plugin. - -The [initial PoC implementation](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-958208039) uses this approach, but it doesn't implements the command line flag to enable/disable this feature. - -#### (b) as a separate populator - -There will be two approaches to implement as a separate populator: -- [(1) Populate data from snapshot to provisioned PV](#1-populate-data-from-snapshot-to-provisioned-pv) -- [(2) Provision PV with data via CSI call](#2-provision-pv-with-data-via-csi-call) - -##### (1) Populate data from snapshot to provisioned PV - -This is a straightforward implementation that AnyVolumeDataSource feature defines. -Developers will be able to utilize lib-volume-populator to implement this way. -One of the challenges to achieve it will be how to copy the data from a snapshot in one namespace to an already provisioned PV that will need to be bound to a PVC in the other namespace. - -A naive implementation will be: -1. Create another PV from the snapshot in the snapshot's namespace, -2. Copy the data from the PV to somewhere accessible from any namespaces, -3. Copy the data in step 2 to the originally intended PV, -4. Delete the temporary data in step 1 and step 2. - -If the naive implementation is used, unintended transient states, for example a temporary PVC in the snapshot namespace, may be visible to users. -Also, there may be performance issues depending on where and how data is copied. - -On the other hand, although it completely depends on the implementation, this approach can have advantages, like the ability to populate volumes from snapshot across different CSI drivers or the ability to efficiently copy data by using CSI driver specific way. +#### API server -There will be no generic way to implement by using this approach, because the implementations rely too much on backup tools or CSI drivers. -Therefore, no community implementation of this approach will be provided. +To provide compatibility of `PersistentVolumeClaim` API with older versions, the API servers need to properly handle `DataSourceRef2`, `DataSource`, and `DataSourceRef`. +There will be two approaches: -Note that a PoC implementation for this approach can be found [here](https://github.com/kubernetes-csi/lib-volume-populator/pull/31). It works only for csi-hostpath driver and is intended to be just for discussion purpose. +1. Eventually allow only `DataSourceRef2` fields -##### (2) Provision PV with data via CSI call +This is [a similar approach](https://github.com/kubernetes/enhancements/tree/master/keps/sig-storage/1495-volume-populators#proposal) to `DataSourceRef` and `DataSource` compatibility handling. -Current CSI external provisioners provision volume regardless of the data source, therefore populators need to populate data to already provisioned PVs. -However, this behavior may be changed and provisioners may offload provisioning to populators for PV with `VolumeSnapshotLink` CRD data source. +Deprecate the DataSourceRef field once DataSourceRef2 reaches GA, but continue to support it in a backwards-compatible way, never removing it. -The implementation of provisioner and populator of this approach will be as follows: +In particular: + 1. If DataSourceRef and DataSourceRef2 are both unset; accept + 2. If DataSourceRef is set AND DataSourceRef2 is not set; set DataSourceRef2 from DataSourceRef. + 3. If DataSourceRef is set AND DataSourceRef2 is set the same (namespace is empty); accept + 4. If DataSourceRef is set AND DataSourceRef2 is set differently; reject + 5. If DataSourceRef is not set AND DataSourceRef2 is set (namespace is empty); set DataSourceRef from DataSourceRef2 and accept + 6. If DataSourceRef is not set AND DataSourceRef2 is set (namespace is not empty); accept -- Provisioner: - 1. Handles `VolumeSnapshotLink` CRD, - 2. Checks if `VolumeSnapshotLink` is specified as `DataSourceRef`: - - If specified, skip provisioning the volume +Note that we can't set DataSourceRef from DataSourceRef2 in case 6, because namespace field can't be set to DataSourceRef. +Impact of not setting fields in this case, needs to be considered. -- Populator: - 1. Handles `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD, - 2. Datasource is handled in the below reconciler loop: - - If `VolumeSnapshotLink` isn't specified as `DataSourceRef`: Skip handling the Datasource. - - Otherwise: - - If namespace isn't specified in `VolumeSnapshotLink`: use the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` as a SnapshotSource to pass to the CSI driver for provision. - - Otherwise: - - If the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferenceGrant`s: Use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. - - Otherwise: Do nothing (the `ReferenceGrant` check above will be repeated in the loop). +2. Allow both `DataSourceRef` field and `DataSourceRef2` fields to coexist, but only allow specifying either of them or not specifying at all -The above implementation is just separating the logics in approach (a) to two components, and it won't help improve efficiency nor simplify implementations. -Therefore, the description in this section is just for discussion purpose and won't be implemented. +In this case, `DataSourceRef2` is used for cross-namespace provisioning purpose and `DataSourceRef` is used for existing provisioing purpose. -A PoC implementation for this approach, forking existing provisioner and modify it to only handle `VolumeSnapshotLink`, can be found [here](https://github.com/mkimuram/external-provisioner/commits/separate-controller). -Note that just to separate the containers for normal provision and provision from `VolumeSnapshotLink`, we don't need to fork the codes, instead we can use a command line option. -Fork is only needed if we need to keep the existing CSI external provisioner codes separated from this feature. + 1. If DataSource and DataSourceRef are both set; reject -In addition, there may be an alternative way to populate to an already provisioned volume via CSI call from populator, if CSI spec is modified to allow populating an already provisioned volume. +For this case, `DataSourceRef2` could be renamed like `CrossNamespaceDataSourceRef`. ### Test Plan @@ -600,8 +523,10 @@ https://storage.googleapis.com/k8s-triage/index.html We expect no non-infra related flakes in the last month as a GA graduation criteria. --> -- Verify that PV is provisioned from VS in other namespace and bound to PVC if allowed by ReferenceGrant: -- Verify that PV isn't provisioned from VS in other namespace and isn't bound to PVC if not allowed by ReferenceGrant: +- Verify that PV is provisioned from VS in other NS if specified through datasourceRef2 and allowed by ReferenceGrant: +- Verify that PV isn't provisioned from VS in other NS if specified through datasourceRef2 and not allowed by ReferenceGrant: +- Verify that PV is provisioned from VS in the same NS if specified through datasourceRef2 with empty namspace and not allowed by ReferenceGrant: +- Verify that PV isn't provisioned from VS in the same NS if specified through datasourceRef2 with non-empty namspace and not allowed by ReferenceGrant: ### Graduation Criteria @@ -635,12 +560,13 @@ Below are some examples to consider, in addition to the aforementioned [maturity #### Alpha -- Feature implemented behind a non-default command line flag of CSI external-provisioner +- Feature implemented behind a feature gate - Initial e2e tests completed and enabled #### Beta - Gather feedback from developers and surveys +- Add scale tests - Additional tests are in Testgrid and linked in KEP #### GA @@ -671,15 +597,13 @@ enhancement: --> - Upgrade: - - Method: Do both of the following operations: - - Specify `--cross-namespace-snapshot=true` command line flag of CSI external-provisioner - - Create `VolumePopulator` CRD to allow populating from `VolumeSnapshotLink` CRD + - Method: + - Enable CrossNamespaceSourceProvisioning feature gate. - Behavior: - Provisioning volumes from snapshots in other namespaces is enabled. - Downgrade: - - Method: Do both of the following operations: - - Specify `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner - - Delete `VolumePopulator` CRD to deny populating from `VolumeSnapshotLink` CRD + - Method: + - Disable CrossNamespaceSourceProvisioning feature gate. - Behavior: - Provisioning volumes from snapshots in other namespaces is disabled. @@ -740,12 +664,9 @@ well as the [existing list] of feature gates. [existing list]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/ --> -- [x] Other - - Describe the mechanism: By specifying `--cross-namespace-snapshot=true` command line flag of CSI external-provisioner and creating `VolumePopulator` CR to allow populating from `VolumeSnapshotLink` CR. - - Will enabling / disabling the feature require downtime of the control - plane? No, it won't require downtime of the entire control plane. However, it will require a downtime of each provisioner whose enablement is being changed. - - Will enabling / disabling the feature require downtime or reprovisioning - of a node? (Do not assume `Dynamic Kubelet Config` feature is enabled). No. +- [x] Feature gate (also fill in values in `kep.yaml`) + - Feature gate name: CrossNamespaceSourceProvisioning + - Components depending on the feature gate: kube-apiserver, csi-provisioner ###### Does enabling the feature change any default behavior? @@ -755,7 +676,7 @@ automations, so be extremely careful here. --> No. Existing behavior of provisioning volumes from PVC and `VolumeSnapshot` remains unchanged. -By enabling the feature, `VolumeSnapshotLink` CR can be used as a `DataSourceRef` for PVC, in addition to the existing data sources. +By enabling the feature, volumes can be provisioned from snapshots in another namespace. ###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? @@ -770,17 +691,16 @@ feature. NOTE: Also set `disable-supported` to `true` or `false` in `kep.yaml`. --> -Yes, by specifying `--cross-namespace-snapshot=false` command line flag of CSI external-provisioner and deleting `VolumePopulator` CR to deny populating from `VolumeSnapshotLink` CR. +Yes, by diabling CrossNamespaceSourceProvisioning feature gate. -- After `--cross-namespace-snapshot=false` command line flag is specified: CSI external-provisioner for the csi driver doesn't provision volume from `VolumeSnapshotLink` CR. -- After `VolumePopulator` CR for `VolumeSnapshotLink` CR is deleted: volume-data-source-validator adds an event to PVC that `VolumeSnapshotLink`isn't a valid data source. +After the feature is disabled, provisioing volumes from snapshots in another namespace fails. ###### What happens if we reenable the feature if it was previously rolled back? -`VolumeSnapshotLink` CR can be used as a `DataSourceRef` for PVC, again. +Volumes can be provisioned from snapshots in another namespace, again. -PVCs with `VolumeSnapshotLink` data source may be created while the feature is disabled (and volumes for the PVCs aren't provisioned). -After the feature is reenabled, volumes for the PVCs will be provisioned retrospectively. +Volumes with snapshots in another namespace may be created while the feature is disabled (and such volumes aren't provisioned). +After the feature is reenabled, volumes will be provisioned retrospectively. ###### Are there any tests for feature enablement/disablement? @@ -898,9 +818,11 @@ Pick one more of these and delete the rest. --> - [x] Metrics - - Metric name: TBD (Need to discuss if existing metrics for "claims" queue is sufficient). + - Metric name: + - `cross_namespace_persistentvolumeclaim_provision_total`: Total number of persistent volumes provisioned from cross namespace data source successfully. Broken down by storage class name. + - `cross_namespace_persistentvolumeclaim_provision_failed_total`: Total number of persistent volume provision from cross namespace data source failed attempts. Broken down by storage class name. - [Optional] Aggregation method: prometheus - - Components exposing the metric: CSI external-provisioner for each CSI plugin + - Components exposing the metric: CSI external-provisioner ###### Are there any missing metrics that would be useful to have to improve observability of this feature? @@ -937,7 +859,6 @@ and creating new ones, as well as about cluster-level services (e.g. DNS): - Features: - CSI feature ([GA](https://kubernetes.io/blog/2019/01/15/container-storage-interface-ga/) in v1.13) - - AnyVolumeDataSource feature ([Beta](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#api-change) in v1.24) - Services: - CSI external provisioner and CSI plugins @@ -975,12 +896,9 @@ Focusing mostly on: - periodic API calls to reconcile state (e.g. periodic fetching state, heartbeats, leader election, etc.) --> -- GET `VolumeSnapshotLink` API call: - - originating component(s): CSI external-provisioner - - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. -- GET(LIST) `ReferenceGrant` API call: +- GET `ReferenceGrant` API call: - originating component(s): CSI external-provisioner - - this API call is triggered once in each [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshotLink` is referenced from PVC. + - this API call is triggered once after matching `ReferenceGrant` is found in informer cache on [`Provision` call](https://github.com/kubernetes-csi/external-provisioner/blob/master/pkg/controller/controller.go#L719) in CSI external-provisioner when `VolumeSnapshot` in another namespace is referenced from PVC. ###### Will enabling / using this feature result in introducing new API types? @@ -1108,20 +1026,216 @@ not need to be as detailed as the proposal, but should include enough information to express the idea and why it was not acceptable. --> -- Implement transfer feature for `VolumeSnapshot`: - - Pros: - - Can have more control over the transferred `VolumeSnapshot`, like modifying and deleting - - Can potentially be used to directly clone the `VolumeSnapshot` to other namespaces for [backup use case](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-957693334) - - Cons: - - Need to handle [race conditions](https://github.com/kubernetes/enhancements/pull/2849#discussion_r682057570) - - Need to consider [referenced Secrets](https://github.com/kubernetes/enhancements/pull/2849#discussion_r692459041) from snapshot after transferred - -- Change `PersistentVolumeClaimSpec` in core API to take namespace as a data source, instead of introducing a new `VolumeSnapshotLink` API: - - Pros: - - Can provide a simple API, which will provide a better UX - - Cons: - - Requires a core API change - - Needs to handle compatibility issues +### Implement transfer feature for `VolumeSnapshot`: +- Pros: + - Can have more control over the transferred `VolumeSnapshot`, like modifying and deleting + - Can potentially be used to directly clone the `VolumeSnapshot` to other namespaces for [backup use case](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-957693334) +- Cons: + - Need to handle [race conditions](https://github.com/kubernetes/enhancements/pull/2849#discussion_r682057570) + - Need to consider [referenced Secrets](https://github.com/kubernetes/enhancements/pull/2849#discussion_r692459041) from snapshot after transferred + +### Introducing a new `VolumeSnapshotLink` API instead of changing `PersistentVolumeClaimSpec` in core API to take namespace as a data source: +- Pros: + - Doesn't require a core API change + - Doesn't need to handle compatibility issues +- Cons: + - Provide a complex API with an extra object, which will provide a better UX + - Conflict on installing `VolumePopulator` CR for `VolumeSnapshotLink` across CSI drivers + +#### Example flow of how `VolumeSnapshotLink` API works + +Let's use [Story 1](#story-1) as an example and let's assume the following: +- There are two namespaces, prod and test, +- Alice manages the prod namespace and Bob manages the test namespace, +- Alice would like to allow `VolumeSnapshot` foo-backup in the prod namespace to be consumed in the test namespace for testing, +- Bob would like to create a PV for PVC foo-testing in the test namespace from the `VolumeSnapshot` foo-backup in the prod namespace. + +Once this proposal is implemented, it can be achieved by doing the following steps: + +1. In the prod namespace, Alice creates a `ReferenceGrant` bar that allows referencing to the `VolumeSnapshot` foo-backup in the prod namespace from any `VolumeSnapshotLinks` in the test namespace, + ```yaml + apiVersion: gateway.networking.k8s.io/v1alpha2 + kind: ReferenceGrant + metadata: + name: bar + namespace: prod + spec: + from: + - group: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + namespace: test + to: + - group: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshot + name: foo-backup + ``` +2. In the test namespace, Bob creates a `VolumeSnapshotLink` foo-link that references the `VolumeSnapshot` foo-backup in the prod namespace as a source, + ```yaml + apiVersion: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + metadata: + name: foo-link + namespace: test + spec: + source: + name: foo-backup + namespace: prod + ``` +3. In the test namespace, Bob creates a `PersistentVolumeClaim` foo-testing that references the `VolumeSnapshotLink` foo-link as a data source, + ```yaml + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: foo-testing + namespace: test + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Mi + dataSourceRef: + apiGroup: snapshot.storage.k8s.io/v1alpha1 + kind: VolumeSnapshotLink + name: foo-link + volumeMode: Filesystem + ``` +4. Once the populator finds a `VolumeSnapshotLink` is specified as `dataSourceRef`, it checks all `ReferenceGrants` in `VolumeSnapshotLink.spec.source.namespace` to see if populating the `VolumeSnapshotLink.spec.source` is allowed. If it is allowed, the populator populates the volume. + Note that how `ReferenceGrant` is checked depends on the implementation, however controllers that are trying to use the `VolumeSnapshot` in another namespace must check `ReferenceGrant` if the access is allowed before it actually starts exposing the data and metadata from the `VolumeSnapshot` to the `VolumeSnapshotLink`'s namespace. + +#### `VolumeSnapshotLink` API + +A new `VolumeSnapshotLink` CRD is introduced in `snapshot.storage.k8s.io` API group: + +```golang +type VolumeSnapshotLink struct { + metav1.TypeMeta `json:",inline"` + // +optional + metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + Spec VolumeSnapshotLinkSpec `json:"spec" protobuf:"bytes,2,opt,name=spec"` +} + +type VolumeSnapshotLinkList struct { + metav1.TypeMeta `json:",inline"` + // +optional + metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + Items []VolumeSnapshotLink `json:"items" protobuf:"bytes,2,rep,name=items"` +} + +// VolumeSnapshotLinkSpec describes attributes of a volume snapshot link. +type VolumeSnapshotLinkSpec struct { + // This field is immutable after creation. + Source VolumeSnapshotLinkSource `json:"source" protobuf:"bytes,1,opt,name=source"` +} + +// VolumeSnapshotLinkSource specifies a reference to VolumeSnapshot. +type VolumeSnapshotLinkSource struct { + Name string `json:"name" protobuf:"bytes,1,opt,name=name"` + // Namespace is the namespace of the snapshot. When unspecified, the local namespace is inferred. + // Note that when a namespace is specified, a ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + // +optional + Namespace string `json:"namespace" protobuf:"bytes,2,opt,name=namespace"` +} +``` + +Note that when a `VolumeSnapshotLink.spec.source.namespace` is specified, a `ReferenceGrant` object is required in the referent namespace to allow that namespace’s owner to accept the reference. See the `ReferenceGrant` documentation for details. + +#### Populator implementation + +The populator logic can be implemented either [(a) inside the existing CSI external-provisioner](#a-inside-the-existing-csi-external-provisioner) or [(b) as a separate populator](#b-as-a-separate-populator). +Cluster admins can choose which implementation to be used per CSI driver basis. +As a reference implementation, only (a) will be implemented in the community. + +Regardless of the implementation, +- `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD must exist in the cluster before the populator is deployed. +- `VolumePopulator` CR to allow populating from `VolumeSnapshotLink` CRD needs to be created to enable this feature, as AnyVolumeDataSource feature defines. The `VolumePopulator` CR needed for this feature will be as follows: +```yaml +kind: VolumePopulator +apiVersion: populator.storage.k8s.io/v1beta1 +metadata: + name: volumesnapshotlink +sourceKind: + group: snapshot.storage.k8s.io + kind: VolumeSnapshotLink +``` + +##### (a) inside the existing CSI external-provisioner + +Once populator is implemented inside the existing CSI external-provisioner, the CSI external provisioner: +1. Handles `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD, +2. Datasource is handled in the below reconciler loop: + - If `VolumeSnapshotLink` isn't specified as `DataSourceRef`: Do the existing provisioner behavior. + - Otherwise: + - If namespace isn't specified in `VolumeSnapshotLink`: use the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` as a SnapshotSource to pass to the CSI driver for provision. + - Otherwise: + - If the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferenceGrant`s: Use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. + - Otherwise: Do nothing (the `ReferenceGrant` check above will be repeated in the loop). + +To enable this feature in CSI external provisioner, `--cross-namespace-snapshot=true` +command line flag needs to be passed to the provisioner for each CSI plugin. + +The [initial PoC implementation](https://github.com/kubernetes/enhancements/pull/2849#issuecomment-958208039) uses this approach, but it doesn't implements the command line flag to enable/disable this feature. + +##### (b) as a separate populator + +There will be two approaches to implement as a separate populator: +- [(1) Populate data from snapshot to provisioned PV](#1-populate-data-from-snapshot-to-provisioned-pv) +- [(2) Provision PV with data via CSI call](#2-provision-pv-with-data-via-csi-call) + +###### (1) Populate data from snapshot to provisioned PV + +This is a straightforward implementation that AnyVolumeDataSource feature defines. +Developers will be able to utilize lib-volume-populator to implement this way. +One of the challenges to achieve it will be how to copy the data from a snapshot in one namespace to an already provisioned PV that will need to be bound to a PVC in the other namespace. + +A naive implementation will be: +1. Create another PV from the snapshot in the snapshot's namespace, +2. Copy the data from the PV to somewhere accessible from any namespaces, +3. Copy the data in step 2 to the originally intended PV, +4. Delete the temporary data in step 1 and step 2. + +If the naive implementation is used, unintended transient states, for example a temporary PVC in the snapshot namespace, may be visible to users. +Also, there may be performance issues depending on where and how data is copied. + +On the other hand, although it completely depends on the implementation, this approach can have advantages, like the ability to populate volumes from snapshot across different CSI drivers or the ability to efficiently copy data by using CSI driver specific way. + +There will be no generic way to implement by using this approach, because the implementations rely too much on backup tools or CSI drivers. +Therefore, no community implementation of this approach will be provided. + +Note that a PoC implementation for this approach can be found [here](https://github.com/kubernetes-csi/lib-volume-populator/pull/31). It works only for csi-hostpath driver and is intended to be just for discussion purpose. + +###### (2) Provision PV with data via CSI call + +Current CSI external provisioners provision volume regardless of the data source, therefore populators need to populate data to already provisioned PVs. +However, this behavior may be changed and provisioners may offload provisioning to populators for PV with `VolumeSnapshotLink` CRD data source. + +The implementation of provisioner and populator of this approach will be as follows: + +- Provisioner: + 1. Handles `VolumeSnapshotLink` CRD, + 2. Checks if `VolumeSnapshotLink` is specified as `DataSourceRef`: + - If specified, skip provisioning the volume + +- Populator: + 1. Handles `VolumeSnapshotLink` CRD and `ReferenceGrant` CRD, + 2. Datasource is handled in the below reconciler loop: + - If `VolumeSnapshotLink` isn't specified as `DataSourceRef`: Skip handling the Datasource. + - Otherwise: + - If namespace isn't specified in `VolumeSnapshotLink`: use the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` as a SnapshotSource to pass to the CSI driver for provision. + - Otherwise: + - If the access to the `VolumeSnapshot` referenced by the `VolumeSnapshotLink` is allowed by any `ReferenceGrant`s: Use the `VolumeSnapshot` as a SnapshotSource to pass to the CSI driver for provision. + - Otherwise: Do nothing (the `ReferenceGrant` check above will be repeated in the loop). + +The above implementation is just separating the logics in approach (a) to two components, and it won't help improve efficiency nor simplify implementations. +Therefore, the description in this section is just for discussion purpose and won't be implemented. + +A PoC implementation for this approach, forking existing provisioner and modify it to only handle `VolumeSnapshotLink`, can be found [here](https://github.com/mkimuram/external-provisioner/commits/separate-controller). +Note that just to separate the containers for normal provision and provision from `VolumeSnapshotLink`, we don't need to fork the codes, instead we can use a command line option. +Fork is only needed if we need to keep the existing CSI external provisioner codes separated from this feature. + +In addition, there may be an alternative way to populate to an already provisioned volume via CSI call from populator, if CSI spec is modified to allow populating an already provisioned volume. ## Infrastructure Needed (Optional) diff --git a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml index c5dcd1b7408..92c83c4619c 100644 --- a/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml +++ b/keps/sig-storage/3294-provision-volumes-from-cross-namespace-snapshots/kep.yaml @@ -3,9 +3,11 @@ kep-number: 3294 authors: - "@Elbehery" - "@mkimuram" + - "@ttakahashi21" owning-sig: sig-storage participating-sigs: - sig-network + - sig-auth status: implementable creation-date: 2022-04-06 reviewers: @@ -44,11 +46,11 @@ milestone: # The following PRR answers are required at alpha release # List the feature gate name and the components for which it must be enabled -#feature-gates: -# - name: MyFeature -# components: -# - kube-apiserver -# - kube-controller-manager +feature-gates: + - name: CrossNamespaceSourceProvisioning + components: + - kube-apiserver + - csi-external-provisioner disable-supported: true # The following PRR answers are required at beta release