Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

local pv provisioner for public cloud #406

Merged
merged 1 commit into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ build: ## Build binary.
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/controller-manager cmd/controller-manager/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/autoscaler cmd/autoscaler/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/scheduler cmd/scheduler/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/provisioner cmd/provisioner/main.go

helm-charts: ## Build helm charts.
helm package charts/nebula-operator --version $(CHARTS_VERSION) --app-version $(CHARTS_VERSION)
Expand Down
4 changes: 2 additions & 2 deletions alpine.multiarch
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Helper image based on Alpine 3.18.2.
FROM alpine:3.18.2

# Build arguments; they are declared here but not referenced in the lines visible in this hunk.
ARG TARGETPLATFORM
ARG TARGETARCH

# Refresh the package index, upgrade installed packages, install the listed
# tools (curl, jq, util-linux, bash, xfsprogs) and finally empty /var/cache/apk.
RUN apk update \
&& apk upgrade \
&& apk add --no-cache \
curl jq \
curl jq util-linux bash xfsprogs \
&& rm -rf /var/cache/apk/*
88 changes: 88 additions & 0 deletions cmd/provisioner/app/options/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
Copyright 2023 Vesoft Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
"time"

"github.com/spf13/pflag"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
pvController "sigs.k8s.io/sig-storage-lib-external-provisioner/v9/controller"
)

// Default names used by the provisioner flags registered in AddFlags.
const (
	// defaultProvisioner is the default for --provisioner-name.
	defaultProvisioner = "nebula-cloud.io/local-pv"
	// defaultHelperImage is the default for --helper-image.
	defaultHelperImage = "vesoft/nebula-alpine:latest"
	// defaultServiceAccount is the default for --service-account.
	defaultServiceAccount = "local-pv-provisioner-sa"
	// defaultConfigMap is the default for --configmap.
	defaultConfigMap = "local-pv-config"
)

// Default controller tuning values. Except for the resync period, they are
// taken from the external-provisioner library.
const (
	// defaultResyncPeriod is the default for --resync-period.
	defaultResyncPeriod = 5 * time.Minute
	// defaultThreadiness is the default for --threadiness.
	defaultThreadiness = pvController.DefaultThreadiness
	// defaultFailedProvisionThreshold is the default for --failed-provision-threshold.
	defaultFailedProvisionThreshold = pvController.DefaultFailedProvisionThreshold
	// defaultFailedDeleteThreshold is the default for --failed-delete-threshold.
	defaultFailedDeleteThreshold = pvController.DefaultFailedDeleteThreshold
)

// Options holds the command-line configuration of the local PV provisioner.
// Each field is bound to a flag by AddFlags.
type Options struct {
	// LeaderElect turns on a leader election client so that leadership is
	// acquired before the main loop runs.
	LeaderElect bool

	// Provisioner is the provisioner name for which this controller
	// dynamically provisions volumes.
	Provisioner string

	// HelperImage is the image used for managing the mount point on the host.
	HelperImage string

	// ImagePullSecret is the image pull secret used when pulling HelperImage.
	// Empty means no secret is configured.
	ImagePullSecret string

	// ServiceAccount is the name of the provisioner service account.
	ServiceAccount string

	// ConfigMap is the name of the provisioner configmap.
	ConfigMap string

	// Threadiness is the number of claim workers and of volume workers to launch.
	Threadiness int

	// FailedProvisionThreshold is the maximum number of retries after a
	// failed volume Provision.
	FailedProvisionThreshold int

	// FailedDeleteThreshold is the maximum number of retries after a failed
	// volume Delete.
	FailedDeleteThreshold int

	// ResyncPeriod is the period at which the controller forces the
	// repopulation of its local object stores.
	ResyncPeriod metav1.Duration
}

// NewOptions returns a pointer to a zero-valued Options. The documented
// defaults are supplied as flag defaults in AddFlags, not here.
func NewOptions() *Options {
	return new(Options)
}

// AddFlags registers one flag per Options field on the given flag set,
// binding each flag to the corresponding field and supplying its default.
func (o *Options) AddFlags(flags *pflag.FlagSet) {
	// High availability.
	flags.BoolVar(
		&o.LeaderElect,
		"leader-elect",
		false,
		"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
	)

	// Names of the objects the provisioner works with.
	flags.StringVar(
		&o.Provisioner,
		"provisioner-name",
		defaultProvisioner,
		"The name of the provisioner for which this controller dynamically provisions volumes.",
	)
	flags.StringVar(
		&o.HelperImage,
		"helper-image",
		defaultHelperImage,
		"The helper image used for managing mount point on the host.",
	)
	flags.StringVar(
		&o.ImagePullSecret,
		"image-pull-secret",
		"",
		"Image pull secret used for pulling helper image.",
	)
	flags.StringVar(
		&o.ServiceAccount,
		"service-account",
		defaultServiceAccount,
		"The name of the provisioner service account.",
	)
	flags.StringVar(
		&o.ConfigMap,
		"configmap",
		defaultConfigMap,
		"The name of the provisioner configmap.",
	)

	// Controller tuning.
	flags.IntVar(
		&o.Threadiness,
		"threadiness",
		defaultThreadiness,
		"The number of claim and volume workers each to launch.",
	)
	flags.IntVar(
		&o.FailedProvisionThreshold,
		"failed-provision-threshold",
		defaultFailedProvisionThreshold,
		"The threshold for max number of retries on failures of volume Provision.",
	)
	flags.IntVar(
		&o.FailedDeleteThreshold,
		"failed-delete-threshold",
		defaultFailedDeleteThreshold,
		"The threshold for max number of retries on failures of volume Delete.",
	)
	flags.DurationVar(
		&o.ResyncPeriod.Duration,
		"resync-period",
		defaultResyncPeriod,
		"Period at which the controller forces the repopulation of its local object stores.",
	)
}
117 changes: 117 additions & 0 deletions cmd/provisioner/app/provisioner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
Copyright 2023 Vesoft Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package app

import (
"context"
"flag"
"os"
"time"

"github.com/spf13/cobra"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
cliflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client/config"
pvController "sigs.k8s.io/sig-storage-lib-external-provisioner/v9/controller"

"github.com/vesoft-inc/nebula-operator/cmd/provisioner/app/options"
"github.com/vesoft-inc/nebula-operator/pkg/controller/provisioner"
klogflag "github.com/vesoft-inc/nebula-operator/pkg/flag/klog"
)

// defaultProvisionerNamespace is the namespace used when the POD_NAMESPACE
// environment variable is empty.
const defaultProvisionerNamespace = "local-pv-storage"

// NewProvisionerCommand builds the "local-pv-provisioner" cobra command. The
// command's flags are the Go standard flags, the provisioner options and the
// klog flags; executing it calls Run with ctx and the parsed options.
func NewProvisionerCommand(ctx context.Context) *cobra.Command {
	opts := options.NewOptions()

	// "generic" set: the process-wide Go flags plus the provisioner options.
	var namedSets cliflag.NamedFlagSets
	genericFlags := namedSets.FlagSet("generic")
	genericFlags.AddGoFlagSet(flag.CommandLine)
	opts.AddFlags(genericFlags)

	// "logs" set: klog flags.
	logFlags := namedSets.FlagSet("logs")
	klogflag.Add(logFlags)

	cmd := &cobra.Command{
		Use: "local-pv-provisioner",
		RunE: func(_ *cobra.Command, _ []string) error {
			return Run(ctx, opts)
		},
	}

	cmdFlags := cmd.Flags()
	cmdFlags.AddFlagSet(genericFlags)
	cmdFlags.AddFlagSet(logFlags)

	return cmd
}

// Run starts the local PV provisioner with the given options.
//
// It builds a Kubernetes client, starts the shared informers that feed the
// volume cache, then creates the provision controller and calls its Run
// method with ctx. An error is returned if the client configuration or
// clientset cannot be created. If an informer cache fails to sync the process
// is terminated through klog.Fatalf. Otherwise the function stays inside
// pc.Run and returns nil only if that call returns.
func Run(ctx context.Context, opts *options.Options) error {
	c, err := config.GetConfig()
	if err != nil {
		return err
	}
	kubeClient, err := clientset.NewForConfig(c)
	if err != nil {
		return err
	}

	cacheConfig := &provisioner.CacheConfig{
		ProvisionerName: opts.Provisioner,
		Cache:           provisioner.NewVolumeCache(),
		InformerFactory: informers.NewSharedInformerFactory(kubeClient, time.Second*5),
	}
	provisioner.NewPopulator(cacheConfig)

	// Start informers after all event listeners are registered.
	cacheConfig.InformerFactory.Start(ctx.Done())
	// Wait until the caches of all started informers have synced.
	for v, synced := range cacheConfig.InformerFactory.WaitForCacheSync(ctx.Done()) {
		if !synced {
			klog.Fatalf("Error syncing informer for %v", v)
		}
	}

	// The namespace comes from the POD_NAMESPACE environment variable, with a
	// fixed fallback when it is not set.
	namespace := os.Getenv("POD_NAMESPACE")
	if namespace == "" {
		namespace = defaultProvisionerNamespace
	}
	p := provisioner.NewProvisioner(ctx, kubeClient, namespace, opts.HelperImage, opts.ConfigMap, opts.ServiceAccount)
	p.CacheConfig = cacheConfig
	if opts.ImagePullSecret != "" {
		p.SetImagePullSecret(opts.ImagePullSecret)
	}

	controllerOpts := []func(*pvController.ProvisionController) error{
		pvController.LeaderElection(opts.LeaderElect),
		pvController.FailedProvisionThreshold(opts.FailedProvisionThreshold),
		pvController.FailedDeleteThreshold(opts.FailedDeleteThreshold),
		pvController.Threadiness(opts.Threadiness),
	}
	// Honor --resync-period. A non-positive value (for example an Options
	// value built without AddFlags) leaves the library default untouched.
	if opts.ResyncPeriod.Duration > 0 {
		controllerOpts = append(controllerOpts, pvController.ResyncPeriod(opts.ResyncPeriod.Duration))
	}

	pc := pvController.NewProvisionController(
		kubeClient,
		opts.Provisioner,
		p,
		controllerOpts...,
	)
	klog.Info("local pv provisioner started")
	pc.Run(ctx)
	klog.Info("local pv provisioner stopped")

	return nil
}
33 changes: 33 additions & 0 deletions cmd/provisioner/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
Copyright 2023 Vesoft Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"os"

"k8s.io/component-base/cli"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/vesoft-inc/nebula-operator/cmd/provisioner/app"
)

func main() {
ctx := ctrl.SetupSignalHandler()
cmd := app.NewProvisionerCommand(ctx)
code := cli.Run(cmd)
os.Exit(code)
}
60 changes: 60 additions & 0 deletions config/samples/gke-daemonset-raid-disks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Sample DaemonSet that runs a startup script on nodes carrying the
# cloud.google.com/gke-local-nvme-ssd label. The script gathers the
# /dev/disk/by-id/google-local-ssd-block* devices, creates a RAID 0 array at
# /dev/md/0 with mdadm when no /dev/md/* device exists yet, formats the array
# as ext4 when tune2fs cannot read it, and mounts it at /mnt/disks/raid0.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: gke-raid-disks
namespace: default
labels:
k8s-app: gke-raid-disks
spec:
selector:
matchLabels:
name: gke-raid-disks
template:
metadata:
labels:
name: gke-raid-disks
spec:
# Only schedule onto nodes labeled as having local NVMe SSDs.
nodeSelector:
cloud.google.com/gke-local-nvme-ssd: "true"
# The container shares the host PID namespace and runs privileged.
hostPID: true
containers:
- name: startup-script
image: gcr.io/google-containers/startup-script:v1
securityContext:
privileged: true
env:
# The script is handed to the container through the STARTUP_SCRIPT variable.
- name: STARTUP_SCRIPT
value: |
set -o errexit
set -o nounset
set -o pipefail

devices=()
for ssd in /dev/disk/by-id/google-local-ssd-block*; do
if [ -e "${ssd}" ]; then
devices+=("${ssd}")
fi
done
if [ "${#devices[@]}" -eq 0 ]; then
echo "No Local NVMe SSD disks found."
exit 0
fi

seen_arrays=(/dev/md/*)
device=${seen_arrays[0]}
echo "Setting RAID array with Local SSDs on device ${device}"
if [ ! -e "$device" ]; then
device="/dev/md/0"
echo "y" | mdadm --create "${device}" --level=0 --force --raid-devices=${#devices[@]} "${devices[@]}"
fi

if ! tune2fs -l "${device}" ; then
echo "Formatting '${device}'"
mkfs.ext4 -F "${device}"
fi

mountpoint=/mnt/disks/raid0
mkdir -p "${mountpoint}"
echo "Mounting '${device}' at '${mountpoint}'"
mount -o discard,defaults "${device}" "${mountpoint}"
chmod a+w "${mountpoint}"
Loading