local pv provisioner for public cloud (#406)
MegaByte875 authored Dec 1, 2023
1 parent ffcc8b4 commit 4cbc4a2
Showing 13 changed files with 1,135 additions and 3 deletions.
1 change: 1 addition & 0 deletions Makefile
@@ -75,6 +75,7 @@ build: ## Build binary.
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/controller-manager cmd/controller-manager/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/autoscaler cmd/autoscaler/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/scheduler cmd/scheduler/main.go
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/provisioner cmd/provisioner/main.go

helm-charts: ## Build helm charts.
helm package charts/nebula-operator --version $(CHARTS_VERSION) --app-version $(CHARTS_VERSION)
4 changes: 2 additions & 2 deletions alpine.multiarch
@@ -1,9 +1,9 @@
FROM alpine:3.18.2

-ARG TARGETPLATFORM
+ARG TARGETARCH

RUN apk update \
&& apk upgrade \
&& apk add --no-cache \
-curl jq \
+curl jq util-linux bash xfsprogs \
&& rm -rf /var/cache/apk/*
88 changes: 88 additions & 0 deletions cmd/provisioner/app/options/options.go
@@ -0,0 +1,88 @@
/*
Copyright 2023 Vesoft Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
"time"

"github.com/spf13/pflag"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
pvController "sigs.k8s.io/sig-storage-lib-external-provisioner/v9/controller"
)

const (
defaultProvisioner = "nebula-cloud.io/local-pv"
defaultHelperImage = "vesoft/nebula-alpine:latest"
defaultServiceAccount = "local-pv-provisioner-sa"
defaultConfigMap = "local-pv-config"

defaultResyncPeriod = 5 * time.Minute
defaultThreadiness = pvController.DefaultThreadiness
defaultFailedProvisionThreshold = pvController.DefaultFailedProvisionThreshold
defaultFailedDeleteThreshold = pvController.DefaultFailedDeleteThreshold
)

type Options struct {
// LeaderElect enables a leader election client to gain leadership
// before executing the main loop
LeaderElect bool

// The name of the provisioner for which this controller dynamically
// provisions volumes
Provisioner string

// The helper image used for managing mount points on the host
HelperImage string

// Image pull secret used for pulling the helper image
ImagePullSecret string

// The name of the provisioner service account
ServiceAccount string

// The name of the provisioner configmap
ConfigMap string

// The number of claim and volume workers each to launch
Threadiness int

// The maximum number of retries on failures of volume Provision
FailedProvisionThreshold int

// The maximum number of retries on failures of volume Delete
FailedDeleteThreshold int

// Period at which the controller forces the repopulation of its local object stores
ResyncPeriod metav1.Duration
}

func NewOptions() *Options {
return &Options{}
}

func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.LeaderElect, "leader-elect", false, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.StringVar(&o.Provisioner, "provisioner-name", defaultProvisioner, "The name of the provisioner for which this controller dynamically provisions volumes.")
flags.StringVar(&o.HelperImage, "helper-image", defaultHelperImage, "The helper image used for managing mount points on the host.")
flags.StringVar(&o.ImagePullSecret, "image-pull-secret", "", "Image pull secret used for pulling helper image.")
flags.StringVar(&o.ServiceAccount, "service-account", defaultServiceAccount, "The name of the provisioner service account.")
flags.StringVar(&o.ConfigMap, "configmap", defaultConfigMap, "The name of the provisioner configmap.")
flags.IntVar(&o.Threadiness, "threadiness", defaultThreadiness, "The number of claim and volume workers each to launch.")
flags.IntVar(&o.FailedProvisionThreshold, "failed-provision-threshold", defaultFailedProvisionThreshold, "The maximum number of retries on failures of volume Provision.")
flags.IntVar(&o.FailedDeleteThreshold, "failed-delete-threshold", defaultFailedDeleteThreshold, "The maximum number of retries on failures of volume Delete.")
flags.DurationVar(&o.ResyncPeriod.Duration, "resync-period", defaultResyncPeriod, "Period at which the controller forces the repopulation of its local object stores.")
}
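As a usage note (not part of this commit), the sketch below shows how these options might be wired into a standalone flag set and overridden, for example by container args in a Deployment. The flag names and defaults are the ones registered in AddFlags above; the override values here are illustrative only.

// Minimal sketch, assuming the options package above; not part of the diff.
package main

import (
	"fmt"

	"github.com/spf13/pflag"

	"github.com/vesoft-inc/nebula-operator/cmd/provisioner/app/options"
)

func main() {
	opts := options.NewOptions()
	fs := pflag.NewFlagSet("local-pv-provisioner", pflag.ExitOnError)
	opts.AddFlags(fs)

	// Override a few defaults the way container args would.
	_ = fs.Parse([]string{
		"--provisioner-name=nebula-cloud.io/local-pv",
		"--helper-image=vesoft/nebula-alpine:latest",
		"--threadiness=4",
	})
	fmt.Println(opts.Provisioner, opts.HelperImage, opts.Threadiness)
}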
117 changes: 117 additions & 0 deletions cmd/provisioner/app/provisioner.go
@@ -0,0 +1,117 @@
/*
Copyright 2023 Vesoft Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package app

import (
"context"
"flag"
"os"
"time"

"github.com/spf13/cobra"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
cliflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client/config"
pvController "sigs.k8s.io/sig-storage-lib-external-provisioner/v9/controller"

"github.com/vesoft-inc/nebula-operator/cmd/provisioner/app/options"
"github.com/vesoft-inc/nebula-operator/pkg/controller/provisioner"
klogflag "github.com/vesoft-inc/nebula-operator/pkg/flag/klog"
)

const (
defaultProvisionerNamespace = "local-pv-storage"
)

// NewProvisionerCommand creates a *cobra.Command object with default parameters
func NewProvisionerCommand(ctx context.Context) *cobra.Command {
opts := options.NewOptions()

cmd := &cobra.Command{
Use: "local-pv-provisioner",
RunE: func(cmd *cobra.Command, args []string) error {
return Run(ctx, opts)
},
}

nfs := cliflag.NamedFlagSets{}
fs := nfs.FlagSet("generic")
fs.AddGoFlagSet(flag.CommandLine)
opts.AddFlags(fs)

logsFlagSet := nfs.FlagSet("logs")
klogflag.Add(logsFlagSet)

cmd.Flags().AddFlagSet(fs)
cmd.Flags().AddFlagSet(logsFlagSet)

return cmd
}

// Run runs the provisioner with options. This should never exit.
func Run(ctx context.Context, opts *options.Options) error {
c, err := config.GetConfig()
if err != nil {
return err
}
kubeClient, err := clientset.NewForConfig(c)
if err != nil {
return err
}

cacheConfig := &provisioner.CacheConfig{
ProvisionerName: opts.Provisioner,
Cache: provisioner.NewVolumeCache(),
InformerFactory: informers.NewSharedInformerFactory(kubeClient, time.Second*5),
}
provisioner.NewPopulator(cacheConfig)

// Start informers after all event listeners are registered.
cacheConfig.InformerFactory.Start(ctx.Done())
// Wait for the caches of all started informers to be synced.
for v, synced := range cacheConfig.InformerFactory.WaitForCacheSync(ctx.Done()) {
if !synced {
klog.Fatalf("Error syncing informer for %v", v)
}
}

namespace := os.Getenv("POD_NAMESPACE")
if namespace == "" {
namespace = defaultProvisionerNamespace
}
p := provisioner.NewProvisioner(ctx, kubeClient, namespace, opts.HelperImage, opts.ConfigMap, opts.ServiceAccount)
p.CacheConfig = cacheConfig
if opts.ImagePullSecret != "" {
p.SetImagePullSecret(opts.ImagePullSecret)
}
pc := pvController.NewProvisionController(
kubeClient,
opts.Provisioner,
p,
pvController.LeaderElection(opts.LeaderElect),
pvController.FailedProvisionThreshold(opts.FailedProvisionThreshold),
pvController.FailedDeleteThreshold(opts.FailedDeleteThreshold),
pvController.Threadiness(opts.Threadiness),
)
klog.Info("local pv provisioner started")
pc.Run(ctx)
klog.Info("local pv provisioner stopped")

return nil
}
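Run hands the provisioner p to pvController.NewProvisionController, which expects it to satisfy the Provisioner interface from sig-storage-lib-external-provisioner. The stub below is a sketch of that contract only, assuming the v9 signatures; the actual Provision/Delete logic lives in pkg/controller/provisioner, which is not rendered on this page.

// Sketch of the controller.Provisioner contract (v9); the bodies are
// placeholders, not the implementation in pkg/controller/provisioner.
package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	pvController "sigs.k8s.io/sig-storage-lib-external-provisioner/v9/controller"
)

type localPVProvisioner struct{}

// Provision is called for each pending PVC that names this provisioner; it
// returns the PersistentVolume to create plus a provisioning state.
func (p *localPVProvisioner) Provision(ctx context.Context, opts pvController.ProvisionOptions) (*corev1.PersistentVolume, pvController.ProvisioningState, error) {
	// A real implementation would run the helper image on opts.SelectedNode to
	// prepare the mount point, then build a local PV pinned to that node.
	return nil, pvController.ProvisioningFinished, nil
}

// Delete is called when a released PV provisioned by this controller should
// be cleaned up (e.g. wiping the backing directory via the helper image).
func (p *localPVProvisioner) Delete(ctx context.Context, pv *corev1.PersistentVolume) error {
	return nil
}

var _ pvController.Provisioner = &localPVProvisioner{}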
33 changes: 33 additions & 0 deletions cmd/provisioner/main.go
@@ -0,0 +1,33 @@
/*
Copyright 2023 Vesoft Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"os"

"k8s.io/component-base/cli"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/vesoft-inc/nebula-operator/cmd/provisioner/app"
)

func main() {
ctx := ctrl.SetupSignalHandler()
cmd := app.NewProvisionerCommand(ctx)
code := cli.Run(cmd)
os.Exit(code)
}
60 changes: 60 additions & 0 deletions config/samples/gke-daemonset-raid-disks.yaml
@@ -0,0 +1,60 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: gke-raid-disks
namespace: default
labels:
k8s-app: gke-raid-disks
spec:
selector:
matchLabels:
name: gke-raid-disks
template:
metadata:
labels:
name: gke-raid-disks
spec:
nodeSelector:
cloud.google.com/gke-local-nvme-ssd: "true"
hostPID: true
containers:
- name: startup-script
image: gcr.io/google-containers/startup-script:v1
securityContext:
privileged: true
env:
- name: STARTUP_SCRIPT
value: |
set -o errexit
set -o nounset
set -o pipefail
devices=()
for ssd in /dev/disk/by-id/google-local-ssd-block*; do
if [ -e "${ssd}" ]; then
devices+=("${ssd}")
fi
done
if [ "${#devices[@]}" -eq 0 ]; then
echo "No Local NVMe SSD disks found."
exit 0
fi
seen_arrays=(/dev/md/*)
device=${seen_arrays[0]}
echo "Setting RAID array with Local SSDs on device ${device}"
if [ ! -e "$device" ]; then
device="/dev/md/0"
echo "y" | mdadm --create "${device}" --level=0 --force --raid-devices=${#devices[@]} "${devices[@]}"
fi
if ! tune2fs -l "${device}" ; then
echo "Formatting '${device}'"
mkfs.ext4 -F "${device}"
fi
mountpoint=/mnt/disks/raid0
mkdir -p "${mountpoint}"
echo "Mounting '${device}' at '${mountpoint}'"
mount -o discard,defaults "${device}" "${mountpoint}"
chmod a+w "${mountpoint}"
