Skip to content

Commit

Permalink
Merge pull request #6 from lvyanru8200/main
Browse files Browse the repository at this point in the history
add finetune-controller
  • Loading branch information
LronDC authored Feb 1, 2024
2 parents 89023c0 + 0a6eb84 commit f2a814d
Show file tree
Hide file tree
Showing 26 changed files with 3,730 additions and 447 deletions.
26 changes: 5 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,33 +1,17 @@
# Build the manager binary
FROM golang:1.19 as builder
ARG TARGETOS
ARG TARGETARCH
FROM golang:1.20 as builder

WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download
COPY . .
RUN go mod tidy

# Copy the go source
COPY main.go main.go
COPY apis/ apis/
COPY controllers/ controllers/

# Build
# GOARCH has no default value so that the binary is built for the platform of the host where the command
# was called. For example, if we call make docker-build in a local env running on Apple Silicon (M1),
# the docker BUILDPLATFORM arg will be linux/arm64, whereas on an x86 Apple machine it will be linux/amd64. Therefore,
# by leaving it empty we can ensure that the container and the binary shipped in it have the same platform.
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager main.go
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o manager main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
FROM alpine:3
WORKDIR /
COPY --from=builder /workspace/manager .
USER 65532:65532

ENTRYPOINT ["/manager"]
117 changes: 90 additions & 27 deletions cmd/controller-manager/app/controller_manager.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,41 @@
package app

import (
"context"
"fmt"
"os"

"github.com/DataTunerX/utility-server/logging"

"github.com/DataTunerX/finetune-experiment-controller/cmd/controller-manager/app/options"
"github.com/DataTunerX/finetune-experiment-controller/internal/controller/finetune"
"github.com/DataTunerX/finetune-experiment-controller/pkg/util"
corev1beta1 "github.com/DataTunerX/meta-server/api/core/v1beta1"
extensionv1beta1 "github.com/DataTunerX/meta-server/api/extension/v1beta1"
finetunev1beta1 "github.com/DataTunerX/meta-server/api/finetune/v1beta1"
"github.com/DataTunerX/utility-server/logging"
"github.com/go-logr/zapr"
"github.com/operator-framework/operator-lib/leader"
"github.com/open-policy-agent/cert-controller/pkg/rotator"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/manager"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"
//+kubebuilder:scaffold:imports
)

const LockName = "finetune-experiment-controller-lock"
// Fixed names used when building the controller manager and its webhook
// certificate rotator.
const (
// LockName is the leader-election ID handed to the controller manager.
LockName = "datatunerx-lock"
// SecretName is the name of the Secret (in the operator namespace) that the
// cert rotator uses as its SecretKey.
SecretName = "datatunerx-cert"
// CaName is the CA name passed to the cert rotator.
CaName = "datatunerx-ca"
// CaOrganization is the CA organization passed to the cert rotator.
CaOrganization = "datatunerx"
// ServiceName is combined with the operator namespace to form the
// "<service>.<namespace>.svc" DNS name given to the cert rotator.
ServiceName = "finetune-experiment"
)

var (
scheme = runtime.NewScheme()
Expand All @@ -41,45 +51,88 @@ func init() {
}

func NewControllerManager() (manager.Manager, error) {
logging.ZLogger.Info("Start building controller manager")
opts := options.NewOptions()
flagSet := pflag.NewFlagSet("generic", pflag.ExitOnError)
opts.AddFlags(flagSet)
err := flagSet.Parse(os.Args[1:])
if err != nil {
logging.ZLogger.Errorf("Error parsing flags: %v", err)
return nil, err
os.Exit(1)
}
logging.ZLogger.Info("Set logger for controller")
ctrl.SetLogger(zapr.NewLogger(logging.ZLogger.GetLogger()))

namespace := util.GetOperatorNamespace()
ctrOption := ctrl.Options{
Scheme: scheme,
MetricsBindAddress: opts.MetricsAddr,
Port: 9443,
HealthProbeBindAddress: opts.ProbeAddr,
}

if opts.LeaderElectLifeConfig.EnableLeaderLifeElect {
err = leader.Become(context.TODO(), LockName)
if err != nil {
logging.ZLogger.Errorf("Failed to retry for leader lock: %v", err)
return nil, err
}
} else {
ctrOption.LeaderElection = false
ctrOption.LeaderElectionID = LockName
ctrOption.RetryPeriod = &opts.LeaderElectLeaseConfig.RetryPeriod
ctrOption.RenewDeadline = &opts.LeaderElectLeaseConfig.RenewDeadline
ctrOption.LeaseDuration = &opts.LeaderElectLeaseConfig.LeaseDuration
ctrOption.LeaderElectionNamespace = opts.LeaderElectLeaseConfig.LeaderElectionNamespace
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: opts.MetricsAddr,
},
WebhookServer: webhook.NewServer(webhook.Options{Port: 9443}),
HealthProbeBindAddress: opts.ProbeAddr,
LeaderElection: true,
LeaderElectionID: LockName,
LeaderElectionNamespace: namespace,
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrOption)
if err != nil {
logging.ZLogger.Errorf("Build controller manager failed: %v", err)
return nil, err
}
setupFinished := make(chan struct{})
if opts.EnableCertRotator {
logging.ZLogger.Info("Setting up cert rotation")
if err := rotator.AddRotator(mgr, &rotator.CertRotator{
SecretKey: types.NamespacedName{
Namespace: namespace,
Name: SecretName,
},
CAName: CaName,
CAOrganization: CaOrganization,
CertDir: "/tmp/k8s-webhook-server/serving-certs",
DNSName: fmt.Sprintf("%s.%s.svc", ServiceName, namespace),
IsReady: setupFinished,
Webhooks: []rotator.WebhookInfo{
{
Name: namespace + "-validating-webhook-configuration",
Type: rotator.Validating,
},
{
Name: namespace + "-mutating-webhook-configuration",
Type: rotator.Mutating,
},
},
}); err != nil {
logging.ZLogger.Errorf("Unable to set up cert rotation, %v", err)
os.Exit(1)
}
} else {
close(setupFinished)
}
go func() {
<-setupFinished
if err := (&finetunev1beta1.FinetuneJob{}).SetupWebhookWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create webhook, %v", err)
os.Exit(1)

}
if err := (&finetunev1beta1.FinetuneExperiment{}).SetupWebhookWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create webhook, %v", err)
os.Exit(1)
}
if err := (&corev1beta1.LLM{}).SetupWebhookWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create webhook, %v", err)
os.Exit(1)
}
if err := (&corev1beta1.Hyperparameter{}).SetupWebhookWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create webhook, %v", err)
os.Exit(1)
}
if err := (&extensionv1beta1.Dataset{}).SetupWebhookWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create webhook, %v", err)
os.Exit(1)
}
}()

if err = (&finetune.FinetuneExperimentReconciler{
Client: mgr.GetClient(),
Expand All @@ -97,6 +150,16 @@ func NewControllerManager() (manager.Manager, error) {
logging.ZLogger.Errorf("Unable to create FinetuneJob controller, %v", err)
return nil, err
}
if err = (&finetune.FinetuneReconciler{
Log: logging.ZLogger,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Clientset: kubernetes.NewForConfigOrDie(ctrl.GetConfigOrDie()),
Config: ctrl.GetConfigOrDie(),
}).SetupWithManager(mgr); err != nil {
logging.ZLogger.Errorf("Unable to create Finetune controller, %v", err)
return nil, err
}
//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand Down
18 changes: 6 additions & 12 deletions cmd/controller-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,24 @@ const (
defaultMetricsAddr = ":8080"
defaultProbeAddr = ":8081"
defaultNamespace = "datatunerx-dev"
defaultCertRotator = true
)

type Options struct {
LeaderElectLeaseConfig LeaderElectLeaseConfig
LeaderElectLifeConfig LeaderElectLifeConfig
MetricsAddr string
ProbeAddr string
EnableCertRotator bool
}

type LeaderElectLeaseConfig struct {
LeaseDuration time.Duration
RenewDeadline time.Duration
RetryPeriod time.Duration
LeaderElectionNamespace string
}

type LeaderElectLifeConfig struct {
EnableLeaderLifeElect bool
LeaseDuration time.Duration
RenewDeadline time.Duration
RetryPeriod time.Duration
}

func NewOptions() *Options {
return &Options{
LeaderElectLifeConfig: LeaderElectLifeConfig{},
LeaderElectLeaseConfig: LeaderElectLeaseConfig{},
}
}
Expand All @@ -46,9 +41,8 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) {
}
fs.StringVar(&o.MetricsAddr, "metrics-bind-address", defaultMetricsAddr, "The address the metric endpoint binds to.")
fs.StringVar(&o.ProbeAddr, "health-probe-bind-address", defaultProbeAddr, "The address the probe endpoint binds to.")
fs.StringVar(&o.LeaderElectLeaseConfig.LeaderElectionNamespace, "leader-life-namespace", defaultNamespace, "LeaderElectionNamespace determines the namespace in which the leader.")
fs.BoolVar(&o.LeaderElectLifeConfig.EnableLeaderLifeElect, "enable-leader-life", false, "Enable or disable leader election life.")
fs.DurationVar(&o.LeaderElectLeaseConfig.LeaseDuration, "lease-duration", defaultLeaseDuration, "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed group.")
fs.DurationVar(&o.LeaderElectLeaseConfig.RenewDeadline, "renew-deadline", defaultRenewDeadline, "Duration the clients should wait between attempting to renew the lease of the lock.")
fs.DurationVar(&o.LeaderElectLeaseConfig.RetryPeriod, "retry-period", defaultRetryPeriod, "The time duration for the client to wait between attempts of acquiring a lock.")
fs.BoolVar(&o.EnableCertRotator, "cert-rotator", defaultCertRotator, "Automatically apply for a certificate for Webhooks.")
}
62 changes: 32 additions & 30 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,63 +1,65 @@
module github.com/DataTunerX/finetune-experiment-controller

go 1.19
go 1.20

require (
github.com/DataTunerX/meta-server v0.0.0-20231128065201-7109bd13c9cb
github.com/DataTunerX/utility-server v0.0.0-20231107081331-e4ac0bbd2db2
github.com/go-logr/zapr v1.2.3
github.com/operator-framework/operator-lib v0.11.0
github.com/DataTunerX/meta-server v0.0.0-20231225093059-13cc8ff65bdc
github.com/DataTunerX/utility-server v0.0.0-20231208092112-6224f8619737
github.com/duke-git/lancet/v2 v2.2.8
github.com/go-logr/zapr v1.2.4
github.com/open-policy-agent/cert-controller v0.10.0
github.com/ray-project/kuberay/ray-operator v1.0.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.17.0
k8s.io/api v0.26.0
k8s.io/apimachinery v0.26.0
k8s.io/client-go v0.26.0
sigs.k8s.io/controller-runtime v0.14.1
k8s.io/api v0.28.1
k8s.io/apimachinery v0.28.1
k8s.io/client-go v0.28.1
sigs.k8s.io/controller-runtime v0.16.1
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/prometheus/client_golang v1.16.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/sagikazarmark/locafero v0.3.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.10.0 // indirect
github.com/spf13/cast v1.5.1 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
go.uber.org/multierr v1.10.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.17.0 // indirect
Expand All @@ -66,19 +68,19 @@ require (
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.26.0 // indirect
k8s.io/component-base v0.26.0 // indirect
k8s.io/klog/v2 v2.80.1 // indirect
k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect
k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
k8s.io/apiextensions-apiserver v0.28.1 // indirect
k8s.io/component-base v0.28.1 // indirect
k8s.io/klog/v2 v2.100.1 // indirect
k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect
k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)
Loading

0 comments on commit f2a814d

Please sign in to comment.