Skip to content

Commit

Permalink
feat: add support for downsize
Browse files Browse the repository at this point in the history
we can allow the cluster to downsize if the follower broker
exits cleanly with 0, without need for the broker index max
completions attribute that is enabled with a feature gate
and requires k8s 1.28. This change also adds support for a
minSize cluster, which will work to start the quorum when
fewer than the size workers are available. note that this does
not adjust tasks given to a job, so might be assigning too
many tasks to too few workers. This also adds in the previous
downsize workers example, except instead of using pkill for
rockylinux we fall back to flux overlay disconnect, as pkill
is not available by default. It is up to the user to ensure
that the follower broker can be disconnected (and is not running
anything). Finally, we add support for a flux->arch tag,
specifically for an arm binary to be downloaded and used
for the go-wait-fs command.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jan 18, 2024
1 parent 0a119ba commit e54e8b6
Show file tree
Hide file tree
Showing 17 changed files with 507 additions and 15 deletions.
27 changes: 27 additions & 0 deletions api/v1alpha2/minicluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ type MiniClusterSpec struct {
// +optional
MaxSize int32 `json:"maxSize,omitempty"`

// MinSize (minimum number of pods that must be up for Flux)
// Note that this option does not edit the number of tasks,
// so a job could run with fewer (and then not start)
// +optional
MinSize int32 `json:"minSize,omitempty"`

// Total number of CPUs being run across entire cluster
// +kubebuilder:default=1
// +default=1
Expand Down Expand Up @@ -249,8 +255,14 @@ type ContainerVolume struct {
type FluxSpec struct {

// Container base for flux
// +optional
Container FluxContainer `json:"container,omitempty"`

// Change the arch string - determines the binaries
// that are downloaded to run the entrypoint
// +optional
Arch string `json:"arch,omitempty"`

// Modify flux submit to be something else
// +optional
SubmitCommand string `json:"submitCommand,omitempty"`
Expand Down Expand Up @@ -279,6 +291,11 @@ type FluxSpec struct {
// +optional
NoWaitSocket bool `json:"noWaitSocket"`

// Complete workers when they fail
// This is ideal if you don't want them to restart
// +optional
CompleteWorkers bool `json:"completeWorkers"`

// Log level to use for flux logging (only in non TestMode)
// +kubebuilder:default=6
// +default=6
Expand Down Expand Up @@ -674,6 +691,16 @@ func (f *MiniCluster) Validate() bool {
return false
}

// If MinSize is set, it must be <= MaxSize and Size
if f.Spec.MinSize != 0 && f.Spec.MaxSize != 0 && f.Spec.MinSize > f.Spec.MaxSize {
fmt.Printf("😥️ MinSize of cluster must be less than MaxSize.\n")
return false
}
if f.Spec.MinSize != 0 && f.Spec.MinSize > f.Spec.Size {
fmt.Printf("😥️ MinSize of cluster must be less than size.\n")
return false
}

// Set the default headless service name
if f.Spec.Network.HeadlessName == "" {
f.Spec.Network.HeadlessName = "flux-service"
Expand Down
17 changes: 14 additions & 3 deletions api/v1alpha2/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,12 @@
"limits": {
"type": "object",
"additionalProperties": {
"default": {},
"$ref": "#/definitions/IntOrString"
}
},
"requests": {
"type": "object",
"additionalProperties": {
"default": {},
"$ref": "#/definitions/IntOrString"
}
}
Expand Down Expand Up @@ -241,6 +239,10 @@
"FluxSpec": {
"type": "object",
"properties": {
"arch": {
"description": "Change the arch string - determines the binaries that are downloaded to run the entrypoint",
"type": "string"
},
"brokerConfig": {
"description": "Optionally provide a manually created broker config this is intended for bursting to remote clusters",
"type": "string",
Expand All @@ -251,6 +253,11 @@
"default": {},
"$ref": "#/definitions/Bursting"
},
"completeWorkers": {
"description": "Complete workers when they fail This is ideal if you don't want them to restart",
"type": "boolean",
"default": false
},
"connectTimeout": {
"description": "Single user executable to provide to flux start",
"type": "string",
Expand Down Expand Up @@ -592,6 +599,11 @@
"type": "integer",
"format": "int32"
},
"minSize": {
"description": "MinSize (minimum number of pods that must be up for Flux) Note that this option does not edit the number of tasks, so a job could run with fewer (and then not start)",
"type": "integer",
"format": "int32"
},
"network": {
"description": "A spec for exposing or defining the cluster headless service",
"default": {},
Expand Down Expand Up @@ -735,7 +747,6 @@
"description": "Resources include limits and requests",
"type": "object",
"additionalProperties": {
"default": {},
"$ref": "#/definitions/IntOrString"
}
},
Expand Down
31 changes: 25 additions & 6 deletions api/v1alpha2/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions chart/templates/minicluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ spec:
flux:
description: Flux options for the broker, shared across cluster
properties:
arch:
description: Change the arch string - determines the binaries that
are downloaded to run the entrypoint
type: string
brokerConfig:
description: Optionally provide a manually created broker config
this is intended for bursting to remote clusters
Expand Down Expand Up @@ -306,6 +310,10 @@ spec:
- size
type: object
type: object
completeWorkers:
description: Complete workers when they fail This is ideal if you
don't want them to restart
type: boolean
connectTimeout:
default: 5s
description: Single user executable to provide to flux start
Expand Down Expand Up @@ -449,6 +457,12 @@ spec:
description: MaxSize (maximum number of pods to allow scaling to)
format: int32
type: integer
minSize:
description: MinSize (minimum number of pods that must be up for Flux)
Note that this option does not edit the number of tasks, so a job
could run with fewer (and then not start)
format: int32
type: integer
network:
description: A spec for exposing or defining the cluster headless service
properties:
Expand Down
14 changes: 14 additions & 0 deletions config/crd/bases/flux-framework.org_miniclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ spec:
flux:
description: Flux options for the broker, shared across cluster
properties:
arch:
description: Change the arch string - determines the binaries
that are downloaded to run the entrypoint
type: string
brokerConfig:
description: Optionally provide a manually created broker config
this is intended for bursting to remote clusters
Expand Down Expand Up @@ -307,6 +311,10 @@ spec:
- size
type: object
type: object
completeWorkers:
description: Complete workers when they fail This is ideal if
you don't want them to restart
type: boolean
connectTimeout:
default: 5s
description: Single user executable to provide to flux start
Expand Down Expand Up @@ -453,6 +461,12 @@ spec:
description: MaxSize (maximum number of pods to allow scaling to)
format: int32
type: integer
minSize:
description: MinSize (minimum number of pods that must be up for Flux)
Note that this option does not edit the number of tasks, so a job
could run with fewer (and then not start)
format: int32
type: integer
network:
description: A spec for exposing or defining the cluster headless
service
Expand Down
24 changes: 24 additions & 0 deletions docs/getting_started/custom-resource-definition.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@ The operator works to add Flux to your application container dynamically by way
one that is run as a sidecar alongside your container, and then the view is copied over and flux run as your
active user. Settings under the Flux directive typically refer to flux options, e.g., for the broker or similar.

#### arch

If you are using an ARM-based container, be sure to set the architecture so that the matching binaries are used.

```yaml
flux:
arch: "arm"
```

Note that this doesn't edit the container, but rather the binaries installed for it (e.g., to wait for files).


#### container

You can customize the flux container, and most attributes that are available for a standard container are available here.
Expand Down Expand Up @@ -229,6 +241,18 @@ And then your pod containers also both need to have memory and cpu defined. In
1. Ensure cpuManagerPolicy is static
2. Create all pod containers (including the init container) in the MiniCluster to have a cpu and memory definition.

#### completeWorkers

By default, when a follower broker is killed, a restart is attempted. While we could use [JobBackoffPerIndex](https://kubernetes.io/blog/2023/08/21/kubernetes-1-28-jobapi-update/#backoff-limit-per-index) to prevent it from restarting under
any conditions, this currently requires a feature gate (Kubernetes 1.28), so we are opting for a simpler approach. You can set `completeWorkers` to true, in which case when a follower broker is killed, it will Complete and not be recreated.

```yaml
spec:
flux:
completeWorkers: true
```

This can be useful for cases of autoscaling in the down direction when you need to drain a node, and then delete the pod.

#### submitCommand

Expand Down
14 changes: 14 additions & 0 deletions examples/dist/flux-operator-arm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ spec:
flux:
description: Flux options for the broker, shared across cluster
properties:
arch:
description: Change the arch string - determines the binaries
that are downloaded to run the entrypoint
type: string
brokerConfig:
description: Optionally provide a manually created broker config
this is intended for bursting to remote clusters
Expand Down Expand Up @@ -313,6 +317,10 @@ spec:
- size
type: object
type: object
completeWorkers:
description: Complete workers when they fail This is ideal if
you don't want them to restart
type: boolean
connectTimeout:
default: 5s
description: Single user executable to provide to flux start
Expand Down Expand Up @@ -459,6 +467,12 @@ spec:
description: MaxSize (maximum number of pods to allow scaling to)
format: int32
type: integer
minSize:
description: MinSize (minimum number of pods that must be up for Flux)
Note that this option does not edit the number of tasks, so a job
could run with fewer (and then not start)
format: int32
type: integer
network:
description: A spec for exposing or defining the cluster headless
service
Expand Down
14 changes: 14 additions & 0 deletions examples/dist/flux-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ spec:
flux:
description: Flux options for the broker, shared across cluster
properties:
arch:
description: Change the arch string - determines the binaries
that are downloaded to run the entrypoint
type: string
brokerConfig:
description: Optionally provide a manually created broker config
this is intended for bursting to remote clusters
Expand Down Expand Up @@ -313,6 +317,10 @@ spec:
- size
type: object
type: object
completeWorkers:
description: Complete workers when they fail This is ideal if
you don't want them to restart
type: boolean
connectTimeout:
default: 5s
description: Single user executable to provide to flux start
Expand Down Expand Up @@ -459,6 +467,12 @@ spec:
description: MaxSize (maximum number of pods to allow scaling to)
format: int32
type: integer
minSize:
description: MinSize (minimum number of pods that must be up for Flux)
Note that this option does not edit the number of tasks, so a job
could run with fewer (and then not start)
format: int32
type: integer
network:
description: A spec for exposing or defining the cluster headless
service
Expand Down
Loading

0 comments on commit e54e8b6

Please sign in to comment.