Merge pull request #231 from flux-framework/add-additional-skypilot-features

* podspec: add additional features for podspec

runtimeClassName can be used to designate nvidia
for skypilot

* feat: allow non root user

Problem: skypilot (and likely others) does not run as the root user
Solution: allow a non-root user that has sudo
Signed-off-by: vsoch <vsoch@users.noreply.github.com>

* restart policy should default to always

Signed-off-by: vsoch <vsoch@users.noreply.github.com>

* default should be on failure

Signed-off-by: vsoch <vsoch@users.noreply.github.com>

---------

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: vsoch <vsoch@users.noreply.github.com>
vsoch and vsoch authored Jul 23, 2024
2 parents 678b187 + 67482d4 commit 7c4006f
Showing 15 changed files with 151 additions and 35 deletions.
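Taken together, the changes expose two new knobs on the MiniCluster pod spec and relax the root-user requirement. A minimal sketch of how they might be used (the cluster name, size, and application image below are placeholders, not taken from this PR):

```yaml
apiVersion: flux-framework.org/v1alpha2
kind: MiniCluster
metadata:
  name: skypilot-demo            # placeholder name
spec:
  size: 2                        # placeholder size
  containers:
    - image: ghcr.io/example/skypilot-app:latest   # placeholder image with a non-root user that has sudo
  pod:
    # New in this PR: passed through to the pod's runtimeClassName (e.g. the NVIDIA runtime)
    runtimeClassName: nvidia
    # Optional override; when unset the operator now defaults to OnFailure
    restartPolicy: OnFailure
```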
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# Build the manager binary
FROM golang:1.20 as builder
FROM golang:1.20 AS builder

WORKDIR /workspace

4 changes: 4 additions & 0 deletions api/v1alpha2/minicluster_types.go
@@ -188,6 +188,10 @@ type PodSpec struct {
// +optional
ServiceAccountName string `json:"serviceAccountName,omitempty"`

// RuntimeClassName for the pod
// +optional
RuntimeClassName string `json:"runtimeClassName,omitempty"`

// Automatically mount the service account name
// +optional
AutomountServiceAccountToken bool `json:"automountServiceAccountToken,omitempty"`
4 changes: 4 additions & 0 deletions api/v1alpha2/swagger.json
@@ -775,6 +775,10 @@
"description": "Restart Policy",
"type": "string"
},
"runtimeClassName": {
"description": "RuntimeClassName for the pod",
"type": "string"
},
"schedulerName": {
"description": "Scheduler name for the pod",
"type": "string"
7 changes: 7 additions & 0 deletions api/v1alpha2/zz_generated.openapi.go

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions chart/templates/minicluster-crd.yaml
@@ -555,6 +555,9 @@ spec:
restartPolicy:
description: Restart Policy
type: string
runtimeClassName:
description: RuntimeClassName for the pod
type: string
schedulerName:
description: Scheduler name for the pod
type: string
3 changes: 3 additions & 0 deletions config/crd/bases/flux-framework.org_miniclusters.yaml
@@ -558,6 +558,9 @@ spec:
restartPolicy:
description: Restart Policy
type: string
runtimeClassName:
description: RuntimeClassName for the pod
type: string
schedulerName:
description: Scheduler name for the pod
type: string
13 changes: 11 additions & 2 deletions controllers/flux/job.go
@@ -45,8 +45,8 @@ func NewMiniClusterJob(cluster *api.MiniCluster) (*batchv1.Job, error) {
Labels: cluster.Spec.JobLabels,
},

// Completions must be == to Parallelism to allow for scaling
Spec: batchv1.JobSpec{

BackoffLimit: &backoffLimit,
Completions: &cluster.Spec.Size,
Parallelism: &cluster.Spec.Size,
@@ -70,13 +70,22 @@ func NewMiniClusterJob(cluster *api.MiniCluster) (*batchv1.Job, error) {
ImagePullSecrets: getImagePullSecrets(cluster),
ServiceAccountName: cluster.Spec.Pod.ServiceAccountName,
AutomountServiceAccountToken: &cluster.Spec.Pod.AutomountServiceAccountToken,
RestartPolicy: corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy),
RestartPolicy: corev1.RestartPolicyOnFailure,
NodeSelector: cluster.Spec.Pod.NodeSelector,
SchedulerName: cluster.Spec.Pod.SchedulerName,
},
},
},
}
// Custom restart policy
if cluster.Spec.Pod.RestartPolicy != "" {
job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy)
}

// Only add runtimeClassName if defined
if cluster.Spec.Pod.RuntimeClassName != "" {
job.Spec.Template.Spec.RuntimeClassName = &cluster.Spec.Pod.RuntimeClassName
}

// Add Affinity to map one pod / node only if the user hasn't disabled it
if !cluster.Spec.Network.DisableAffinity {
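For reference, with both fields set, the generated Job's pod template would carry the standard Kubernetes fields roughly as below (a sketch of the rendered output, not actual operator output):

```yaml
spec:
  template:
    spec:
      restartPolicy: OnFailure   # operator default; replaced when pod.restartPolicy is set
      runtimeClassName: nvidia   # only rendered when pod.runtimeClassName is non-empty
```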
12 changes: 11 additions & 1 deletion controllers/flux/pods.go
@@ -135,13 +135,23 @@ func (r *MiniClusterReconciler) newServicePod(
SetHostnameAsFQDN: &setAsFQDN,
Volumes: existingVolumes,
ImagePullSecrets: getImagePullSecrets(cluster),
RestartPolicy: corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy),
RestartPolicy: corev1.RestartPolicyOnFailure,
ServiceAccountName: cluster.Spec.Pod.ServiceAccountName,
AutomountServiceAccountToken: &cluster.Spec.Pod.AutomountServiceAccountToken,
NodeSelector: cluster.Spec.Pod.NodeSelector,
},
}

// Custom restart policy
if cluster.Spec.Pod.RestartPolicy != "" {
pod.Spec.RestartPolicy = corev1.RestartPolicy(cluster.Spec.Pod.RestartPolicy)
}

// Only add runtimeClassName if defined
if cluster.Spec.Pod.RuntimeClassName != "" {
pod.Spec.RuntimeClassName = &cluster.Spec.Pod.RuntimeClassName
}

// Assemble existing volume mounts - they are added with getContainers
mounts := []corev1.VolumeMount{}

37 changes: 33 additions & 4 deletions docs/getting_started/custom-resource-definition.md
@@ -188,8 +188,11 @@ When enabled, meaning that we use flux from a view within the container, these c

- [ghcr.io/converged-computing/flux-view-rocky:tag-9](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-rocky)
- [ghcr.io/converged-computing/flux-view-rocky:tag-8](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-rocky)
- [ghcr.io/converged-computing/flux-view-ubuntu:tag-noble](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu)
- [ghcr.io/converged-computing/flux-view-ubuntu:tag-jammy](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu)
- [ghcr.io/converged-computing/flux-view-ubuntu:tag-focal](https://github.com/converged-computing/flux-views/pkgs/container/flux-view-ubuntu)


Note that we have [arm builds](https://github.com/converged-computing/flux-views/tree/main/arm) available for each of rocky and ubuntu as well.
If you don't want to use Flux from a view (and want to use the v1alpha1 design of the Flux Operator that had the application alongside Flux) you can do that by way of disabling the flux view:

@@ -682,6 +685,34 @@ pod:
serviceAccountName: my-service-account
```

#### restartPolicy

To customize the restartPolicy for the pod:

```yaml
pod:
restartPolicy: Never
```

#### runtimeClassName

To add a runtime class name:

```yaml
pod:
runtimeClassName: nvidia
```

#### automountServiceAccountToken

If you want to automatically mount a service account token:

```yaml
pod:
automountServiceAccountToken: true
```


#### nodeSelector

A node selector is a set of key value pairs that helps to schedule pods to the right nodes! You can
@@ -720,10 +751,8 @@ name: rabbit

#### image

This is the only required attribute! You *must* provide a container base that has Flux.
The requirements of your container are defined in the README of the [flux-hpc](https://github.com/rse-ops/flux-hpc/)
repository. Generally speaking, you need to have Flux executables, Flux Python bindings,
and your own executables on the path, and should be started with root with a flux user.
You do not need to provide a container base that has Flux, but you must make sure the view (built for a particular operating system) that will add Flux matches your container. We don't require the container to start as root, but if it
runs as a non-root user, that user needs to have sudo available (to act as root).
If you use the [fluxrm/flux-sched](https://hub.docker.com/r/fluxrm/flux-sched)
base containers this is usually a good start.
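For example, an Ubuntu-based application container would be paired with the matching Ubuntu view image. A rough sketch (the application image is a placeholder, and the flux.container.image field is assumed here to be how the view image is selected):

```yaml
spec:
  flux:
    container:
      image: ghcr.io/converged-computing/flux-view-ubuntu:tag-jammy
  containers:
    - image: ubuntu:22.04   # placeholder application image built on the same Ubuntu release (jammy)
```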

3 changes: 3 additions & 0 deletions examples/dist/flux-operator-arm.yaml
@@ -564,6 +564,9 @@ spec:
restartPolicy:
description: Restart Policy
type: string
runtimeClassName:
description: RuntimeClassName for the pod
type: string
schedulerName:
description: Scheduler name for the pod
type: string
3 changes: 3 additions & 0 deletions examples/dist/flux-operator.yaml
@@ -564,6 +564,9 @@ spec:
restartPolicy:
description: Restart Policy
type: string
runtimeClassName:
description: RuntimeClassName for the pod
type: string
schedulerName:
description: Scheduler name for the pod
type: string
23 changes: 12 additions & 11 deletions pkg/flux/templates/components.sh
@@ -21,7 +21,7 @@ url=$goshareUrl/wait-fs-{{ .Spec.Flux.Arch }}
# This waiting script is intended to wait for the flux view, and then start running
curl -L -O -s -o ./wait-fs -s ${url} {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} || wget ${url} -q -O ./wait-fs {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }} || true
chmod +x ./wait-fs || true
mv ./wait-fs /usr/bin/goshare-wait-fs || true
${SUDO} mv ./wait-fs /usr/bin/goshare-wait-fs || true

# Ensure spack view is on the path, wherever it is mounted
viewbase="{{ .ViewBase }}"
@@ -47,9 +47,9 @@ goshare-wait-fs -p ${viewbase}/flux-operator-done.txt {{ if .Spec.Logging.Quiet
# Copy mount software to /opt/software
# If /opt/software already exists, we need to copy into it
if [[ -e "/opt/software" ]]; then
cp -R ${viewbase}/software/* /opt/software/ || true
${SUDO} cp -R ${viewbase}/software/* /opt/software/ || true
else
cp -R ${viewbase}/software /opt/software || true
${SUDO} cp -R ${viewbase}/software /opt/software || true
fi
{{end}}

@@ -72,10 +72,10 @@ echo "Python root: $foundroot" {{ if .Spec.Logging.Quiet }} > /dev/null 2>&1{{ e

# If we found the right python, ensure it's linked (old link does not work)
if [[ -f "${pythonversion}" ]]; then
rm -rf $viewroot/bin/python3
rm -rf $viewroot/bin/python
ln -s ${pythonversion} $viewroot/lib/python || true
ln -s ${pythonversion} $viewroot/lib/python3 || true
${SUDO} rm -rf $viewroot/bin/python3
${SUDO} rm -rf $viewroot/bin/python
${SUDO} ln -s ${pythonversion} $viewroot/lib/python || true
${SUDO} ln -s ${pythonversion} $viewroot/lib/python3 || true
fi

# Ensure we use flux's python (TODO update this to use variable)
@@ -87,15 +87,16 @@ find $viewroot . -name libpython*.so* {{ if .Spec.Logging.Quiet }}> /dev/null 2>
ls -l /mnt/flux/view/lib/libpython3.11.so.1.0 {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }}

# Write an easy file we can source for the environment
cat <<EOT >> ${viewbase}/flux-view.sh
cat <<EOT >> ./flux-view.sh
#!/bin/bash
export PATH=$PATH
export PYTHONPATH=$PYTHONPATH
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$viewroot/lib
export fluxsocket=local://${viewroot}/run/flux/local
EOT
${SUDO} mv ./flux-view.sh ${viewbase}/flux-view.sh
{{end}}
{{define "ensure-pip"}}
${pythonversion} -m pip --version || ${pythonversion} -m ensurepip || (wget https://bootstrap.pypa.io/get-pip.py && ${pythonversion} ./get-pip.py) {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }}
${pythonversion} -m pip --upgrade pip {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }}
{{end}}
${SUDO} ${pythonversion} -m pip --version || ${SUDO} ${pythonversion} -m ensurepip || (${SUDO} wget https://bootstrap.pypa.io/get-pip.py && ${pythonversion} ./get-pip.py) {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }}
${SUDO} ${pythonversion} -m pip --upgrade pip {{ if .Spec.Logging.Quiet }}> /dev/null 2>&1{{ end }}
{{end}}
41 changes: 26 additions & 15 deletions pkg/flux/templates/wait.sh
@@ -6,6 +6,18 @@
# We use the actual time command and not the wrapper, otherwise we get there is no argument -f
{{ if .Spec.Logging.Timed }}which /usr/bin/time > /dev/null 2>&1 || (echo "/usr/bin/time is required to use logging.timed true" && exit 1);{{ end }}

# Set the flux user and id from the getgo
fluxuser=$(whoami)
fluxuid=$(id -u $fluxuser)

# Add fluxuser to sudoers living... dangerously!
# A non root user container requires sudo to work
SUDO=""
if [[ "${fluxuser}" != "root" ]]; then
echo "${fluxuser} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
SUDO="sudo"
fi

# If any initCommand logic is defined
{{ .Container.Commands.Init}} {{ if .Spec.Logging.Quiet }}> /dev/null{{ end }}

@@ -14,10 +14,6 @@
{{template "wait-view" .}}
{{ if not .Spec.Flux.Container.Disable }}{{template "paths" .}}{{ end }}

# Set the flux user and id from the getgo
fluxuser=$(whoami)
fluxuid=$(id -u $fluxuser)

# Variables we can use again
cfg="${viewroot}/etc/flux/config"
command="{{ .Container.Command }}"
@@ -28,19 +28,21 @@ command="{{ .Container.Command }}"
{{ if not .Spec.Logging.Quiet }}
echo
echo "Hello user ${fluxuser}"{{ end }}

# Add fluxuser to sudoers living... dangerously!
if [[ "${fluxuser}" != "root" ]]; then
echo "${fluxuser} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
fi

# Ensure the flux user owns the curve.cert
# We need to move the curve.cert because config map volume is read only
curvesrc=/flux_operator/curve.cert
curvepath=$viewroot/curve/curve.cert

mkdir -p $viewroot/curve
cp $curvesrc $curvepath
# run directory must be owned by this user
# and /var/lib/flux
if [[ "${fluxuser}" != "root" ]]; then
${SUDO} chown -R ${fluxuser} ${viewroot}/run/flux ${viewroot}/var/lib/flux
fi

# Prepare curve certificate!
${SUDO} mkdir -p $viewroot/curve
${SUDO} cp $curvesrc $curvepath
{{ if not .Spec.Logging.Quiet }}
echo
echo "🌟️ Curve Certificate"
Expand All @@ -49,9 +59,9 @@ cat ${curvepath}
{{ end }}

# Remove group and other read
chmod o-r ${curvepath}
chmod g-r ${curvepath}
chown -R ${fluxuid} ${curvepath}
${SUDO} chmod o-r ${curvepath}
${SUDO} chmod g-r ${curvepath}
${SUDO} chown -R ${fluxuid} ${curvepath}

# If we have disabled the view, we need to use the flux here to generate resources
{{ if .Spec.Flux.Container.Disable }}
@@ -61,7 +71,8 @@ echo
echo "📦 Resources"
echo "flux R encode --hosts=${hosts} --local"
{{ end }}
flux R encode --hosts=${hosts} --local > ${viewroot}/etc/flux/system/R
flux R encode --hosts=${hosts} --local > /tmp/R
${SUDO} mv /tmp/R ${viewroot}/etc/flux/system/R
{{ if not .Spec.Logging.Quiet }}cat ${viewroot}/etc/flux/system/R{{ end }}
{{ end }}

1 change: 1 addition & 0 deletions sdk/python/v1alpha2/docs/PodSpec.md
@@ -11,6 +11,7 @@ Name | Type | Description | Notes
**node_selector** | **dict[str, str]** | NodeSelectors for a pod | [optional]
**resources** | [**dict[str, IntOrString]**](IntOrString.md) | Resources include limits and requests | [optional]
**restart_policy** | **str** | Restart Policy | [optional]
**runtime_class_name** | **str** | RuntimeClassName for the pod | [optional]
**scheduler_name** | **str** | Scheduler name for the pod | [optional]
**service_account_name** | **str** | Service account name for the pod | [optional]
