Skip to content

Commit

Permalink
use cert-manager for certification management (Qiskit#687)
Browse files Browse the repository at this point in the history
* use cert-manager for certification management

Signed-off-by: Akihiko Kuroda <akihikokuroda2020@gmail.com>
  • Loading branch information
akihikokuroda authored Jun 20, 2023
1 parent d2e7590 commit f8a5e36
Show file tree
Hide file tree
Showing 11 changed files with 299 additions and 7 deletions.
47 changes: 47 additions & 0 deletions gateway/api/ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import yaml
from kubernetes import client, config
from kubernetes.dynamic.client import DynamicClient
from kubernetes.dynamic.exceptions import ResourceNotFoundError

import requests
from ray.dashboard.modules.job.sdk import JobSubmissionClient
Expand Down Expand Up @@ -195,4 +196,50 @@ def kill_ray_cluster(cluster_name: str) -> bool:
"Something went wrong during ray cluster deletion request: %s",
delete_response.text,
)
try:
cert_client = dyn_client.resources.get(api_version="v1", kind="Certificate")
except ResourceNotFoundError:
return success

delete_response = cert_client.delete(name=cluster_name, namespace=namespace)
if delete_response.status == "Success":
success = True
else:
logging.error(
"Something went wrong during ray certification deletion request: %s",
delete_response.text,
)

delete_response = cert_client.delete(
name=f"{cluster_name}-worker", namespace=namespace
)
if delete_response.status == "Success":
success = True
else:
logging.error(
"Something went wrong during ray certification deletion request: %s",
delete_response.text,
)

corev1 = client.CoreV1Api()
delete_response = corev1.delete_namespaced_secret(
name=cluster_name, namespace=namespace
)
if delete_response.status == "Success":
success = True
else:
logging.error(
"Something went wrong during certification secret deletion request: %s",
delete_response.text,
)
delete_response = corev1.delete_namespaced_secret(
name=f"{cluster_name}-worker", namespace=namespace
)
if delete_response.status == "Success":
success = True
else:
logging.error(
"Something went wrong during certification secret deletion request: %s",
delete_response.text,
)
return success
7 changes: 6 additions & 1 deletion gateway/tests/api/test_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,14 @@ def test_kill_cluster(self):
DynamicClient.resources = MagicMock()
mock = mock_delete()
DynamicClient.resources.get = MagicMock(return_value=mock)
client.CoreV1Api = MagicMock()

success = kill_ray_cluster("some_cluster")
self.assertTrue(success)
DynamicClient.resources.get.assert_called_once_with(
DynamicClient.resources.get.assert_any_call(
api_version="v1alpha1", kind="RayCluster"
)
DynamicClient.resources.get.assert_any_call(
api_version="v1", kind="Certificate"
)
client.CoreV1Api.assert_called()
2 changes: 2 additions & 0 deletions infrastructure/helm/quantumserverless/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ For our Ray Charts dependencies we are using the configuration created by the Ra

- For Ray Api Server you can read their [values.yaml](https://github.com/ray-project/kuberay-helm/blob/main/helm-chart/kuberay-apiserver/values.yaml).

TLS is enabled for the gRPC communication among Ray components. It uses a self-signed certificate by default. It can optionally use certificates signed by cert-manager in environments where cert-manager is installed. The option is `gateway.useCertManager: true`.

**Keycloak**

- The initial user ID and password for both the Keycloak console (adminUser/adminPassword) and the Ray dashboard (keycloakUserID/keycloakPassword) can be changed in the values.yaml file. It is good to change them before applying the Helm chart.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{{- if .Values.useCertManager }}
# Rendered only when useCertManager is set. Builds a three-step issuer chain:
#   1. a self-signed ClusterIssuer,
#   2. a CA Certificate signed by that issuer and stored in ray-root-secret,
#   3. an Issuer that signs certificates with the CA held in ray-root-secret.
# Step 1: bootstrap issuer that self-signs the certificates it issues.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: selfsigned-issuer
spec:
selfSigned: {}
---
# Step 2: root CA certificate (isCA: true) issued by selfsigned-issuer.
# Its key pair is written to the secret named by secretName.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: ray-selfsigned-ca
spec:
isCA: true
commonName: ray-selfsigned-ca
secretName: ray-root-secret
privateKey:
algorithm: ECDSA
size: 256
issuerRef:
name: selfsigned-issuer
kind: ClusterIssuer
group: cert-manager.io
---
# Step 3: CA issuer backed by ray-root-secret. Certificates that set
# issuerRef.name to ray-ca-issuer are signed by the root CA above.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: ray-ca-issuer
spec:
ca:
secretName: ray-root-secret
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,8 @@ spec:
volumeMounts:
- mountPath: "/usr/src/app/media/"
name: gateway-pv-storage
- mountPath: "/tmp/templates/rayclustertemplate.yaml"
- mountPath: "/tmp/templates/"
name: ray-cluster-template
subPath: rayclustertemplate.yaml
env:
- name: DEBUG
value: {{ .Values.application.debug | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,54 @@ data:
-CAcreateserial -out /etc/ray/tls/tls.crt \
-days 365 \
-sha256 -extfile /etc/ray/tls/cert.conf
{{- if .Values.useCertManager }}
gencert_cert_head.sh: |
  #!/bin/sh
  # Request a cert-manager Certificate named after the cluster, wait until it
  # is issued, then export the CA certificate, TLS certificate and private key
  # from the resulting Secret into files.
  #
  # Usage: gencert_cert_head.sh BASE_DIR CLUSTER_NAME IP_ADDRESS NAMESPACE
  #   BASE_DIR      directory that receives ca.crt, tls.crt and tls.key
  #   CLUSTER_NAME  name used for both the Certificate and its Secret
  #   IP_ADDRESS    IP address listed in the certificate's ipAddresses
  #   NAMESPACE     namespace used to build the service DNS name
  #
  # NOTE: the kubectl calls do not pass -n, so they act on the namespace of
  # the current kubectl context; NAMESPACE only affects the DNS name.
  BASE_DIR=$1
  CLUSTER_NAME=$2
  IP_ADDRESS=$3
  NAMESPACE=$4
  kubectl apply -f - <<EOF
  apiVersion: cert-manager.io/v1
  kind: Certificate
  metadata:
    name: $CLUSTER_NAME
  spec:
    commonName: quantummiddleware.com
    dnsNames:
    - $CLUSTER_NAME-svc.$NAMESPACE.svc.cluster.local
    duration: 2160h0m0s
    ipAddresses:
    - $IP_ADDRESS
    issuerRef:
      group: cert-manager.io
      kind: Issuer
      name: ray-ca-issuer
    privateKey:
      algorithm: RSA
      encoding: PKCS1
      size: 2048
    renewBefore: 360h0m0s
    secretName: $CLUSTER_NAME
    subject:
      organizations:
      - quantummiddleware
    usages:
    - server auth
    - client auth
  EOF
  # Poll until the Ready condition is "True". Right after creation the
  # condition is absent and the jsonpath output is empty, so the test must be
  # quoted and must wait for "True" rather than loop only while "False";
  # otherwise the loop ends before the Secret has been written.
  # The sleep keeps the loop from hammering the API server.
  until [ "$(kubectl get cert "$CLUSTER_NAME" -o=jsonpath='{.status.conditions[?(@.type=="Ready")].status}')" = "True" ]
  do
    sleep 1
  done
  echo "Certificate for $CLUSTER_NAME is ready"
  kubectl get cert "$CLUSTER_NAME" -o json
  # Secret values are base64-encoded; decode each entry into its own file.
  kubectl get secret "$CLUSTER_NAME" -o=jsonpath='{.data.ca\.crt}' | base64 -d > "$BASE_DIR/ca.crt"
  kubectl get secret "$CLUSTER_NAME" -o=jsonpath='{.data.tls\.crt}' | base64 -d > "$BASE_DIR/tls.crt"
  kubectl get secret "$CLUSTER_NAME" -o=jsonpath='{.data.tls\.key}' | base64 -d > "$BASE_DIR/tls.key"
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{{- if .Values.useCertManager }}
# Rendered only when useCertManager is set. Defines a service account and the
# namespaced permissions bound to it for managing Ray clusters, cert-manager
# certificates and secrets.
apiVersion: v1
kind: ServiceAccount
metadata:
name: ray-cluster-sa
---
# Role granting create/delete/get/list on three resource types:
# rayclusters (ray.io), certificates (cert-manager.io) and core secrets.
# No watch, update or patch verbs are granted.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: ray-cluster-role
rules:
- apiGroups:
- ray.io
resources:
- rayclusters
verbs:
- create
- delete
- get
- list
- apiGroups:
- cert-manager.io
resources:
- certificates
verbs:
- create
- delete
- get
- list
# The empty string selects the core API group, where secrets live.
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- delete
- get
- list
---
# Binds ray-cluster-role to the ray-cluster-sa service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: ray-cluster-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: ray-cluster-role
subjects:
- kind: ServiceAccount
name: ray-cluster-sa
{{- end }}
Loading

0 comments on commit f8a5e36

Please sign in to comment.