From 1608762f296a248e28e2e7059f9cdb015b8b196d Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 1 Jun 2018 23:12:59 +0200 Subject: [PATCH] Improvements and bug fixes in DM config. (#904) * Improvements and bug fixes in DM config. * Create a service account to be used to authorize TFJobs and other work within the cluster. * Create a helper script to download service account keys and turn them into K8s secrets * Fix some bugs in the docs. * Fix #878 create a GCP service account for the user. * IAP script needs a GCP service account with network admin privileges. * Add network admin privileges to the admin service account. * Name the secrets in K8s so that by default the names are the same across the deployments. This way there's one less parameter to set for every deployment. * VM service account should have a unique name per deployment so deployments are isolated. * Need to grant the VM service account logs and monitoring access to support monitoring. * I don't think there's any reason to allow user to specify name of the VM service account in the YAML file right now. * Address comments. * Autoformat jsonnet. --- docs/gke/configs/cluster-kubeflow.yaml | 2 - docs/gke/configs/cluster.jinja | 56 +++++++++++++++++-- docs/gke/configs/env-kubeflow.sh | 3 + docs/gke/create_k8s_secrets.sh | 18 ++++++ docs/gke/gke_setup.md | 26 ++++----- kubeflow/core/iap.libsonnet | 20 +++++++ .../core/prototypes/cloud-endpoints.jsonnet | 4 +- 7 files changed, 104 insertions(+), 25 deletions(-) create mode 100755 docs/gke/create_k8s_secrets.sh diff --git a/docs/gke/configs/cluster-kubeflow.yaml b/docs/gke/configs/cluster-kubeflow.yaml index 14280436113..b7025ba0d56 100644 --- a/docs/gke/configs/cluster-kubeflow.yaml +++ b/docs/gke/configs/cluster-kubeflow.yaml @@ -63,8 +63,6 @@ resources: # This is the name of the GCP static ip address to reserve for your domain. # This must be different for each Kubeflow deployment in your project. 
ipName: kubeflow-ip - # Name of the service account to use for k8s worker node pools - vmServiceAccountName: kubeflow-service-account # Provide the config for the bootstrapper. This should be a string # containing the YAML spec for the bootstrapper. # diff --git a/docs/gke/configs/cluster.jinja b/docs/gke/configs/cluster.jinja index 7dc1457a65c..ab558cce42f 100644 --- a/docs/gke/configs/cluster.jinja +++ b/docs/gke/configs/cluster.jinja @@ -41,8 +41,15 @@ limitations under the License. {% set STATEFULSETS_COLLECTION = '/apis/apps/v1/namespaces/{namespace}/statefulsets' %} {% set CLUSTER_ROLE_BINDING_COLLECTION = '/apis/rbac.authorization.k8s.io/v1/clusterrolebindings' %} -{# Names for service accounts.#} +{# Names for service accounts. + -admin is to be used for admin tasks + -user is to be used by users for actual jobs. + -vm is used for the VM service account attached to the GKE VMs. + #} {% set KF_ADMIN_NAME = NAME_PREFIX + '-admin' %} +{% set KF_USER_NAME = NAME_PREFIX + '-user' %} +{% set KF_VM_SA_NAME = NAME_PREFIX + '-vm' %} + {# For most of the K8s resources we set the deletePolicy to abandon; otherwise deployment manager reports various errors. Since we delete the cluster all the K8s resources will be deleted anyway. @@ -58,7 +65,14 @@ resources: properties: accountId: {{ KF_ADMIN_NAME }} displayName: Service Account used for Kubeflow admin actions. -- name: kubeflow-cluster-vm-service-account + +- name: {{ KF_USER_NAME }} + type: iam.v1.serviceAccount + properties: + accountId: {{ KF_USER_NAME }} + displayName: Service Account used for Kubeflow user actions. 
+ +- name: {{ KF_VM_SA_NAME }} type: iam.v1.serviceAccount properties: accountId: {{ properties['vmServiceAccountName'] }} @@ -106,7 +120,7 @@ resources: {% endif %} nodeConfig: machineType: n1-standard-1 - serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com + serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com {% if properties['securityConfig']['secureNodeMetadata'] %} workloadMetadataConfig: nodeMetadata: SECURE @@ -114,7 +128,7 @@ resources: oauthScopes: {{ VM_OAUTH_SCOPES }} metadata: dependsOn: - - kubeflow-cluster-vm-service-account + - {{ KF_VM_SA_NAME }} # We manage the node pools as separate resources. # We do this so that if we want to make changes we can delete the existing resource and then recreate it. @@ -166,7 +180,7 @@ resources: nodeMetadata: SECURE {% endif %} machineType: n1-standard-8 - serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com + serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com oauthScopes: {{ VM_OAUTH_SCOPES }} accelerators: - acceleratorCount: 1 @@ -286,9 +300,40 @@ TODO(jlewi): Do we need to serialize API activation members: - {{ 'serviceAccount:' + env['project_number'] + '@cloudservices.gserviceaccount.com' }} + {# servicemanagement.admin is needed by CloudEndpoints controller + so we can create a service to get a hostname. + #} - role: roles/servicemanagement.admin members: - {{ 'serviceAccount:' + KF_ADMIN_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + {# Network admin is needed to enable IAP and configure network settings + like backend timeouts and health checks. 
+ #} + - role: roles/compute.networkAdmin + members: + - {{ 'serviceAccount:' + KF_ADMIN_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + + - role: roles/storage.admin + members: + - {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + + - role: roles/bigquery.admin + members: + - {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + + - role: roles/dataflow.admin + members: + - {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + + - role: roles/logging.logWriter + members: + {# VM service account is used to write logs. #} + - {{ 'serviceAccount:' + KF_VM_SA_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} + + - role: roles/monitoring.metricWriter + members: + {# VM service account is used to write monitoring data. #} + - {{ 'serviceAccount:' + KF_VM_SA_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }} remove: [] @@ -297,6 +342,7 @@ TODO(jlewi): Do we need to serialize API activation - get-iam-policy - iam-api - {{ KF_ADMIN_NAME }} + - {{ KF_USER_NAME }} runtimePolicy: - UPDATE_ALWAYS diff --git a/docs/gke/configs/env-kubeflow.sh b/docs/gke/configs/env-kubeflow.sh index cae2c0856d5..c59bfe6bb45 100644 --- a/docs/gke/configs/env-kubeflow.sh +++ b/docs/gke/configs/env-kubeflow.sh @@ -13,6 +13,9 @@ export PROJECT=kubeflow # The name must be unique for each deployment within your project. export DEPLOYMENT_NAME=kubeflow +# Set this to the zone in your ${CONFIG_FILE} +export ZONE=us-east1-d + # Set config file to the YAML file defining your deployment manager configs. 
export CONFIG_FILE=cluster-${PROJECT}.yaml diff --git a/docs/gke/create_k8s_secrets.sh b/docs/gke/create_k8s_secrets.sh new file mode 100755 index 00000000000..89022335e80 --- /dev/null +++ b/docs/gke/create_k8s_secrets.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# +# A simple helper script to download secrets for Kubeflow service +# accounts and store them as K8s secrets. +set -ex +export SA_EMAIL=${DEPLOYMENT_NAME}-admin@${PROJECT}.iam.gserviceaccount.com + +# TODO(jlewi): We should name the secrets more consistently based on the service account name. +# We will need to update the component configs though +gcloud --project=${PROJECT} iam service-accounts keys create ${SA_EMAIL}.json --iam-account ${SA_EMAIL} +kubectl create secret generic --namespace=kubeflow admin-gcp-sa --from-file=admin-gcp-sa.json=./${SA_EMAIL}.json + +export USER_EMAIL=${DEPLOYMENT_NAME}-user@${PROJECT}.iam.gserviceaccount.com +export USER_SECRET_NAME=${DEPLOYMENT_NAME}-user +gcloud --project=${PROJECT} iam service-accounts keys create ${USER_EMAIL}.json --iam-account $USER_EMAIL +# We want the secret name to be the same by default for all clusters so +# that users don't have to set it manually. +kubectl create secret generic --namespace=kubeflow user-gcp-sa --from-file=user-gcp-sa.json=./${USER_EMAIL}.json \ No newline at end of file diff --git a/docs/gke/gke_setup.md b/docs/gke/gke_setup.md index 9a654c0d94f..23daed82fde 100644 --- a/docs/gke/gke_setup.md +++ b/docs/gke/gke_setup.md @@ -15,12 +15,12 @@ The instructions also take advantage of IAP to provide secure authenticated acce 1. Modify `cluster-kubeflow.yaml` - 1. Set the zone for your cluster - 1. Set property `ipName` to a value that is unique with respect to your project - 1. Set parameter ipName in bootstrapperConfig to the value selected in the previous step - 1. Set parameter acmeEmail in bootstrapperConfig to your email address - 1. Set parameter hostname in bootstrapperConfig - 1. Change the initial number of nodes if desired + 1. 
Set the zone for your cluster + 1. Set property `ipName` to a value that is unique with respect to your project + 1. Set parameter ipName in bootstrapperConfig to the value selected in the previous step + 1. Set parameter acmeEmail in bootstrapperConfig to your email address + 1. Set parameter hostname in bootstrapperConfig + 1. Change the initial number of nodes if desired * If you want GPUs set a non-zero number for number of GPU nodes. @@ -47,24 +47,18 @@ The instructions also take advantage of IAP to provide secure authenticated acce 1. Get credentials for the newly configured cluster ``` - gcloud --project=${PROJECT} container clusters get-credentials --zone=${ZONE} ${DEPLOYMENT_NAME}-${NAME} + gcloud --project=${PROJECT} container clusters get-credentials --zone=${ZONE} ${DEPLOYMENT_NAME} ``` * ZONE - this will be the zone specified in your ${CONFIG_FILE} - * NAME - this will be the name specified in your ${CONFIG_FILE} -1. Create a service account and IAM bindings for the cloud-endpoints-controller - - * You can skip this step if you are using a custom domain. +1. Create K8s secrets containing the secrets for the GCP service accounts to be used with Kubeflow ``` - export SA_EMAIL=${DEPLOYMENT_NAME}-${NAME}@${PROJECT}.iam.gserviceaccount.com - gcloud --project=${PROJECT} iam service-accounts keys create ${SA_EMAIL}.json --iam-account $SA_EMAIL - kubectl create secret generic --namespace=kubeflow cloudep-sa --from-file=./${SA_EMAIL}.json + . env-kubeflow.sh + ./create_k8s_secrets.sh ``` - * ${NAME} is the name of the resource in your ${CONFIG_FILE} - ### Create oauth client credentials Create an OAuth Client ID to be used to identify IAP when requesting acces to user's email to verify their identity. 
diff --git a/kubeflow/core/iap.libsonnet b/kubeflow/core/iap.libsonnet index cda4618bdb7..2aadd8bd3d8 100644 --- a/kubeflow/core/iap.libsonnet +++ b/kubeflow/core/iap.libsonnet @@ -223,6 +223,10 @@ name: "ENVOY_ADMIN", value: "http://localhost:" + envoyAdminPort, }, + { + name: "GOOGLE_APPLICATION_CREDENTIALS", + value: "/var/run/secrets/sa/admin-gcp-sa.json", + }, ], volumeMounts: [ { @@ -233,6 +237,11 @@ mountPath: "/var/shared/", name: "shared", }, + { + name: "sa-key", + readOnly: true, + mountPath: "/var/run/secrets/sa", + }, ], }, ], @@ -250,6 +259,12 @@ }, name: "shared", }, + { + name: "sa-key", + secret: { + secretName: "admin-gcp-sa", + }, + }, ], }, }, @@ -316,6 +331,11 @@ exit 1 fi + # Activate the service account + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + # Print out the config for debugging + gcloud config list + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}') while [[ -z ${BACKEND_ID} ]]; do BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)'); diff --git a/kubeflow/core/prototypes/cloud-endpoints.jsonnet b/kubeflow/core/prototypes/cloud-endpoints.jsonnet index edb627d6e9a..971a56a38cb 100644 --- a/kubeflow/core/prototypes/cloud-endpoints.jsonnet +++ b/kubeflow/core/prototypes/cloud-endpoints.jsonnet @@ -3,8 +3,8 @@ // @description Provides cloud-endpoints prototypes for creating Cloud Endpoints services and DNS records. // @shortDescription Cloud Endpoint domain creation. // @param name string Name for the component -// @param secretName string Name of secret containing the json service account key. -// @optionalParam secretKey string cloudep-sa.json Name of the key in the secret containing the JSON service account key. +// @optionalParam secretName string admin-gcp-sa Name of secret containing the json service account key. 
+// @optionalParam secretKey string admin-gcp-sa.json Name of the key in the secret containing the JSON service account key. // @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set. local k = import "k.libsonnet";