From a894e2c951bd63caf00277d75766643bb09053a9 Mon Sep 17 00:00:00 2001 From: Steakley Date: Thu, 13 Apr 2023 16:29:52 -0700 Subject: [PATCH 1/3] use latest dlc images for pytorch and tensorflow --- .../configs/spawner_ui_config.yaml | 10 +- .../s3/disable-service-account.yaml | 7 + ...-config-dmh59b856d-kubeflow-ConfigMap.yaml | 193 ------------------ ...-config-mgf762gt24-kubeflow-ConfigMap.yaml | 95 +++++++++ ...meters-42k97gcbmb-kubeflow-ConfigMap.yaml} | 2 +- ...eb-app-deployment-kubeflow-Deployment.yaml | 2 +- components/notebook-dockerfiles/README.md | 10 +- .../pytorch/cpu.Dockerfile | 8 +- .../pytorch/cuda.Dockerfile | 8 +- .../pytorch/requirements.txt | 24 +-- .../tensorflow/cpu.Dockerfile | 2 +- .../tensorflow/cuda.Dockerfile | 2 +- .../tensorflow/requirements.txt | 24 +-- .../notebooks/verify_ack_integration.ipynb | 8 +- .../verify_pytorch_installation.ipynb | 2 +- .../verify_tensorflow_installation.ipynb | 2 +- tests/e2e/tests/test_notebook_images.py | 17 +- 17 files changed, 149 insertions(+), 267 deletions(-) create mode 100644 awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml delete mode 100644 charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-dmh59b856d-kubeflow-ConfigMap.yaml create mode 100644 charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-mgf762gt24-kubeflow-ConfigMap.yaml rename charts/apps/jupyter-web-app/templates/ConfigMap/{jupyter-web-app-parameters-dhcbt5dtdf-kubeflow-ConfigMap.yaml => jupyter-web-app-parameters-42k97gcbmb-kubeflow-ConfigMap.yaml} (87%) diff --git a/awsconfigs/apps/jupyter-web-app/configs/spawner_ui_config.yaml b/awsconfigs/apps/jupyter-web-app/configs/spawner_ui_config.yaml index bf4365c167..267cbc2042 100644 --- a/awsconfigs/apps/jupyter-web-app/configs/spawner_ui_config.yaml +++ b/awsconfigs/apps/jupyter-web-app/configs/spawner_ui_config.yaml @@ -17,14 +17,14 @@ spawnerFormDefaults: image: # The container Image for the user's Jupyter Notebook - value: public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 + value: public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0 # The list of available standard container Images options: - kubeflownotebookswg/jupyter-scipy:v1.7.0 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-gpu-py39-cu112-ubuntu20.04-ec2-v1.1 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-gpu-py38-cu116-ubuntu20.04-ec2-v1.2 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-cpu-py38-ubuntu20.04-ec2-v1.2 + - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 + - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0 + - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 + - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0 imageGroupOne: # The container Image for the user's Group One Server # The annotation `notebooks.kubeflow.org/http-rewrite-uri: /` diff --git a/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml b/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml new file mode 100644 index 0000000000..6d6675a980 --- /dev/null +++ b/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml @@ -0,0 +1,7 @@ +$patch: delete +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ml-pipeline + namespace: kubeflow +eksctl create addon --name aws-ebs-csi-driver --cluster tests --service-account-role-arn arn:aws:iam::024825446905:role/AmazonEKS_EBS_CSI_DriverRole --force \ No newline at end of file diff --git a/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-dmh59b856d-kubeflow-ConfigMap.yaml b/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-dmh59b856d-kubeflow-ConfigMap.yaml deleted file mode 100644 index e86ed4dbb3..0000000000 --- a/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-dmh59b856d-kubeflow-ConfigMap.yaml +++ /dev/null @@ -1,193 +0,0 @@ -apiVersion: v1 -data: - spawner_ui_config.yaml: | - # Configuration file for the Jupyter UI. - # - # Each Jupyter UI option is configured by two keys: 'value' and 'readOnly' - # - The 'value' key contains the default value - # - The 'readOnly' key determines if the option will be available to users - # - # If the 'readOnly' key is present and set to 'true', the respective option - # will be disabled for users and only set by the admin. Also when a - # Notebook is POSTED to the API if a necessary field is not present then - # the value from the config will be used. - # - # If the 'readOnly' key is missing (defaults to 'false'), the respective option - # will be available for users to edit. - # - # Note that some values can be templated. Such values are the names of the - # Volumes as well as their StorageClass - spawnerFormDefaults: - image: - # The container Image for the user's Jupyter Notebook - value: public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 - # The list of available standard container Images - options: - - kubeflownotebookswg/jupyter-scipy:v1.7.0 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-gpu-py39-cu112-ubuntu20.04-ec2-v1.1 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-gpu-py38-cu116-ubuntu20.04-ec2-v1.2 - - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-cpu-py38-ubuntu20.04-ec2-v1.2 - imageGroupOne: - # The container Image for the user's Group One Server - # The annotation `notebooks.kubeflow.org/http-rewrite-uri: /` - # is applied to notebook in this group, configuring - # the Istio rewrite for containers that host their web UI at `/` - value: kubeflownotebookswg/codeserver-python:v1.7.0 - # The list of available standard container Images - options: - - kubeflownotebookswg/codeserver-python:v1.7.0 - imageGroupTwo: - # The container Image for the user's Group Two Server - # The annotation `notebooks.kubeflow.org/http-rewrite-uri: /` - # is applied to notebook in this group, configuring - # the Istio rewrite for containers that host their web UI at `/` - # The annotation `notebooks.kubeflow.org/http-headers-request-set` - # is applied to notebook in this group, configuring Istio - # to add the `X-RStudio-Root-Path` header to requests - value: kubeflownotebookswg/rstudio-tidyverse:v1.7.0 - # The list of available standard container Images - options: - - kubeflownotebookswg/rstudio-tidyverse:v1.7.0 - # If true, hide registry and/or tag name in the image selection dropdown - hideRegistry: true - hideTag: false - allowCustomImage: true - # If true, users can input custom images - # If false, users can only select from the images in this config - imagePullPolicy: - # Supported values: Always, IfNotPresent, Never - value: IfNotPresent - readOnly: false - cpu: - # CPU for user's Notebook - value: '0.5' - # Factor by with to multiply request to calculate limit - # if no limit is set, to disable set "none" - limitFactor: "1.2" - readOnly: false - memory: - # Memory for user's Notebook - value: 1.0Gi - # Factor by with to multiply request to calculate limit - # if no limit is set, to disable set "none" - limitFactor: "1.2" - readOnly: false - environment: - value: {} - readOnly: false - workspaceVolume: - # Workspace Volume to be attached to user's Notebook - # If you don't want a workspace volume then delete the 'value' key - value: - mount: /home/jovyan - newPvc: - metadata: - name: '{notebook-name}-workspace' - spec: - resources: - requests: - storage: 10Gi - accessModes: - - ReadWriteOnce - readOnly: false - dataVolumes: - # List of additional Data Volumes to be attached to the user's Notebook - value: [] - # For example, a list with 2 Data Volumes: - # value: - # - mount: /home/jovyan/datavol-1 - # newPvc: - # metadata: - # name: '{notebook-name}-datavol-1' - # spec: - # resources: - # requests: - # storage: 5Gi - # accessModes: - # - ReadWriteOnce - # - mount: /home/jovyan/datavol-1 - # existingSource: - # persistentVolumeClaim: - # claimName: test-pvc - readOnly: false - gpus: - # Number of GPUs to be assigned to the Notebook Container - value: - # values: "none", "1", "2", "4", "8" - num: "none" - # Determines what the UI will show and send to the backend - vendors: - - limitsKey: "nvidia.com/gpu" - uiName: "NVIDIA" - - limitsKey: "amd.com/gpu" - uiName: "AMD" - # Values: "" or a `limits-key` from the vendors list - vendor: "" - readOnly: false - affinityConfig: - # If readonly, the default value will be the only option - # value is a list of `configKey`s that we want to be selected by default - value: "" - # The list of available affinity configs - options: [] - #options: - # - configKey: "exclusive__n1-standard-2" - # displayName: "Exclusive: n1-standard-2" - # affinity: - # # (Require) Node having label: `node_pool=notebook-n1-standard-2` - # nodeAffinity: - # requiredDuringSchedulingIgnoredDuringExecution: - # nodeSelectorTerms: - # - matchExpressions: - # - key: "node_pool" - # operator: "In" - # values: - # - "notebook-n1-standard-2" - # # (Require) Node WITHOUT existing Pod having label: `notebook-name` - # podAntiAffinity: - # requiredDuringSchedulingIgnoredDuringExecution: - # - labelSelector: - # matchExpressions: - # - key: "notebook-name" - # operator: "Exists" - # namespaces: [] - # topologyKey: "kubernetes.io/hostname" - #readOnly: false - tolerationGroup: - # The default `groupKey` from the options list - # If readonly, the default value will be the only option - value: "" - # The list of available tolerationGroup configs - options: [] - #options: - # - groupKey: "group_1" - # displayName: "Group 1: description" - # tolerations: - # - key: "key1" - # operator: "Equal" - # value: "value1" - # effect: "NoSchedule" - # - key: "key2" - # operator: "Equal" - # value: "value2" - # effect: "NoSchedule" - readOnly: false - shm: - value: true - readOnly: false - configurations: - # List of labels to be selected, these are the labels from PodDefaults - # value: - # - add-aws-secret - # - default-editor - value: [] - readOnly: false -kind: ConfigMap -metadata: - annotations: {} - labels: - app: jupyter-web-app - kustomize.component: jupyter-web-app - name: jupyter-web-app-config-dmh59b856d - namespace: kubeflow \ No newline at end of file diff --git a/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-mgf762gt24-kubeflow-ConfigMap.yaml b/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-mgf762gt24-kubeflow-ConfigMap.yaml new file mode 100644 index 0000000000..0d6d6cc779 --- /dev/null +++ b/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-config-mgf762gt24-kubeflow-ConfigMap.yaml @@ -0,0 +1,95 @@ +apiVersion: v1 +data: + spawner_ui_config.yaml: "# Configuration file for the Jupyter UI.\n#\n# Each Jupyter\ + \ UI option is configured by two keys: 'value' and 'readOnly'\n# - The 'value'\ + \ key contains the default value\n# - The 'readOnly' key determines if the option\ + \ will be available to users\n#\n# If the 'readOnly' key is present and set to\ + \ 'true', the respective option\n# will be disabled for users and only set by\ + \ the admin. Also when a\n# Notebook is POSTED to the API if a necessary field\ + \ is not present then\n# the value from the config will be used.\n#\n# If the\ + \ 'readOnly' key is missing (defaults to 'false'), the respective option\n# will\ + \ be available for users to edit.\n#\n# Note that some values can be templated.\ + \ Such values are the names of the\n# Volumes as well as their StorageClass\n\ + spawnerFormDefaults:\n image:\n # The container Image for the user's Jupyter\ + \ Notebook\n value: public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0\n\ + \ # The list of available standard container Images\n options:\n - kubeflownotebookswg/jupyter-scipy:v1.7.0\n\ + \ - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0\n\ + \ - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0\n\ + \ - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0\n\ + \ - public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0\n\ + \ imageGroupOne:\n # The container Image for the user's Group One Server\n\ + \ # The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\n # is applied\ + \ to notebook in this group, configuring\n # the Istio rewrite for containers\ + \ that host their web UI at `/`\n value: kubeflownotebookswg/codeserver-python:v1.7.0\n\ + \ # The list of available standard container Images\n options:\n - kubeflownotebookswg/codeserver-python:v1.7.0\n\ + \ imageGroupTwo:\n # The container Image for the user's Group Two Server\n\ + \ # The annotation `notebooks.kubeflow.org/http-rewrite-uri: /`\n # is applied\ + \ to notebook in this group, configuring\n # the Istio rewrite for containers\ + \ that host their web UI at `/`\n # The annotation `notebooks.kubeflow.org/http-headers-request-set`\n\ + \ # is applied to notebook in this group, configuring Istio\n # to add the\ + \ `X-RStudio-Root-Path` header to requests\n value: kubeflownotebookswg/rstudio-tidyverse:v1.7.0\n\ + \ # The list of available standard container Images\n options:\n - kubeflownotebookswg/rstudio-tidyverse:v1.7.0\n\ + \ # If true, hide registry and/or tag name in the image selection dropdown\n\ + \ hideRegistry: true\n hideTag: false\n allowCustomImage: true\n # If true,\ + \ users can input custom images\n # If false, users can only select from the\ + \ images in this config\n imagePullPolicy:\n # Supported values: Always, IfNotPresent,\ + \ Never\n value: IfNotPresent\n readOnly: false\n cpu:\n # CPU for user's\ + \ Notebook\n value: '0.5'\n # Factor by with to multiply request to calculate\ + \ limit\n # if no limit is set, to disable set \"none\"\n limitFactor: \"\ + 1.2\"\n readOnly: false\n memory:\n # Memory for user's Notebook\n value:\ + \ 1.0Gi\n # Factor by with to multiply request to calculate limit\n # if\ + \ no limit is set, to disable set \"none\"\n limitFactor: \"1.2\"\n readOnly:\ + \ false\n environment:\n value: {}\n readOnly: false\n workspaceVolume:\n\ + \ # Workspace Volume to be attached to user's Notebook\n # If you don't\ + \ want a workspace volume then delete the 'value' key\n value:\n mount:\ + \ /home/jovyan\n newPvc:\n metadata:\n name: '{notebook-name}-workspace'\n\ + \ spec:\n resources:\n requests:\n storage:\ + \ 10Gi\n accessModes:\n - ReadWriteOnce\n readOnly: false\n\ + \ dataVolumes:\n # List of additional Data Volumes to be attached to the user's\ + \ Notebook\n value: []\n # For example, a list with 2 Data Volumes:\n \ + \ # value:\n # - mount: /home/jovyan/datavol-1\n # newPvc:\n #\ + \ metadata:\n # name: '{notebook-name}-datavol-1'\n # \ + \ spec:\n # resources:\n # requests:\n # \ + \ storage: 5Gi\n # accessModes:\n # - ReadWriteOnce\n\ + \ # - mount: /home/jovyan/datavol-1\n # existingSource:\n # \ + \ persistentVolumeClaim:\n # claimName: test-pvc\n readOnly:\ + \ false\n gpus:\n # Number of GPUs to be assigned to the Notebook Container\n\ + \ value:\n # values: \"none\", \"1\", \"2\", \"4\", \"8\"\n num:\ + \ \"none\"\n # Determines what the UI will show and send to the backend\n\ + \ vendors:\n - limitsKey: \"nvidia.com/gpu\"\n uiName: \"NVIDIA\"\ + \n - limitsKey: \"amd.com/gpu\"\n uiName: \"AMD\"\n # Values:\ + \ \"\" or a `limits-key` from the vendors list\n vendor: \"\"\n readOnly:\ + \ false\n affinityConfig:\n # If readonly, the default value will be the only\ + \ option\n # value is a list of `configKey`s that we want to be selected by\ + \ default\n value: \"\"\n # The list of available affinity configs\n \ + \ options: []\n #options:\n # - configKey: \"exclusive__n1-standard-2\"\ + \n # displayName: \"Exclusive: n1-standard-2\"\n # affinity:\n \ + \ # # (Require) Node having label: `node_pool=notebook-n1-standard-2`\n \ + \ # nodeAffinity:\n # requiredDuringSchedulingIgnoredDuringExecution:\n\ + \ # nodeSelectorTerms:\n # - matchExpressions:\n \ + \ # - key: \"node_pool\"\n # operator: \"In\"\ + \n # values:\n # - \"notebook-n1-standard-2\"\ + \n # # (Require) Node WITHOUT existing Pod having label: `notebook-name`\n\ + \ # podAntiAffinity:\n # requiredDuringSchedulingIgnoredDuringExecution:\n\ + \ # - labelSelector:\n # matchExpressions:\n #\ + \ - key: \"notebook-name\"\n # operator: \"\ + Exists\"\n # namespaces: []\n # topologyKey: \"kubernetes.io/hostname\"\ + \n #readOnly: false\n tolerationGroup:\n # The default `groupKey` from\ + \ the options list\n # If readonly, the default value will be the only option\n\ + \ value: \"\"\n # The list of available tolerationGroup configs\n options:\ + \ []\n #options:\n # - groupKey: \"group_1\"\n # displayName: \"\ + Group 1: description\"\n # tolerations:\n # - key: \"key1\"\n \ + \ # operator: \"Equal\"\n # value: \"value1\"\n # \ + \ effect: \"NoSchedule\"\n # - key: \"key2\"\n # operator: \"\ + Equal\"\n # value: \"value2\"\n # effect: \"NoSchedule\"\n\ + \ readOnly: false\n shm:\n value: true\n readOnly: false\n configurations:\n\ + \ # List of labels to be selected, these are the labels from PodDefaults\n\ + \ # value:\n # - add-aws-secret\n # - default-editor\n value:\ + \ []\n readOnly: false\n" +kind: ConfigMap +metadata: + labels: + app: jupyter-web-app + kustomize.component: jupyter-web-app + name: jupyter-web-app-config-mgf762gt24 + namespace: kubeflow diff --git a/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-dhcbt5dtdf-kubeflow-ConfigMap.yaml b/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-42k97gcbmb-kubeflow-ConfigMap.yaml similarity index 87% rename from charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-dhcbt5dtdf-kubeflow-ConfigMap.yaml rename to charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-42k97gcbmb-kubeflow-ConfigMap.yaml index 09ffe28948..b6064620e1 100644 --- a/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-dhcbt5dtdf-kubeflow-ConfigMap.yaml +++ b/charts/apps/jupyter-web-app/templates/ConfigMap/jupyter-web-app-parameters-42k97gcbmb-kubeflow-ConfigMap.yaml @@ -11,5 +11,5 @@ metadata: labels: app: jupyter-web-app kustomize.component: jupyter-web-app - name: jupyter-web-app-parameters-dhcbt5dtdf + name: jupyter-web-app-parameters-42k97gcbmb namespace: kubeflow diff --git a/charts/apps/jupyter-web-app/templates/Deployment/jupyter-web-app-deployment-kubeflow-Deployment.yaml b/charts/apps/jupyter-web-app/templates/Deployment/jupyter-web-app-deployment-kubeflow-Deployment.yaml index c9b62b71b3..4d580caa8e 100644 --- a/charts/apps/jupyter-web-app/templates/Deployment/jupyter-web-app-deployment-kubeflow-Deployment.yaml +++ b/charts/apps/jupyter-web-app/templates/Deployment/jupyter-web-app-deployment-kubeflow-Deployment.yaml @@ -42,7 +42,7 @@ spec: serviceAccountName: jupyter-web-app-service-account volumes: - configMap: - name: jupyter-web-app-config-dmh59b856d + name: jupyter-web-app-config-mgf762gt24 name: config-volume - configMap: name: jupyter-web-app-logos diff --git a/components/notebook-dockerfiles/README.md b/components/notebook-dockerfiles/README.md index e430007822..8149e56811 100644 --- a/components/notebook-dockerfiles/README.md +++ b/components/notebook-dockerfiles/README.md @@ -7,10 +7,10 @@ This directory contains the source code for these jupyter images which is based The following AWS Optimized container images are available from the [Amazon Elastic Container Registry](https://gallery.ecr.aws/kubeflow-on-aws/) (Amazon ECR). ``` -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-gpu-py39-cu112-ubuntu20.04-ec2-v1.1 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-gpu-py38-cu116-ubuntu20.04-ec2-v1.2 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-cpu-py38-ubuntu20.04-ec2-v1.2 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0 ``` These images are based on AWS Deep Learning Containers which provide optimized environments with popular machine learning frameworks such as TensorFlow and PyTorch, and are available in the Amazon ECR. For more information on AWS Deep Learning Container options, see [Deep Learning Container Docs](https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/what-is-dlc.html). @@ -18,8 +18,6 @@ Along with specific machine learning frameworks, these container images have add ``` kfp kfserving -h5py -pandas awscli boto3 ``` diff --git a/components/notebook-dockerfiles/pytorch/cpu.Dockerfile b/components/notebook-dockerfiles/pytorch/cpu.Dockerfile index 70a5feaab5..15f02ccfbe 100644 --- a/components/notebook-dockerfiles/pytorch/cpu.Dockerfile +++ b/components/notebook-dockerfiles/pytorch/cpu.Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.12.1-cpu-py38-ubuntu20.04-ec2-v1.2 +ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0 FROM $BASE_IMAGE ARG NB_USER=jovyan @@ -49,7 +49,7 @@ RUN apt-get update \ # install -- node.js RUN export DEBIAN_FRONTEND=noninteractive \ && curl -sL "https://deb.nodesource.com/gpgkey/nodesource.gpg.key" | apt-key add - \ - && echo "deb https://deb.nodesource.com/node_14.x focal main" > /etc/apt/sources.list.d/nodesource.list \ + && echo "deb http://deb.nodesource.com/node_14.x focal main" > /etc/apt/sources.list.d/nodesource.list \ && apt-get -yq update \ && apt-get -yq install --no-install-recommends \ nodejs \ @@ -57,8 +57,8 @@ RUN export DEBIAN_FRONTEND=noninteractive \ && rm -rf /var/lib/apt/lists/* # Install kubectl client -RUN echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list \ - && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ + && echo "deb http://packages.cloud.google.com/apt/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list \ && apt-get update \ && apt-get install -y kubectl diff --git a/components/notebook-dockerfiles/pytorch/cuda.Dockerfile b/components/notebook-dockerfiles/pytorch/cuda.Dockerfile index 6a3f4ab855..46cce6475e 100644 --- a/components/notebook-dockerfiles/pytorch/cuda.Dockerfile +++ b/components/notebook-dockerfiles/pytorch/cuda.Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.12.1-gpu-py38-cu116-ubuntu20.04-ec2-v1.2 +ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 FROM $BASE_IMAGE @@ -46,7 +46,7 @@ RUN apt-get update \ # install -- node.js RUN export DEBIAN_FRONTEND=noninteractive \ && curl -sL "https://deb.nodesource.com/gpgkey/nodesource.gpg.key" | apt-key add - \ - && echo "deb https://deb.nodesource.com/node_14.x focal main" > /etc/apt/sources.list.d/nodesource.list \ + && echo "deb http://deb.nodesource.com/node_14.x focal main" > /etc/apt/sources.list.d/nodesource.list \ && apt-get -yq update \ && apt-get -yq install --no-install-recommends \ nodejs \ @@ -54,8 +54,8 @@ RUN export DEBIAN_FRONTEND=noninteractive \ && rm -rf /var/lib/apt/lists/* # Install kubectl client -RUN echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list \ - && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ + && echo "deb http://packages.cloud.google.com/apt/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list \ && apt-get update \ && apt-get install -y kubectl diff --git a/components/notebook-dockerfiles/pytorch/requirements.txt b/components/notebook-dockerfiles/pytorch/requirements.txt index cb3ecdecf9..0f61e9a1c5 100644 --- a/components/notebook-dockerfiles/pytorch/requirements.txt +++ b/components/notebook-dockerfiles/pytorch/requirements.txt @@ -12,23 +12,7 @@ widgetsnbextension==3.6.1 ipywidgets==7.7.1 # Kubeflow Related Packages -kfp==1.8.13 -kfp-server-api==1.8.3 -kserve==0.9.0 -kubeflow-training==1.4.0 -kubeflow-katib==0.14.0 - -# Common data science packages -h5py==3.7.0 -pandas==1.4.3 -matplotlib==3.5.2 -xgboost==1.6.1 -ipympl==0.9.1 - -# AWS related packages -awscli==1.22.101 -boto3==1.21.0 - -# Pytorch packages -# a version mismatch for fastai can cause a different version of torch to get installed, be careful. -fastai==2.7.9 \ No newline at end of file +kfp==1.8.20 +kserve==0.10.1 +kubeflow-training==1.6.0 +git+https://github.com/kubeflow/katib.git@1d3ab5726f2f2181f174b5324b600cbfdd5f0cec#subdirectory=sdk/python/v1beta1 diff --git a/components/notebook-dockerfiles/tensorflow/cpu.Dockerfile b/components/notebook-dockerfiles/tensorflow/cpu.Dockerfile index 77358677f9..4f6c060ffd 100644 --- a/components/notebook-dockerfiles/tensorflow/cpu.Dockerfile +++ b/components/notebook-dockerfiles/tensorflow/cpu.Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1 +ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0 FROM $BASE_IMAGE diff --git a/components/notebook-dockerfiles/tensorflow/cuda.Dockerfile b/components/notebook-dockerfiles/tensorflow/cuda.Dockerfile index 318263dc96..6a482c3499 100644 --- a/components/notebook-dockerfiles/tensorflow/cuda.Dockerfile +++ b/components/notebook-dockerfiles/tensorflow/cuda.Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.10.0-gpu-py39-cu112-ubuntu20.04-ec2-v1.1 +ARG BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 FROM $BASE_IMAGE diff --git a/components/notebook-dockerfiles/tensorflow/requirements.txt b/components/notebook-dockerfiles/tensorflow/requirements.txt index 251a062e2d..33b7fe8705 100644 --- a/components/notebook-dockerfiles/tensorflow/requirements.txt +++ b/components/notebook-dockerfiles/tensorflow/requirements.txt @@ -12,23 +12,7 @@ widgetsnbextension==3.6.1 ipywidgets==7.7.1 # Kubeflow Related Packages -kfp==1.8.13 -kfp-server-api==1.8.3 -kserve==0.9.0 -kubeflow-training==1.4.0 -kubeflow-katib==0.14.0 - -# Common data science packages -h5py==3.7.0 -pandas==1.4.3 -matplotlib==3.5.2 -xgboost==1.6.1 -ipympl==0.9.1 - -# AWS related packages -awscli==1.22.101 -boto3==1.21.0 - -# TF Packages -keras==2.9.0 -protobuf==3.19.5 \ No newline at end of file +kfp==1.8.20 +kubeflow-training==1.6.0 +kserve==0.10.1 +git+https://github.com/kubeflow/katib.git@1d3ab5726f2f2181f174b5324b600cbfdd5f0cec#subdirectory=sdk/python/v1beta1 diff --git a/tests/e2e/resources/notebooks/verify_ack_integration.ipynb b/tests/e2e/resources/notebooks/verify_ack_integration.ipynb index 9cacf3750a..4a11e9d80f 100644 --- a/tests/e2e/resources/notebooks/verify_ack_integration.ipynb +++ b/tests/e2e/resources/notebooks/verify_ack_integration.ipynb @@ -11,8 +11,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.10.6" }, "orig_nbformat": 4 }, diff --git a/tests/e2e/resources/notebooks/verify_pytorch_installation.ipynb b/tests/e2e/resources/notebooks/verify_pytorch_installation.ipynb index 4cbdf8d48f..d0ce5d2038 100644 --- a/tests/e2e/resources/notebooks/verify_pytorch_installation.ipynb +++ b/tests/e2e/resources/notebooks/verify_pytorch_installation.ipynb @@ -27,7 +27,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/tests/e2e/resources/notebooks/verify_tensorflow_installation.ipynb b/tests/e2e/resources/notebooks/verify_tensorflow_installation.ipynb index 42204ca530..f90ac984fa 100644 --- a/tests/e2e/resources/notebooks/verify_tensorflow_installation.ipynb +++ b/tests/e2e/resources/notebooks/verify_tensorflow_installation.ipynb @@ -27,7 +27,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/tests/e2e/tests/test_notebook_images.py b/tests/e2e/tests/test_notebook_images.py index 4932e7fd8d..9724e7b5a1 100644 --- a/tests/e2e/tests/test_notebook_images.py +++ b/tests/e2e/tests/test_notebook_images.py @@ -29,27 +29,27 @@ NOTEBOOK_IMAGES = [ "kubeflownotebookswg/jupyter-scipy:v1.7.0", - "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-gpu-py39-cu112-ubuntu20.04-ec2-v1.1", - "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.10.0-cpu-py39-ubuntu20.04-ec2-v1.1", - "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-gpu-py38-cu116-ubuntu20.04-ec2-v1.2", - "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.1-cpu-py38-ubuntu20.04-ec2-v1.2", + "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0", + "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0", + "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0", + "public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0", ] testdata = [ ("scipy", NOTEBOOK_IMAGES[0], "sanity_check.ipynb", "Hello World!"), - ("tf-gpu", NOTEBOOK_IMAGES[1], "verify_tensorflow_installation.ipynb", "2.10.0"), - ("tf-cpu", NOTEBOOK_IMAGES[2], "verify_tensorflow_installation.ipynb", "2.10.0"), + ("tf-gpu", NOTEBOOK_IMAGES[1], "verify_tensorflow_installation.ipynb", "2.12.0"), + ("tf-cpu", NOTEBOOK_IMAGES[2], "verify_tensorflow_installation.ipynb", "2.12.0"), ( "pytorch-gpu", NOTEBOOK_IMAGES[3], "verify_pytorch_installation.ipynb", - "1.12.1+cu116", + "2.0.0", ), ( "pytorch-cpu", NOTEBOOK_IMAGES[4], "verify_pytorch_installation.ipynb", - "1.12.1+cpu", + "2.0.0", ), ] @@ -100,4 +100,5 @@ def test_notebook_container( cmd.append(sub_cmd) output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode() + print(output) assert expected_output in output From d7d5401e0a26642b79d42b274b29d9b40195df8f Mon Sep 17 00:00:00 2001 From: Steakley Date: Thu, 13 Apr 2023 16:33:44 -0700 Subject: [PATCH 2/3] remove testing print --- tests/e2e/tests/test_notebook_images.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/e2e/tests/test_notebook_images.py b/tests/e2e/tests/test_notebook_images.py index 9724e7b5a1..3fe48d624b 100644 --- a/tests/e2e/tests/test_notebook_images.py +++ b/tests/e2e/tests/test_notebook_images.py @@ -100,5 +100,4 @@ def test_notebook_container( cmd.append(sub_cmd) output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode() - print(output) assert expected_output in output From 5f1a7674f7c9532e696a0402d3a3f47d7d7c29b4 Mon Sep 17 00:00:00 2001 From: Steakley Date: Thu, 13 Apr 2023 23:20:06 -0700 Subject: [PATCH 3/3] update website --- .../pipeline-static/s3/disable-service-account.yaml | 7 ------- website/content/en/docs/component-guides/notebooks.md | 10 ++++------ 2 files changed, 4 insertions(+), 13 deletions(-) delete mode 100644 awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml diff --git a/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml b/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml deleted file mode 100644 index 6d6675a980..0000000000 --- a/awsconfigs/apps/pipeline-static/s3/disable-service-account.yaml +++ /dev/null @@ -1,7 +0,0 @@ -$patch: delete -apiVersion: v1 -kind: ServiceAccount -metadata: - name: ml-pipeline - namespace: kubeflow -eksctl create addon --name aws-ebs-csi-driver --cluster tests --service-account-role-arn arn:aws:iam::024825446905:role/AmazonEKS_EBS_CSI_DriverRole --force \ No newline at end of file diff --git a/website/content/en/docs/component-guides/notebooks.md b/website/content/en/docs/component-guides/notebooks.md index 45e09f71b0..a75a9d533a 100644 --- a/website/content/en/docs/component-guides/notebooks.md +++ b/website/content/en/docs/component-guides/notebooks.md @@ -20,10 +20,10 @@ Use AWS-optimized Kubeflow Notebook server images to quickly get started with a These container images are available on the [Amazon Elastic Container Registry (Amazon ECR)](https://gallery.ecr.aws/kubeflow-on-aws/). The following images are available as part of this release, however you can always find the latest updated images in the linked ECR repository. ``` -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.9.1-gpu-py39-cu112-ubuntu20.04-e3-v1.2-2022-09-20 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.9.1-cpu-py39-ubuntu20.04-e3-v1.2-2022-09-20 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.0-gpu-py38-cu116-ubuntu20.04-ec2-2022-09-20 -public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:1.12.0-cpu-py38-ubuntu20.04-ec2-2022-09-20 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-tensorflow:2.12.0-cpu-py310-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-gpu-py310-cu118-ubuntu20.04-ec2-v1.0 +public.ecr.aws/kubeflow-on-aws/notebook-servers/jupyter-pytorch:2.0.0-cpu-py310-ubuntu20.04-ec2-v1.0 ``` AWS Deep Learning Containers provide optimized environments with popular machine learning frameworks such as TensorFlow and PyTorch, and are available in the Amazon ECR. For more information on AWS Deep Learning Container options, see [Available Deep Learning Containers Images](https://github.com/aws/deep-learning-containers/blob/master/available_images.md). @@ -31,8 +31,6 @@ AWS Deep Learning Containers provide optimized environments with popular machine Along with specific machine learning frameworks, these container images have additional pre-installed packages: - `kfp` - `kfserving` -- `h5py` -- `pandas` - `awscli` - `boto3`