From 21d98896c780f69ebc1cc41c940a521bee5b701a Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Tue, 30 Jul 2024 10:48:38 +0800 Subject: [PATCH 1/3] update the name of pvc Signed-off-by: helenxie-bit --- sdk/python/kubeflow/training/api/training_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/python/kubeflow/training/api/training_client.py b/sdk/python/kubeflow/training/api/training_client.py index 8750fcbb2d..ff70eeec33 100644 --- a/sdk/python/kubeflow/training/api/training_client.py +++ b/sdk/python/kubeflow/training/api/training_client.py @@ -204,7 +204,7 @@ def train( self.core_api.create_namespaced_persistent_volume_claim( namespace=namespace, body=utils.get_pvc_spec( - pvc_name=constants.STORAGE_INITIALIZER, + pvc_name=name, namespace=namespace, storage_config=storage_config, ), @@ -213,9 +213,9 @@ def train( pvc_list = self.core_api.list_namespaced_persistent_volume_claim(namespace) # Check if the PVC with the specified name exists for pvc in pvc_list.items: - if pvc.metadata.name == constants.STORAGE_INITIALIZER: + if pvc.metadata.name == name: print( - f"PVC '{constants.STORAGE_INITIALIZER}' already exists in namespace " + f"PVC '{name}' already exists in namespace " f"{namespace}." ) break From 799b8787baa0a397c86f4cc39d166b64b147fefe Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Tue, 30 Jul 2024 11:00:09 +0800 Subject: [PATCH 2/3] reformat with black Signed-off-by: helenxie-bit --- sdk/python/kubeflow/training/api/training_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sdk/python/kubeflow/training/api/training_client.py b/sdk/python/kubeflow/training/api/training_client.py index ff70eeec33..ccd1f82601 100644 --- a/sdk/python/kubeflow/training/api/training_client.py +++ b/sdk/python/kubeflow/training/api/training_client.py @@ -214,10 +214,7 @@ def train( # Check if the PVC with the specified name exists for pvc in pvc_list.items: if pvc.metadata.name == name: - print( - f"PVC '{name}' already exists in namespace " - f"{namespace}." - ) + print(f"PVC '{name}' already exists in namespace " f"{namespace}.") break else: raise RuntimeError(f"failed to create PVC. Error: {e}") From 7794940a6b7ab52000c440d5fe4f6f5d4e66ef5b Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Tue, 30 Jul 2024 15:49:47 +0800 Subject: [PATCH 3/3] update 'storage_initializer_volume' Signed-off-by: helenxie-bit --- sdk/python/kubeflow/training/api/training_client.py | 11 +++++++++-- sdk/python/kubeflow/training/constants/constants.py | 7 +------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sdk/python/kubeflow/training/api/training_client.py b/sdk/python/kubeflow/training/api/training_client.py index ccd1f82601..57504bb1b7 100644 --- a/sdk/python/kubeflow/training/api/training_client.py +++ b/sdk/python/kubeflow/training/api/training_client.py @@ -276,17 +276,24 @@ def train( resources=resources_per_worker, ) + storage_initializer_volume = models.V1Volume( + name=constants.STORAGE_INITIALIZER, + persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource( + claim_name=name + ), + ) + # create worker pod spec worker_pod_template_spec = utils.get_pod_template_spec( containers=[container_spec], - volumes=[constants.STORAGE_INITIALIZER_VOLUME], + volumes=[storage_initializer_volume], ) # create master pod spec master_pod_template_spec = utils.get_pod_template_spec( containers=[container_spec], init_containers=[init_container_spec], - volumes=[constants.STORAGE_INITIALIZER_VOLUME], + volumes=[storage_initializer_volume], ) job = utils.get_pytorchjob_template( diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 0513c3e31e..07c98bc787 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -84,12 +84,7 @@ name=STORAGE_INITIALIZER, mount_path=INIT_CONTAINER_MOUNT_PATH, ) -STORAGE_INITIALIZER_VOLUME = models.V1Volume( - name=STORAGE_INITIALIZER, - persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource( - claim_name=STORAGE_INITIALIZER - ), -) + TRAINER_TRANSFORMER_IMAGE = "docker.io/kubeflow/trainer-huggingface" # TFJob constants.