diff --git a/sdk/python/kubeflow/training/api/training_client.py b/sdk/python/kubeflow/training/api/training_client.py index 8750fcbb2d..57504bb1b7 100644 --- a/sdk/python/kubeflow/training/api/training_client.py +++ b/sdk/python/kubeflow/training/api/training_client.py @@ -204,7 +204,7 @@ def train( self.core_api.create_namespaced_persistent_volume_claim( namespace=namespace, body=utils.get_pvc_spec( - pvc_name=constants.STORAGE_INITIALIZER, + pvc_name=name, namespace=namespace, storage_config=storage_config, ), @@ -213,11 +213,8 @@ def train( pvc_list = self.core_api.list_namespaced_persistent_volume_claim(namespace) # Check if the PVC with the specified name exists for pvc in pvc_list.items: - if pvc.metadata.name == constants.STORAGE_INITIALIZER: - print( - f"PVC '{constants.STORAGE_INITIALIZER}' already exists in namespace " - f"{namespace}." - ) + if pvc.metadata.name == name: + print(f"PVC '{name}' already exists in namespace " f"{namespace}.") break else: raise RuntimeError(f"failed to create PVC. Error: {e}") @@ -279,17 +276,24 @@ def train( resources=resources_per_worker, ) + storage_initializer_volume = models.V1Volume( + name=constants.STORAGE_INITIALIZER, + persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource( + claim_name=name + ), + ) + # create worker pod spec worker_pod_template_spec = utils.get_pod_template_spec( containers=[container_spec], - volumes=[constants.STORAGE_INITIALIZER_VOLUME], + volumes=[storage_initializer_volume], ) # create master pod spec master_pod_template_spec = utils.get_pod_template_spec( containers=[container_spec], init_containers=[init_container_spec], - volumes=[constants.STORAGE_INITIALIZER_VOLUME], + volumes=[storage_initializer_volume], ) job = utils.get_pytorchjob_template( diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 0513c3e31e..07c98bc787 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -84,12 +84,7 @@ name=STORAGE_INITIALIZER, mount_path=INIT_CONTAINER_MOUNT_PATH, ) -STORAGE_INITIALIZER_VOLUME = models.V1Volume( - name=STORAGE_INITIALIZER, - persistent_volume_claim=models.V1PersistentVolumeClaimVolumeSource( - claim_name=STORAGE_INITIALIZER - ), -) + TRAINER_TRANSFORMER_IMAGE = "docker.io/kubeflow/trainer-huggingface" # TFJob constants.