From 67d3cd6dbc0569d0050ee11bbcca9bcd80e457fb Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 22 Apr 2024 12:03:04 -0700 Subject: [PATCH] feat(components): Introduce placeholders: SERVICE_ACCOUNT_PLACEHOLDER, NETWORK_PLACEHOLDER, PERSISTENT_RESOURCE_ID_PLACEHOLDER and ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER. In addition, use PERSISTENT_RESOURCE_ID_PLACEHOLDER as the default value of persistent_resource_id for CustomTrainingJobOp and create_custom_training_job_op_from_component. With this change, custom job created without explicitly setting persistent_resource_id will inherit job level persistent_resource_id, if Persistent Resource is set as job level runtime PiperOrigin-RevId: 627113501 --- components/google-cloud/RELEASE.md | 2 ++ .../_placeholders.py | 31 ++++++++++++++++++- .../preview/custom_job/component.py | 4 +-- .../preview/custom_job/utils.py | 5 +-- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 23066ca18bb..b263dca2c95 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -11,6 +11,8 @@ * Add support for `text-bison@002` to `preview.llm.rlhf_pipeline`. * Apply latest GCPC image vulnerability resolutions (base OS and software updates). * Fix `preview.model_evaluation.autosxs_pipeline` documentation to show `autorater_prompt_parameters` as required. +* Introduce placeholders: `SERVICE_ACCOUNT_PLACEHOLDER`, `NETWORK_PLACEHOLDER`, `PERSISTENT_RESOURCE_ID_PLACEHOLDER` and `ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER` +* Use `PERSISTENT_RESOURCE_ID_PLACEHOLDER` as the default value of `persistent_resource_id` for `CustomTrainingJobOp` and `create_custom_training_job_op_from_component`. With this change, custom job created without explicitly setting `persistent_resource_id` will inherit job level `persistent_resource_id`, if Persistent Resource is set as job level runtime. 
## Release 2.12.0 * Log TensorBoard metrics from the `preview.llm.rlhf_pipeline` in real time. diff --git a/components/google-cloud/google_cloud_pipeline_components/_placeholders.py b/components/google-cloud/google_cloud_pipeline_components/_placeholders.py index 409b30c6955..5a7cc732274 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_placeholders.py +++ b/components/google-cloud/google_cloud_pipeline_components/_placeholders.py @@ -13,9 +13,38 @@ # limitations under the License. """Placeholders for use in component authoring.""" -# prefer not using PIPELINE_TASK_ prefix like KFP does for reduced verbosity +# prefer not using PIPELINE_TASK_ or PIPELINE_ prefix like KFP does for reduced +# verbosity PROJECT_ID_PLACEHOLDER = "{{$.pipeline_google_cloud_project_id}}" +"""A placeholder used to obtain Google Cloud project id where the pipeline +executes. The placeholder value is set at pipeline runtime. +""" LOCATION_PLACEHOLDER = "{{$.pipeline_google_cloud_location}}" +"""A placeholder used to obtain Google Cloud location where the pipeline +executes. The placeholder value is set at pipeline runtime. +""" +SERVICE_ACCOUNT_PLACEHOLDER = "{{$.pipeline_service_account}}" +"""A placeholder used to obtain service account that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs). +If PipelineJob doesn't have a service account set, this placeholder will be resolved to default service account. +The placeholder value is set at pipeline runtime. +""" +NETWORK_PLACEHOLDER = "{{$.pipeline_network}}" +"""A placeholder used to obtain network that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs). +If PipelineJob doesn't have a network set, this placeholder will be empty. The +placeholder value is set at pipeline runtime. 
+""" +PERSISTENT_RESOURCE_ID_PLACEHOLDER = "{{$.pipeline_persistent_resource_id}}" +"""A placeholder used to obtain persistent resource id that is defined in +PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig). +If PipelineJob doesn't have a persistent resource id, this placeholder will be +empty. The placeholder value is set at pipeline runtime. +""" +ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER = "{{$.pipeline_encryption_key_name}}" +"""A placeholder used to obtain kmsKeyName that is defined in +PipelineJob's [EncryptionSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/EncryptionSpec). +If PipelineJob doesn't have an encryption key name, this placeholder will be +empty. The placeholder value is set at pipeline runtime. +""" # omit placeholder type annotation to avoid dependency on KFP SDK internals diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py index b155e391305..585c9423e9c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py @@ -37,7 +37,7 @@ def custom_training_job( base_output_directory: str = '', labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', - persistent_resource_id: str = '', + persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER, project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off @@ -57,7 +57,7 @@ def custom_training_job( base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. 
See [more information](https://goo.gl/xmQnxf). encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. - persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) + persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placeholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will run in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. 
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py index 14a91fefab5..10498be9ec8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py @@ -18,6 +18,7 @@ from typing import Callable, Dict, List, Optional import warnings +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.preview.custom_job import component from kfp import components import yaml @@ -68,7 +69,7 @@ def create_custom_training_job_from_component( nfs_mounts: Optional[List[Dict[str, str]]] = None, base_output_directory: str = '', labels: Optional[Dict[str, str]] = None, - persistent_resource_id: str = '', + persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER, env: Optional[List[Dict[str, str]]] = None, ) -> Callable: # fmt: off @@ -96,7 +97,7 @@ def create_custom_training_job_from_component( nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). - persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. 
The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) + persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placeholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will run in the specified persistent resource, in this case, please note the network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100. Returns: