kubeflow · copybara-service · Apr 22, 2024 · Apr 22, 2024
diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md
@@ -11,6 +11,8 @@
 * Add support for `text-bison@002` to `preview.llm.rlhf_pipeline`.
 * Apply latest GCPC image vulnerability resolutions (base OS and software updates).
 * Fix `preview.model_evaluation.autosxs_pipeline` documentation to show `autorater_prompt_parameters` as required.
+* Introduce placeholders: `SERVICE_ACCOUNT_PLACEHOLDER`, `NETWORK_PLACEHOLDER`, `PERSISTENT_RESOURCE_ID_PLACEHOLDER` and `ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER`.
+* Use `PERSISTENT_RESOURCE_ID_PLACEHOLDER` as the default value of `persistent_resource_id` for `CustomTrainingJobOp` and `create_custom_training_job_op_from_component`. With this change, a custom job created without explicitly setting `persistent_resource_id` will inherit the job-level `persistent_resource_id` if a Persistent Resource is set as the job-level runtime.
 
 ## Release 2.12.0
 * Log TensorBoard metrics from the `preview.llm.rlhf_pipeline` in real time.

diff --git a/components/google-cloud/google_cloud_pipeline_components/_placeholders.py b/components/google-cloud/google_cloud_pipeline_components/_placeholders.py
@@ -13,9 +13,38 @@
 # limitations under the License.
 """Placeholders for use in component authoring."""
 
-# prefer not using PIPELINE_TASK_ prefix like KFP does for reduced verbosity
+# prefer not using PIPELINE_TASK_ or PIPELINE_ prefix like KFP does for reduced
+# verbosity
 PROJECT_ID_PLACEHOLDER = "{{$.pipeline_google_cloud_project_id}}"
+"""A placeholder used to obtain Google Cloud project id where the pipeline
+executes. The placeholder value is set at pipeline runtime.
+"""
 LOCATION_PLACEHOLDER = "{{$.pipeline_google_cloud_location}}"
+"""A placeholder used to obtain Google Cloud location where the pipeline
+executes. The placeholder value is set at pipeline runtime.
+"""
+SERVICE_ACCOUNT_PLACEHOLDER = "{{$.pipeline_service_account}}"
+"""A placeholder used to obtain service account that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs).
+If PipelineJob doesn't have a service account set, this placeholder will be resolved to the default service account.
+The placeholder value is set at pipeline runtime.
+"""
+NETWORK_PLACEHOLDER = "{{$.pipeline_network}}"
+"""A placeholder used to obtain network that is defined in [PipelineJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs).
+If PipelineJob doesn't have a network set, this placeholder will be empty. The
+placeholder value is set at pipeline runtime.
+"""
+PERSISTENT_RESOURCE_ID_PLACEHOLDER = "{{$.pipeline_persistent_resource_id}}"
+"""A placeholder used to obtain persistent resource id that is defined in
+PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig).
+If PipelineJob doesn't have a persistent resource id, this placeholder will be
+empty. The placeholder value is set at pipeline runtime.
+"""
+ENCRYPTION_SPEC_KMS_KEY_NAME_PLACEHOLDER = "{{$.pipeline_encryption_key_name}}"
+"""A placeholder used to obtain kmsKeyName that is defined in
+PipelineJob's [EncryptionSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/EncryptionSpec).
+If PipelineJob doesn't have an encryption key name, this placeholder will be
+empty. The placeholder value is set at pipeline runtime.
+"""
 
 
 # omit placeholder type annotation to avoid dependency on KFP SDK internals

diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py
@@ -37,7 +37,7 @@ def custom_training_job(
  base_output_directory: str = '',
  labels: Dict[str, str] = {},
  encryption_spec_key_name: str = '',
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
 ):
  # fmt: off
@@ -57,7 +57,7 @@ def custom_training_job(
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
  encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key.
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placeholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will run in the specified persistent resource. In this case, please note that the network and CMEK configs on the job should be consistent with those on the PersistentResource; otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run.
  Returns:
  gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob.

diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py
@@ -18,6 +18,7 @@
 from typing import Callable, Dict, List, Optional
 import warnings
 
+from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components.preview.custom_job import component
 from kfp import components
 import yaml
@@ -68,7 +69,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: Optional[List[Dict[str, str]]] = None,
  base_output_directory: str = '',
  labels: Optional[Dict[str, str]] = None,
- persistent_resource_id: str = '',
+ persistent_resource_id: str = _placeholders.PERSISTENT_RESOURCE_ID_PLACEHOLDER,
  env: Optional[List[Dict[str, str]]] = None,
 ) -> Callable:
  # fmt: off
@@ -96,7 +97,7 @@ def create_custom_training_job_from_component(
  nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share).
  base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination).
  labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf).
- persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
+ persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. The default value is a placeholder that will be resolved to the PipelineJob [RuntimeConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.pipelineJobs#PipelineJob.RuntimeConfig)'s persistent resource id at runtime. However, if the PipelineJob doesn't set Persistent Resource as the job level runtime, the placeholder will be resolved to an empty string and the custom job will be run on demand. If the value is set explicitly, the custom job will run in the specified persistent resource. In this case, please note that the network and CMEK configs on the job should be consistent with those on the PersistentResource; otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.)
  env: Environment variables to be passed to the container. Takes the form `[{'name': '...', 'value': '...'}]`. Maximum limit is 100.
 
  Returns: