From 75734f854a5b35715f068327d69de6185396b5d0 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Wed, 22 Sep 2021 22:49:59 +0200 Subject: [PATCH 1/8] Add Python SDK for Kubeflow Training Operator --- hack/python-sdk/post_gen.py | 44 ++ sdk/python/README.md | 31 +- sdk/python/docs/V1JobCondition.md | 5 +- sdk/python/docs/V1JobStatus.md | 7 +- sdk/python/docs/V1MXJob.md | 15 + sdk/python/docs/V1MXJobList.md | 14 + sdk/python/docs/V1MXJobSpec.md | 13 + sdk/python/docs/V1PyTorchJob.md | 15 + sdk/python/docs/V1PyTorchJobList.md | 14 + sdk/python/docs/V1PyTorchJobSpec.md | 12 + sdk/python/docs/V1ReplicaSpec.md | 3 +- sdk/python/docs/V1ReplicaStatus.md | 1 + sdk/python/docs/V1RunPolicy.md | 15 + sdk/python/docs/V1SchedulingPolicy.md | 14 + sdk/python/docs/V1TFJob.md | 11 +- sdk/python/docs/V1TFJobList.md | 7 +- sdk/python/docs/V1TFJobSpec.md | 8 +- sdk/python/docs/V1XGBoostJob.md | 15 + sdk/python/docs/V1XGBoostJobList.md | 14 + sdk/python/docs/V1XGBoostJobSpec.md | 12 + sdk/python/kubeflow/tfjob/__init__.py | 34 -- sdk/python/kubeflow/tfjob/api/__init__.py | 19 - .../kubeflow/tfjob/api/tf_job_client.py | 442 ------------------ sdk/python/kubeflow/tfjob/api/tf_job_watch.py | 59 --- sdk/python/kubeflow/tfjob/configuration.py | 251 ---------- sdk/python/kubeflow/tfjob/models/__init__.py | 38 -- .../kubeflow/tfjob/models/v1_tf_job_spec.py | 246 ---------- sdk/python/kubeflow/tfjob/models/v1_time.py | 101 ---- sdk/python/kubeflow/tfjob/utils/utils.py | 74 --- sdk/python/kubeflow/training/__init__.py | 48 ++ sdk/python/kubeflow/training/api/__init__.py | 5 + .../training/api/py_torch_job_client.py | 384 +++++++++++++++ .../training/api/py_torch_job_watch.py | 60 +++ .../kubeflow/training/api/tf_job_client.py | 434 +++++++++++++++++ .../kubeflow/training/api/tf_job_watch.py | 60 +++ .../{tfjob => training}/api_client.py | 216 +++++---- sdk/python/kubeflow/training/configuration.py | 376 +++++++++++++++ .../{tfjob => training}/constants/__init__.py | 0 
.../constants/constants.py | 27 +- sdk/python/kubeflow/training/exceptions.py | 120 +++++ .../kubeflow/training/models/__init__.py | 34 ++ .../models/v1_job_condition.py | 71 ++- .../models/v1_job_status.py | 81 ++-- .../kubeflow/training/models/v1_mx_job.py | 228 +++++++++ .../training/models/v1_mx_job_list.py | 203 ++++++++ .../training/models/v1_mx_job_spec.py | 179 +++++++ .../training/models/v1_py_torch_job.py | 228 +++++++++ .../training/models/v1_py_torch_job_list.py | 205 ++++++++ .../training/models/v1_py_torch_job_spec.py | 150 ++++++ .../models/v1_replica_spec.py | 59 +-- .../models/v1_replica_status.py | 51 +- .../kubeflow/training/models/v1_run_policy.py | 232 +++++++++ .../training/models/v1_scheduling_policy.py | 198 ++++++++ .../{tfjob => training}/models/v1_tf_job.py | 73 ++- .../models/v1_tf_job_list.py | 70 ++- .../training/models/v1_tf_job_spec.py | 206 ++++++++ .../training/models/v1_xg_boost_job.py | 228 +++++++++ .../training/models/v1_xg_boost_job_list.py | 203 ++++++++ .../training/models/v1_xg_boost_job_spec.py | 148 ++++++ .../kubeflow/{tfjob => training}/rest.py | 70 +-- .../{tfjob => training}/utils/__init__.py | 0 sdk/python/kubeflow/training/utils/utils.py | 108 +++++ sdk/python/requirements.txt | 2 +- sdk/python/setup.py | 8 +- sdk/python/test/e2e/__init__.py | 0 sdk/python/test/e2e/test_e2e_pytorchjob.py | 82 ++++ sdk/python/test/e2e/test_e2e_tfjob.py | 74 +++ sdk/python/test/models/__init__.py | 0 .../test/models/test_v1_job_condition.py | 59 +++ sdk/python/test/models/test_v1_job_status.py | 84 ++++ sdk/python/test/models/test_v1_mx_job.py | 93 ++++ sdk/python/test/models/test_v1_mx_job_list.py | 144 ++++++ sdk/python/test/models/test_v1_mx_job_spec.py | 89 ++++ .../test/models/test_v1_py_torch_job.py | 92 ++++ .../test/models/test_v1_py_torch_job_list.py | 142 ++++++ .../test/models/test_v1_py_torch_job_spec.py | 87 ++++ .../test/models/test_v1_replica_spec.py | 54 +++ .../test/models/test_v1_replica_status.py | 54 +++ 
sdk/python/test/models/test_v1_run_policy.py | 62 +++ .../test/models/test_v1_scheduling_policy.py | 57 +++ sdk/python/test/models/test_v1_tf_job.py | 94 ++++ sdk/python/test/models/test_v1_tf_job_list.py | 146 ++++++ sdk/python/test/models/test_v1_tf_job_spec.py | 89 ++++ .../test/models/test_v1_xg_boost_job.py | 92 ++++ .../test/models/test_v1_xg_boost_job_list.py | 142 ++++++ .../test/models/test_v1_xg_boost_job_spec.py | 87 ++++ sdk/python/test/test_e2e.py | 72 --- sdk/python/test/test_v1_job_condition.py | 54 --- sdk/python/test/test_v1_job_status.py | 54 --- sdk/python/test/test_v1_replica_spec.py | 54 --- sdk/python/test/test_v1_replica_status.py | 54 --- sdk/python/test/test_v1_tf_job.py | 54 --- sdk/python/test/test_v1_tf_job_list.py | 54 --- sdk/python/test/test_v1_tf_job_spec.py | 54 --- 94 files changed, 6420 insertions(+), 2152 deletions(-) create mode 100755 hack/python-sdk/post_gen.py create mode 100644 sdk/python/docs/V1MXJob.md create mode 100644 sdk/python/docs/V1MXJobList.md create mode 100644 sdk/python/docs/V1MXJobSpec.md create mode 100644 sdk/python/docs/V1PyTorchJob.md create mode 100644 sdk/python/docs/V1PyTorchJobList.md create mode 100644 sdk/python/docs/V1PyTorchJobSpec.md create mode 100644 sdk/python/docs/V1RunPolicy.md create mode 100644 sdk/python/docs/V1SchedulingPolicy.md create mode 100644 sdk/python/docs/V1XGBoostJob.md create mode 100644 sdk/python/docs/V1XGBoostJobList.md create mode 100644 sdk/python/docs/V1XGBoostJobSpec.md delete mode 100644 sdk/python/kubeflow/tfjob/__init__.py delete mode 100644 sdk/python/kubeflow/tfjob/api/__init__.py delete mode 100644 sdk/python/kubeflow/tfjob/api/tf_job_client.py delete mode 100644 sdk/python/kubeflow/tfjob/api/tf_job_watch.py delete mode 100644 sdk/python/kubeflow/tfjob/configuration.py delete mode 100644 sdk/python/kubeflow/tfjob/models/__init__.py delete mode 100644 sdk/python/kubeflow/tfjob/models/v1_tf_job_spec.py delete mode 100644 sdk/python/kubeflow/tfjob/models/v1_time.py 
delete mode 100644 sdk/python/kubeflow/tfjob/utils/utils.py create mode 100644 sdk/python/kubeflow/training/__init__.py create mode 100644 sdk/python/kubeflow/training/api/__init__.py create mode 100644 sdk/python/kubeflow/training/api/py_torch_job_client.py create mode 100644 sdk/python/kubeflow/training/api/py_torch_job_watch.py create mode 100644 sdk/python/kubeflow/training/api/tf_job_client.py create mode 100644 sdk/python/kubeflow/training/api/tf_job_watch.py rename sdk/python/kubeflow/{tfjob => training}/api_client.py (79%) create mode 100644 sdk/python/kubeflow/training/configuration.py rename sdk/python/kubeflow/{tfjob => training}/constants/__init__.py (100%) rename sdk/python/kubeflow/{tfjob => training}/constants/constants.py (62%) create mode 100644 sdk/python/kubeflow/training/exceptions.py create mode 100644 sdk/python/kubeflow/training/models/__init__.py rename sdk/python/kubeflow/{tfjob => training}/models/v1_job_condition.py (77%) rename sdk/python/kubeflow/{tfjob => training}/models/v1_job_status.py (65%) create mode 100644 sdk/python/kubeflow/training/models/v1_mx_job.py create mode 100644 sdk/python/kubeflow/training/models/v1_mx_job_list.py create mode 100644 sdk/python/kubeflow/training/models/v1_mx_job_spec.py create mode 100644 sdk/python/kubeflow/training/models/v1_py_torch_job.py create mode 100644 sdk/python/kubeflow/training/models/v1_py_torch_job_list.py create mode 100644 sdk/python/kubeflow/training/models/v1_py_torch_job_spec.py rename sdk/python/kubeflow/{tfjob => training}/models/v1_replica_spec.py (70%) rename sdk/python/kubeflow/{tfjob => training}/models/v1_replica_status.py (75%) create mode 100644 sdk/python/kubeflow/training/models/v1_run_policy.py create mode 100644 sdk/python/kubeflow/training/models/v1_scheduling_policy.py rename sdk/python/kubeflow/{tfjob => training}/models/v1_tf_job.py (69%) rename sdk/python/kubeflow/{tfjob => training}/models/v1_tf_job_list.py (71%) create mode 100644 
sdk/python/kubeflow/training/models/v1_tf_job_spec.py create mode 100644 sdk/python/kubeflow/training/models/v1_xg_boost_job.py create mode 100644 sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py create mode 100644 sdk/python/kubeflow/training/models/v1_xg_boost_job_spec.py rename sdk/python/kubeflow/{tfjob => training}/rest.py (85%) rename sdk/python/kubeflow/{tfjob => training}/utils/__init__.py (100%) create mode 100644 sdk/python/kubeflow/training/utils/utils.py create mode 100644 sdk/python/test/e2e/__init__.py create mode 100644 sdk/python/test/e2e/test_e2e_pytorchjob.py create mode 100644 sdk/python/test/e2e/test_e2e_tfjob.py create mode 100644 sdk/python/test/models/__init__.py create mode 100644 sdk/python/test/models/test_v1_job_condition.py create mode 100644 sdk/python/test/models/test_v1_job_status.py create mode 100644 sdk/python/test/models/test_v1_mx_job.py create mode 100644 sdk/python/test/models/test_v1_mx_job_list.py create mode 100644 sdk/python/test/models/test_v1_mx_job_spec.py create mode 100644 sdk/python/test/models/test_v1_py_torch_job.py create mode 100644 sdk/python/test/models/test_v1_py_torch_job_list.py create mode 100644 sdk/python/test/models/test_v1_py_torch_job_spec.py create mode 100644 sdk/python/test/models/test_v1_replica_spec.py create mode 100644 sdk/python/test/models/test_v1_replica_status.py create mode 100644 sdk/python/test/models/test_v1_run_policy.py create mode 100644 sdk/python/test/models/test_v1_scheduling_policy.py create mode 100644 sdk/python/test/models/test_v1_tf_job.py create mode 100644 sdk/python/test/models/test_v1_tf_job_list.py create mode 100644 sdk/python/test/models/test_v1_tf_job_spec.py create mode 100644 sdk/python/test/models/test_v1_xg_boost_job.py create mode 100644 sdk/python/test/models/test_v1_xg_boost_job_list.py create mode 100644 sdk/python/test/models/test_v1_xg_boost_job_spec.py delete mode 100644 sdk/python/test/test_e2e.py delete mode 100644 
sdk/python/test/test_v1_job_condition.py delete mode 100644 sdk/python/test/test_v1_job_status.py delete mode 100644 sdk/python/test/test_v1_replica_spec.py delete mode 100644 sdk/python/test/test_v1_replica_status.py delete mode 100644 sdk/python/test/test_v1_tf_job.py delete mode 100644 sdk/python/test/test_v1_tf_job_list.py delete mode 100644 sdk/python/test/test_v1_tf_job_spec.py diff --git a/hack/python-sdk/post_gen.py b/hack/python-sdk/post_gen.py new file mode 100755 index 0000000000..ae737fd8fd --- /dev/null +++ b/hack/python-sdk/post_gen.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +""" +This script is used for updating generated SDK files. +""" + +import os +import fileinput +import re + +__replacements = [ + ("import kubeflow.training", "from kubeflow.training.models import *"), + ("kubeflow.training.models.v1\/.*.v1.", "V1") +] + +sdk_dir = os.path.abspath(os.path.join(__file__, "../../..", "sdk/python")) + + +def main(): + fix_test_files() + + +def fix_test_files() -> None: + """ + Fix invalid model imports in generated model tests + """ + os.path.realpath(__file__) + test_folder_dir = os.path.join(sdk_dir, "test/models") + test_files = os.listdir(test_folder_dir) + for test_file in test_files: + print(test_file) + with fileinput.FileInput(os.path.join(test_folder_dir, test_file), inplace=True) as file: + for line in file: + print(_apply_regex(line), end='') + + +def _apply_regex(input_str: str) -> str: + for pattern, replacement in __replacements: + input_str = re.sub(pattern, replacement, input_str) + return input_str + + +if __name__ == '__main__': + main() diff --git a/sdk/python/README.md b/sdk/python/README.md index 35800a16a8..31a128b911 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -1,5 +1,5 @@ -# Kubeflow TFJob SDK -Python SDK for TF-Operator +# Kubeflow Training SDK +Python SDK for Training Operator ## Requirements. 
@@ -9,12 +9,12 @@ Python 2.7 and 3.5+ ### pip install ```sh -pip install kubeflow-tfjob +pip install kubeflow-training ``` Then import the package: ```python -from kubeflow import tfjob +from kubeflow import training ``` ### Setuptools @@ -46,14 +46,37 @@ Class | Method | Description [TFJobClient](docs/TFJobClient.md) | [is_job_succeeded](docs/TFJobClient.md#is_job_succeeded) | Check if the TFJob status is Succeeded | [TFJobClient](docs/TFJobClient.md) | [get_pod_names](docs/TFJobClient.md#get_pod_names) | Get pod names of TFJob | [TFJobClient](docs/TFJobClient.md) | [get_logs](docs/TFJobClient.md#get_logs) | Get training logs of the TFJob | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [create](docs/PyTorchJobClient.md#create) | Create PyTorchJob| +[PyTorchJobClient](docs/PyTorchJobClient.md) | [get](docs/PyTorchJobClient.md#get) | Get the specified PyTorchJob or all PyTorchJobs in the namespace | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [patch](docs/PyTorchJobClient.md#patch) | Patch the specified PyTorchJob| +[PyTorchJobClient](docs/PyTorchJobClient.md) | [delete](docs/PyTorchJobClient.md#delete) | Delete the specified PyTorchJob | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [wait_for_job](docs/PyTorchJobClient.md#wait_for_job) | Wait for the specified job to finish | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [wait_for_condition](docs/PyTorchJobClient.md#wait_for_condition) | Wait until any of the specified conditions occurs | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [get_job_status](docs/PyTorchJobClient.md#get_job_status) | Get the PyTorchJob status| +[PyTorchJobClient](docs/PyTorchJobClient.md) | [is_job_running](docs/PyTorchJobClient.md#is_job_running) | Check if the PyTorchJob is running | +[PyTorchJobClient](docs/PyTorchJobClient.md) | [is_job_succeeded](docs/PyTorchJobClient.md#is_job_succeeded) | Check if the PyTorchJob has succeeded | +[PyTorchJobClient](docs/PyTorchJobClient.md) | 
[get_pod_names](docs/PyTorchJobClient.md#get_pod_names) | Get pod names of PyTorchJob | +[PyTorchJobClient](docs/PyTorchJobClient.md)| [get_logs](docs/PyTorchJobClient.md#get_logs) | Get training logs of the PyTorchJob | +## Documentation For Models ## Documentation For Models - [V1JobCondition](docs/V1JobCondition.md) - [V1JobStatus](docs/V1JobStatus.md) + - [V1MXJob](docs/V1MXJob.md) + - [V1MXJobList](docs/V1MXJobList.md) + - [V1MXJobSpec](docs/V1MXJobSpec.md) + - [V1PyTorchJob](docs/V1PyTorchJob.md) + - [V1PyTorchJobList](docs/V1PyTorchJobList.md) + - [V1PyTorchJobSpec](docs/V1PyTorchJobSpec.md) - [V1ReplicaSpec](docs/V1ReplicaSpec.md) - [V1ReplicaStatus](docs/V1ReplicaStatus.md) + - [V1RunPolicy](docs/V1RunPolicy.md) + - [V1SchedulingPolicy](docs/V1SchedulingPolicy.md) - [V1TFJob](docs/V1TFJob.md) - [V1TFJobList](docs/V1TFJobList.md) - [V1TFJobSpec](docs/V1TFJobSpec.md) + - [V1XGBoostJob](docs/V1XGBoostJob.md) + - [V1XGBoostJobList](docs/V1XGBoostJobList.md) + - [V1XGBoostJobSpec](docs/V1XGBoostJobSpec.md) diff --git a/sdk/python/docs/V1JobCondition.md b/sdk/python/docs/V1JobCondition.md index 2202f464fc..37f0f6f094 100644 --- a/sdk/python/docs/V1JobCondition.md +++ b/sdk/python/docs/V1JobCondition.md @@ -1,10 +1,11 @@ # V1JobCondition +JobCondition describes the state of the job at a certain point. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**last_transition_time** | [**V1Time**](V1Time.md) | Last time the condition transitioned from one status to another. | [optional] -**last_update_time** | [**V1Time**](V1Time.md) | The last time this condition was updated. 
| [optional] +**last_transition_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] +**last_update_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] **message** | **str** | A human readable message indicating details about the transition. | [optional] **reason** | **str** | The reason for the condition's last transition. | [optional] **status** | **str** | Status of the condition, one of True, False, Unknown. | diff --git a/sdk/python/docs/V1JobStatus.md b/sdk/python/docs/V1JobStatus.md index 4d027c4374..fad337e8b6 100644 --- a/sdk/python/docs/V1JobStatus.md +++ b/sdk/python/docs/V1JobStatus.md @@ -1,13 +1,14 @@ # V1JobStatus +JobStatus represents the current observed state of the training Job. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**completion_time** | [**V1Time**](V1Time.md) | Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. | [optional] +**completion_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] **conditions** | [**list[V1JobCondition]**](V1JobCondition.md) | Conditions is an array of current observed job conditions. | -**last_reconcile_time** | [**V1Time**](V1Time.md) | Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. | [optional] +**last_reconcile_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] **replica_statuses** | [**dict(str, V1ReplicaStatus)**](V1ReplicaStatus.md) | ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica. 
| -**start_time** | [**V1Time**](V1Time.md) | Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. | [optional] +**start_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1MXJob.md b/sdk/python/docs/V1MXJob.md new file mode 100644 index 0000000000..4485f9da78 --- /dev/null +++ b/sdk/python/docs/V1MXJob.md @@ -0,0 +1,15 @@ +# V1MXJob + +MXJob is the Schema for the mxjobs API +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**spec** | [**V1MXJobSpec**](V1MXJobSpec.md) | | [optional] +**status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1MXJobList.md b/sdk/python/docs/V1MXJobList.md new file mode 100644 index 0000000000..b31f61d39d --- /dev/null +++ b/sdk/python/docs/V1MXJobList.md @@ -0,0 +1,14 @@ +# V1MXJobList + +MXJobList contains a list of MXJob +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**items** | [**list[V1MXJob]**](V1MXJob.md) | | +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1MXJobSpec.md b/sdk/python/docs/V1MXJobSpec.md new file mode 100644 index 0000000000..caa943d9b3 --- /dev/null +++ b/sdk/python/docs/V1MXJobSpec.md @@ -0,0 +1,13 @@ +# V1MXJobSpec + +MXJobSpec defines the desired state of MXJob +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**job_mode** | **str** | JobMode specify the kind of MXjob to do. Different mode may have different MXReplicaSpecs request | +**mx_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | MXReplicaSpecs is map of common.ReplicaType and common.ReplicaSpec specifies the MX replicas to run. For example, { \"Scheduler\": common.ReplicaSpec, \"Server\": common.ReplicaSpec, \"Worker\": common.ReplicaSpec, } | +**run_policy** | [**V1RunPolicy**](V1RunPolicy.md) | | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1PyTorchJob.md b/sdk/python/docs/V1PyTorchJob.md new file mode 100644 index 0000000000..dde8ebeb00 --- /dev/null +++ b/sdk/python/docs/V1PyTorchJob.md @@ -0,0 +1,15 @@ +# V1PyTorchJob + +PyTorchJob Represents a PyTorchJob resource. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. 
Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**spec** | [**V1PyTorchJobSpec**](V1PyTorchJobSpec.md) | | [optional] +**status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1PyTorchJobList.md b/sdk/python/docs/V1PyTorchJobList.md new file mode 100644 index 0000000000..61f8e2ec08 --- /dev/null +++ b/sdk/python/docs/V1PyTorchJobList.md @@ -0,0 +1,14 @@ +# V1PyTorchJobList + +PyTorchJobList is a list of PyTorchJobs. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**items** | [**list[V1PyTorchJob]**](V1PyTorchJob.md) | List of PyTorchJobs. | +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1PyTorchJobSpec.md b/sdk/python/docs/V1PyTorchJobSpec.md new file mode 100644 index 0000000000..90064e5c1f --- /dev/null +++ b/sdk/python/docs/V1PyTorchJobSpec.md @@ -0,0 +1,12 @@ +# V1PyTorchJobSpec + +PyTorchJobSpec is a desired state description of the PyTorchJob. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**pytorch_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } | +**run_policy** | [**V1RunPolicy**](V1RunPolicy.md) | | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1ReplicaSpec.md b/sdk/python/docs/V1ReplicaSpec.md index 6f9faf0c7c..b8ac81f92f 100644 --- a/sdk/python/docs/V1ReplicaSpec.md +++ b/sdk/python/docs/V1ReplicaSpec.md @@ -1,11 +1,12 @@ # V1ReplicaSpec +ReplicaSpec is a description of the replica ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **replicas** | **int** | Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1. | [optional] **restart_policy** | **str** | Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never. 
| [optional] -**template** | [**V1PodTemplateSpec**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1PodTemplateSpec.md) | Template is the object that describes the pod that will be created for this replica. RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec | [optional] +**template** | [**K8sIoApiCoreV1PodTemplateSpec**](K8sIoApiCoreV1PodTemplateSpec.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1ReplicaStatus.md b/sdk/python/docs/V1ReplicaStatus.md index 847e365ebe..a9902ee5d8 100644 --- a/sdk/python/docs/V1ReplicaStatus.md +++ b/sdk/python/docs/V1ReplicaStatus.md @@ -1,5 +1,6 @@ # V1ReplicaStatus +ReplicaStatus represents the current observed state of the replica. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- diff --git a/sdk/python/docs/V1RunPolicy.md b/sdk/python/docs/V1RunPolicy.md new file mode 100644 index 0000000000..7a6895e794 --- /dev/null +++ b/sdk/python/docs/V1RunPolicy.md @@ -0,0 +1,15 @@ +# V1RunPolicy + +RunPolicy encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**active_deadline_seconds** | **int** | Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. | [optional] +**backoff_limit** | **int** | Optional number of retries before marking this job failed. | [optional] +**clean_pod_policy** | **str** | CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. 
| [optional] +**scheduling_policy** | [**V1SchedulingPolicy**](V1SchedulingPolicy.md) | | [optional] +**ttl_seconds_after_finished** | **int** | TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1SchedulingPolicy.md b/sdk/python/docs/V1SchedulingPolicy.md new file mode 100644 index 0000000000..3f64a74764 --- /dev/null +++ b/sdk/python/docs/V1SchedulingPolicy.md @@ -0,0 +1,14 @@ +# V1SchedulingPolicy + +SchedulingPolicy encapsulates various scheduling policies of the distributed training job, for example `minAvailable` for gang-scheduling. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**min_available** | **int** | | [optional] +**min_resources** | [**dict(str, K8sIoApimachineryPkgApiResourceQuantity)**](K8sIoApimachineryPkgApiResourceQuantity.md) | | [optional] +**priority_class** | **str** | | [optional] +**queue** | **str** | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1TFJob.md b/sdk/python/docs/V1TFJob.md index a3d679901d..eca12a4459 100644 --- a/sdk/python/docs/V1TFJob.md +++ b/sdk/python/docs/V1TFJob.md @@ -1,13 +1,14 @@ # V1TFJob +TFJob represents a TFJob resource. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources | [optional] -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ObjectMeta**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ObjectMeta.md) | Standard Kubernetes object's metadata. | [optional] -**spec** | [**V1TFJobSpec**](V1TFJobSpec.md) | Specification of the desired state of the TFJob. | [optional] -**status** | [**V1JobStatus**](V1JobStatus.md) | Most recently observed status of the TFJob. Read-only (modified by the system). | [optional] +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**spec** | [**V1TFJobSpec**](V1TFJobSpec.md) | | [optional] +**status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1TFJobList.md b/sdk/python/docs/V1TFJobList.md index eeb06a3e09..eafb589e7e 100644 --- a/sdk/python/docs/V1TFJobList.md +++ b/sdk/python/docs/V1TFJobList.md @@ -1,12 +1,13 @@ # V1TFJobList +TFJobList is a list of TFJobs. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources | [optional] +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **items** | [**list[V1TFJob]**](V1TFJob.md) | List of TFJobs. | -**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds | [optional] -**metadata** | [**V1ListMeta**](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ListMeta.md) | Standard list metadata. | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1TFJobSpec.md b/sdk/python/docs/V1TFJobSpec.md index 7c90b45304..03efaa2d67 100644 --- a/sdk/python/docs/V1TFJobSpec.md +++ b/sdk/python/docs/V1TFJobSpec.md @@ -1,13 +1,13 @@ # V1TFJobSpec +TFJobSpec is a desired state description of the TFJob. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**active_deadline_seconds** | **int** | Specifies the duration (in seconds) since startTime during which the job can remain active before it is terminated. Must be a positive integer. This setting applies only to pods where restartPolicy is OnFailure or Always. | [optional] -**backoff_limit** | **int** | Number of retries before marking this job as failed. | [optional] -**clean_pod_policy** | **str** | Defines the policy for cleaning up pods after the TFJob completes. Defaults to Running. 
| [optional] +**enable_dynamic_worker** | **bool** | A switch to enable dynamic worker | [optional] +**run_policy** | [**V1RunPolicy**](V1RunPolicy.md) | | +**success_policy** | **str** | SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. | [optional] **tf_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } | -**ttl_seconds_after_finished** | **int** | Defines the TTL for cleaning up finished TFJobs (temporary before kubernetes adds the cleanup controller). It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Defaults to infinite. | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1XGBoostJob.md b/sdk/python/docs/V1XGBoostJob.md new file mode 100644 index 0000000000..d6cab1bb67 --- /dev/null +++ b/sdk/python/docs/V1XGBoostJob.md @@ -0,0 +1,15 @@ +# V1XGBoostJob + +XGBoostJob is the Schema for the xgboostjobs API +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**spec** | [**V1XGBoostJobSpec**](V1XGBoostJobSpec.md) | | [optional] +**status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1XGBoostJobList.md b/sdk/python/docs/V1XGBoostJobList.md new file mode 100644 index 0000000000..0ba29ba98d --- /dev/null +++ b/sdk/python/docs/V1XGBoostJobList.md @@ -0,0 +1,14 @@ +# V1XGBoostJobList + +XGBoostJobList contains a list of XGBoostJob +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] +**items** | [**list[V1XGBoostJob]**](V1XGBoostJob.md) | | +**kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] +**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/docs/V1XGBoostJobSpec.md b/sdk/python/docs/V1XGBoostJobSpec.md new file mode 100644 index 0000000000..30264ae9b0 --- /dev/null +++ b/sdk/python/docs/V1XGBoostJobSpec.md @@ -0,0 +1,12 @@ +# V1XGBoostJobSpec + +XGBoostJobSpec defines the desired state of XGBoostJob +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**run_policy** | [**V1RunPolicy**](V1RunPolicy.md) | | +**xgb_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/sdk/python/kubeflow/tfjob/__init__.py b/sdk/python/kubeflow/tfjob/__init__.py deleted file mode 100644 index d45b636361..0000000000 --- a/sdk/python/kubeflow/tfjob/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 - -# flake8: noqa - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -# import utils and constants -from kubeflow.tfjob.utils import utils -from kubeflow.tfjob.constants import constants - -# import ApiClient -from kubeflow.tfjob.api_client import ApiClient -from kubeflow.tfjob.configuration import Configuration -from kubeflow.tfjob.api.tf_job_client import TFJobClient - -# import models into sdk package -from kubeflow.tfjob.models.v1_job_condition import V1JobCondition -from 
kubeflow.tfjob.models.v1_job_status import V1JobStatus -from kubeflow.tfjob.models.v1_replica_spec import V1ReplicaSpec -from kubeflow.tfjob.models.v1_replica_status import V1ReplicaStatus -from kubeflow.tfjob.models.v1_tf_job import V1TFJob -from kubeflow.tfjob.models.v1_tf_job_list import V1TFJobList -from kubeflow.tfjob.models.v1_tf_job_spec import V1TFJobSpec diff --git a/sdk/python/kubeflow/tfjob/api/__init__.py b/sdk/python/kubeflow/tfjob/api/__init__.py deleted file mode 100644 index 079d313d8f..0000000000 --- a/sdk/python/kubeflow/tfjob/api/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -# flake8: noqa - -# import apis into api package diff --git a/sdk/python/kubeflow/tfjob/api/tf_job_client.py b/sdk/python/kubeflow/tfjob/api/tf_job_client.py deleted file mode 100644 index 5906ba2466..0000000000 --- a/sdk/python/kubeflow/tfjob/api/tf_job_client.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright 2019 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import multiprocessing -import time -import logging -import threading -import queue - -from kubernetes import client, config -from kubernetes import watch as k8s_watch - -from kubeflow.tfjob.constants import constants -from kubeflow.tfjob.utils import utils - -from .tf_job_watch import watch as tfjob_watch - -logging.basicConfig(format='%(message)s') -logging.getLogger().setLevel(logging.INFO) - - -def wrap_log_stream(q, stream): - while True: - try: - logline = next(stream) - q.put(logline) - except StopIteration: - q.put(None) - return - except Exception as e: - raise RuntimeError( - "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) - - -def get_log_queue_pool(streams): - pool = [] - for stream in streams: - q = queue.Queue(maxsize=100) - pool.append(q) - threading.Thread(target=wrap_log_stream, args=(q, stream)).start() - return pool - - -class TFJobClient(object): - def __init__(self, config_file=None, context=None, # pylint: disable=too-many-arguments - client_configuration=None, persist_config=True): - """ - TFJob client constructor - :param config_file: kubeconfig file, defaults to ~/.kube/config - :param context: kubernetes context - :param client_configuration: kubernetes configuration object - :param persist_config: - """ - if config_file or not utils.is_running_in_k8s(): - config.load_kube_config( - config_file=config_file, - context=context, - client_configuration=client_configuration, - persist_config=persist_config) - else: - config.load_incluster_config() - - self.custom_api = client.CustomObjectsApi() - self.core_api = 
client.CoreV1Api() - - - def create(self, tfjob, namespace=None): - """ - Create the TFJob - :param tfjob: tfjob object - :param namespace: defaults to current or default namespace - :return: created tfjob - """ - - if namespace is None: - namespace = utils.set_tfjob_namespace(tfjob) - - try: - outputs = self.custom_api.create_namespaced_custom_object( - constants.TFJOB_GROUP, - constants.TFJOB_VERSION, - namespace, - constants.TFJOB_PLURAL, - tfjob) - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CustomObjectsApi->create_namespaced_custom_object:\ - %s\n" % e) - - return outputs - - def get(self, name=None, namespace=None, watch=False, timeout_seconds=600): #pylint: disable=inconsistent-return-statements - """ - Get the tfjob - :param name: existing tfjob name, if not defined, the get all tfjobs in the namespace. - :param namespace: defaults to current or default namespace - :param watch: Watch the TFJob if `True`. - :param timeout_seconds: How long to watch the job.. - :return: tfjob - """ - if namespace is None: - namespace = utils.get_default_target_namespace() - - if name: - if watch: - tfjob_watch( - name=name, - namespace=namespace, - timeout_seconds=timeout_seconds) - else: - thread = self.custom_api.get_namespaced_custom_object( - constants.TFJOB_GROUP, - constants.TFJOB_VERSION, - namespace, - constants.TFJOB_PLURAL, - name, - async_req=True) - - tfjob = None - try: - tfjob = thread.get(constants.APISERVER_TIMEOUT) - except multiprocessing.TimeoutError: - raise RuntimeError("Timeout trying to get TFJob.") - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CustomObjectsApi->get_namespaced_custom_object:\ - %s\n" % e) - except Exception as e: - raise RuntimeError( - "There was a problem to get TFJob {0} in namespace {1}. 
Exception: \ - {2} ".format(name, namespace, e)) - return tfjob - else: - if watch: - tfjob_watch( - namespace=namespace, - timeout_seconds=timeout_seconds) - else: - thread = self.custom_api.list_namespaced_custom_object( - constants.TFJOB_GROUP, - constants.TFJOB_VERSION, - namespace, - constants.TFJOB_PLURAL, - async_req=True) - - tfjobs = None - try: - tfjobs = thread.get(constants.APISERVER_TIMEOUT) - except multiprocessing.TimeoutError: - raise RuntimeError("Timeout trying to get TFJob.") - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CustomObjectsApi->list_namespaced_custom_object:\ - %s\n" % e) - except Exception as e: - raise RuntimeError( - "There was a problem to list TFJobs in namespace {0}. \ - Exception: {1} ".format(namespace, e)) - return tfjobs - - - def patch(self, name, tfjob, namespace=None): - """ - Patch existing tfjob - :param name: existing tfjob name - :param tfjob: patched tfjob - :param namespace: defaults to current or default namespace - :return: patched tfjob - """ - if namespace is None: - namespace = utils.set_tfjob_namespace(tfjob) - - try: - outputs = self.custom_api.patch_namespaced_custom_object( - constants.TFJOB_GROUP, - constants.TFJOB_VERSION, - namespace, - constants.TFJOB_PLURAL, - name, - tfjob) - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CustomObjectsApi->patch_namespaced_custom_object:\ - %s\n" % e) - - return outputs - - - def delete(self, name, namespace=None): - """ - Delete the tfjob - :param name: tfjob name - :param namespace: defaults to current or default namespace - :return: - """ - if namespace is None: - namespace = utils.get_default_target_namespace() - - try: - return self.custom_api.delete_namespaced_custom_object( - group=constants.TFJOB_GROUP, - version=constants.TFJOB_VERSION, - namespace=namespace, - plural=constants.TFJOB_PLURAL, - name=name, - body=client.V1DeleteOptions()) - except client.rest.ApiException as e: - 
raise RuntimeError( - "Exception when calling CustomObjectsApi->delete_namespaced_custom_object:\ - %s\n" % e) - - - def wait_for_job(self, name, #pylint: disable=inconsistent-return-statements - namespace=None, - timeout_seconds=600, - polling_interval=30, - watch=False, - status_callback=None): - """Wait for the specified job to finish. - - :param name: Name of the TfJob. - :param namespace: defaults to current or default namespace. - :param timeout_seconds: How long to wait for the job. - :param polling_interval: How often to poll for the status of the job. - :param watch: Watch the TFJob if `True`. - :param status_callback: (Optional): Callable. If supplied this callable is - invoked after we poll the job. Callable takes a single argument which - is the job. - :return: - """ - if namespace is None: - namespace = utils.get_default_target_namespace() - - if watch: - tfjob_watch( - name=name, - namespace=namespace, - timeout_seconds=timeout_seconds) - else: - return self.wait_for_condition( - name, - ["Succeeded", "Failed"], - namespace=namespace, - timeout_seconds=timeout_seconds, - polling_interval=polling_interval, - status_callback=status_callback) - - - def wait_for_condition(self, name, - expected_condition, - namespace=None, - timeout_seconds=600, - polling_interval=30, - status_callback=None): - """Waits until any of the specified conditions occur. - - :param name: Name of the job. - :param expected_condition: A list of conditions. Function waits until any of the - supplied conditions is reached. - :param namespace: defaults to current or default namespace. - :param timeout_seconds: How long to wait for the job. - :param polling_interval: How often to poll for the status of the job. - :param status_callback: (Optional): Callable. If supplied this callable is - invoked after we poll the job. Callable takes a single argument which - is the job. 
- :return: Object TFJob status - """ - - if namespace is None: - namespace = utils.get_default_target_namespace() - - for _ in range(round(timeout_seconds/polling_interval)): - - tfjob = None - tfjob = self.get(name, namespace=namespace) - - if tfjob: - if status_callback: - status_callback(tfjob) - - # If we poll the CRD quick enough status won't have been set yet. - conditions = tfjob.get("status", {}).get("conditions", []) - # Conditions might have a value of None in status. - conditions = conditions or [] - for c in conditions: - if c.get("type", "") in expected_condition: - return tfjob - - time.sleep(polling_interval) - - raise RuntimeError( - "Timeout waiting for TFJob {0} in namespace {1} to enter one of the " - "conditions {2}.".format(name, namespace, expected_condition), tfjob) - - - def get_job_status(self, name, namespace=None): - """Returns TFJob status, such as Running, Failed or Succeeded. - - :param name: The TFJob name. - :param namespace: defaults to current or default namespace. - :return: Object TFJob status - """ - if namespace is None: - namespace = utils.get_default_target_namespace() - - tfjob = self.get(name, namespace=namespace) - last_condition = tfjob.get("status", {}).get("conditions", [{}])[-1] - return last_condition.get("type", "") - - - def is_job_running(self, name, namespace=None): - """Returns true if the TFJob running; false otherwise. - - :param name: The TFJob name. - :param namespace: defaults to current or default namespace. - :return: True or False - """ - tfjob_status = self.get_job_status(name, namespace=namespace) - return tfjob_status.lower() == "running" - - - def is_job_succeeded(self, name, namespace=None): - """Returns true if the TFJob succeeded; false otherwise. - - :param name: The TFJob name. - :param namespace: defaults to current or default namespace. 
- :return: True or False - """ - tfjob_status = self.get_job_status(name, namespace=namespace) - return tfjob_status.lower() == "succeeded" - - - def get_pod_names(self, name, namespace=None, master=False, #pylint: disable=inconsistent-return-statements - replica_type=None, replica_index=None): - """ - Get pod names of TFJob. - :param name: tfjob name - :param namespace: defaults to current or default namespace. - :param master: Only get pod with label 'job-role: master' pod if True. - :param replica_type: User can specify one of 'worker, ps, chief' to only get one type pods. - By default get all type pods. - :param replica_index: User can specfy replica index to get one pod of TFJob. - :return: set: pods name - """ - - if namespace is None: - namespace = utils.get_default_target_namespace() - - labels = utils.get_labels(name, master=master, - replica_type=replica_type, - replica_index=replica_index) - - try: - resp = self.core_api.list_namespaced_pod( - namespace, label_selector=utils.to_selector(labels)) - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) - - pod_names = [] - for pod in resp.items: - if pod.metadata and pod.metadata.name: - pod_names.append(pod.metadata.name) - - if not pod_names: - logging.warning("Not found Pods of the TFJob %s with the labels %s.", name, labels) - else: - return set(pod_names) - - def get_logs(self, name, namespace=None, master=True, - replica_type=None, replica_index=None, - follow=False): - """ - Get training logs of the TFJob. - By default only get the logs of Pod that has labels 'job-role: master'. - :param name: tfjob name - :param namespace: defaults to current or default namespace. - :param master: By default get pod with label 'job-role: master' pod if True. - If need to get more Pod Logs, set False. - :param replica_type: User can specify one of 'worker, ps, chief' to only get one type pods. - By default get all type pods. 
- :param replica_index: User can specfy replica index to get one pod of TFJob. - :param follow: Follow the log stream of the pod. Defaults to false. - :return: str: pods logs - """ - - if namespace is None: - namespace = utils.get_default_target_namespace() - - pod_names = list(self.get_pod_names(name, namespace=namespace, - master=master, - replica_type=replica_type, - replica_index=replica_index)) - if pod_names: - if follow: - log_streams = [] - for pod in pod_names: - log_streams.append(k8s_watch.Watch().stream(self.core_api.read_namespaced_pod_log, - name=pod, namespace=namespace)) - finished = [False for _ in log_streams] - - # create thread and queue per stream, for non-blocking iteration - log_queue_pool = get_log_queue_pool(log_streams) - - # iterate over every watching pods' log queue - while True: - for index, log_queue in enumerate(log_queue_pool): - if all(finished): - return - if finished[index]: - continue - # grouping the every 50 log lines of the same pod - for _ in range(50): - try: - logline = log_queue.get(timeout=1) - if logline is None: - finished[index] = True - break - logging.info("[Pod %s]: %s", pod_names[index], logline) - except queue.Empty: - break - else: - for pod in pod_names: - try: - pod_logs = self.core_api.read_namespaced_pod_log(pod, namespace) - logging.info("The logs of Pod %s:\n %s", pod, pod_logs) - except client.rest.ApiException as e: - raise RuntimeError( - "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) - else: - raise RuntimeError("Not found Pods of the TFJob {} " - "in namespace {}".format(name, namespace)) diff --git a/sdk/python/kubeflow/tfjob/api/tf_job_watch.py b/sdk/python/kubeflow/tfjob/api/tf_job_watch.py deleted file mode 100644 index 4f4558ef9c..0000000000 --- a/sdk/python/kubeflow/tfjob/api/tf_job_watch.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2019 The Kubeflow Authors. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import retrying -from kubernetes import client -from kubernetes import watch as k8s_watch -from table_logger import TableLogger - -from kubeflow.tfjob.constants import constants -from kubeflow.tfjob.utils import utils - -tbl = TableLogger( - columns='NAME,STATE,TIME', - colwidth={'NAME': 30, 'STATE':20, 'TIME':30}, - border=False) - -@retrying.retry(wait_fixed=1000, stop_max_attempt_number=20) -def watch(name=None, namespace=None, timeout_seconds=600): - """Watch the created or patched InferenceService in the specified namespace""" - - if namespace is None: - namespace = utils.get_default_target_namespace() - - stream = k8s_watch.Watch().stream( - client.CustomObjectsApi().list_namespaced_custom_object, - constants.TFJOB_GROUP, - constants.TFJOB_VERSION, - namespace, - constants.TFJOB_PLURAL, - timeout_seconds=timeout_seconds) - - for event in stream: - tfjob = event['object'] - tfjob_name = tfjob['metadata']['name'] - if name and name != tfjob_name: - continue - else: - status = '' - update_time = '' - last_condition = tfjob.get('status', {}).get('conditions', [{}])[-1] - status = last_condition.get('type', '') - update_time = last_condition.get('lastTransitionTime', '') - - tbl(tfjob_name, status, update_time) - - if name == tfjob_name: - if status == 'Succeeded' or status == 'Failed': - break diff --git a/sdk/python/kubeflow/tfjob/configuration.py b/sdk/python/kubeflow/tfjob/configuration.py deleted file mode 100644 
index eb1fc8ee30..0000000000 --- a/sdk/python/kubeflow/tfjob/configuration.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import copy -import logging -import multiprocessing -import sys -import urllib3 - -import six -from six.moves import http_client as httplib - - -class Configuration(object): - """NOTE: This class is auto generated by the swagger code generator program. - - Ref: https://github.com/swagger-api/swagger-codegen - Do not edit the class manually. - """ - - _default = None - - def __init__(self): - """Constructor""" - if self._default: - for key in self._default.__dict__.keys(): - self.__dict__[key] = copy.copy(self._default.__dict__[key]) - return - - # Default Base url - self.host = "https://localhost" - # Temp file folder for downloading files - self.temp_folder_path = None - - # Authentication Settings - # dict to store API key(s) - self.api_key = {} - # dict to store API prefix (e.g. 
Bearer) - self.api_key_prefix = {} - # Username for HTTP basic authentication - self.username = "" - # Password for HTTP basic authentication - self.password = "" - - # Logging Settings - self.logger = {} - self.logger["package_logger"] = logging.getLogger("tfjob") - self.logger["urllib3_logger"] = logging.getLogger("urllib3") - # Log format - self.logger_format = '%(asctime)s %(levelname)s %(message)s' - # Log stream handler - self.logger_stream_handler = None - # Log file handler - self.logger_file_handler = None - # Debug file location - self.logger_file = None - # Debug switch - self.debug = False - - # SSL/TLS verification - # Set this to false to skip verifying SSL certificate when calling API - # from https server. - self.verify_ssl = True - # Set this to customize the certificate file to verify the peer. - self.ssl_ca_cert = None - # client certificate file - self.cert_file = None - # client key file - self.key_file = None - # Set this to True/False to enable/disable SSL hostname verification. - self.assert_hostname = None - - # urllib3 connection pool's maximum number of connections saved - # per pool. urllib3 uses 1 connection as default value, but this is - # not the best value when you are making a lot of possibly parallel - # requests to the same host, which is often the case here. - # cpu_count * 5 is used as default value to increase performance. - self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 - - # Proxy URL - self.proxy = None - # Safe chars for path_param - self.safe_chars_for_path_param = '' - - @classmethod - def set_default(cls, default): - cls._default = default - - @property - def logger_file(self): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - return self.__logger_file - - @logger_file.setter - def logger_file(self, value): - """The logger file. 
- - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - self.__logger_file = value - if self.__logger_file: - # If set logging file, - # then add file handler and remove stream handler. - self.logger_file_handler = logging.FileHandler(self.__logger_file) - self.logger_file_handler.setFormatter(self.logger_formatter) - for _, logger in six.iteritems(self.logger): - logger.addHandler(self.logger_file_handler) - if self.logger_stream_handler: - logger.removeHandler(self.logger_stream_handler) - else: - # If not set logging file, - # then add stream handler and remove file handler. - self.logger_stream_handler = logging.StreamHandler() - self.logger_stream_handler.setFormatter(self.logger_formatter) - for _, logger in six.iteritems(self.logger): - logger.addHandler(self.logger_stream_handler) - if self.logger_file_handler: - logger.removeHandler(self.logger_file_handler) - - @property - def debug(self): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - return self.__debug - - @debug.setter - def debug(self, value): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - self.__debug = value - if self.__debug: - # if debug status is True, turn on debug logging - for _, logger in six.iteritems(self.logger): - logger.setLevel(logging.DEBUG) - # turn on httplib debug - httplib.HTTPConnection.debuglevel = 1 - else: - # if debug status is False, turn off debug logging, - # setting log level to default `logging.WARNING` - for _, logger in six.iteritems(self.logger): - logger.setLevel(logging.WARNING) - # turn off httplib debug - httplib.HTTPConnection.debuglevel = 0 - - @property - def logger_format(self): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. 
- :type: str - """ - return self.__logger_format - - @logger_format.setter - def logger_format(self, value): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - self.__logger_format = value - self.logger_formatter = logging.Formatter(self.__logger_format) - - def get_api_key_with_prefix(self, identifier): - """Gets API key (with prefix if set). - - :param identifier: The identifier of apiKey. - :return: The token for api key authentication. - """ - if (self.api_key.get(identifier) and - self.api_key_prefix.get(identifier)): - return self.api_key_prefix[identifier] + ' ' + self.api_key[identifier] # noqa: E501 - elif self.api_key.get(identifier): - return self.api_key[identifier] - - def get_basic_auth_token(self): - """Gets HTTP basic authentication header (string). - - :return: The token for basic HTTP authentication. - """ - return urllib3.util.make_headers( - basic_auth=self.username + ':' + self.password - ).get('authorization') - - def auth_settings(self): - """Gets Auth Settings dict for api client. - - :return: The Auth Settings information dict. - """ - return { - - } - - def to_debug_report(self): - """Gets the essential information for debugging. - - :return: The report for debugging. - """ - return "Python SDK Debug Report:\n"\ - "OS: {env}\n"\ - "Python Version: {pyversion}\n"\ - "Version of the API: v0.1\n"\ - "SDK Package Version: 0.1".\ - format(env=sys.platform, pyversion=sys.version) diff --git a/sdk/python/kubeflow/tfjob/models/__init__.py b/sdk/python/kubeflow/tfjob/models/__init__.py deleted file mode 100644 index 40a6d8503a..0000000000 --- a/sdk/python/kubeflow/tfjob/models/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -# flake8: noqa -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -# import models into model package -from kubeflow.tfjob.models.v1_job_condition import V1JobCondition -from kubeflow.tfjob.models.v1_job_status import V1JobStatus -from kubeflow.tfjob.models.v1_replica_spec import V1ReplicaSpec -from kubeflow.tfjob.models.v1_replica_status import V1ReplicaStatus -from kubeflow.tfjob.models.v1_tf_job import V1TFJob -from kubeflow.tfjob.models.v1_tf_job_list import V1TFJobList -from kubeflow.tfjob.models.v1_tf_job_spec import V1TFJobSpec diff --git a/sdk/python/kubeflow/tfjob/models/v1_tf_job_spec.py b/sdk/python/kubeflow/tfjob/models/v1_tf_job_spec.py deleted file mode 100644 index b98bb8e7e8..0000000000 --- a/sdk/python/kubeflow/tfjob/models/v1_tf_job_spec.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -import pprint -import re # noqa: F401 - -import six - -from kubeflow.tfjob.models.v1_replica_spec import V1ReplicaSpec # noqa: F401,E501 - - -class V1TFJobSpec(object): - """NOTE: This class is auto generated by the swagger code generator program. - - Do not edit the class manually. - """ - - """ - Attributes: - swagger_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. - """ - swagger_types = { - 'active_deadline_seconds': 'int', - 'backoff_limit': 'int', - 'clean_pod_policy': 'str', - 'tf_replica_specs': 'dict(str, V1ReplicaSpec)', - 'ttl_seconds_after_finished': 'int' - } - - attribute_map = { - 'active_deadline_seconds': 'activeDeadlineSeconds', - 'backoff_limit': 'backoffLimit', - 'clean_pod_policy': 'cleanPodPolicy', - 'tf_replica_specs': 'tfReplicaSpecs', - 'ttl_seconds_after_finished': 'ttlSecondsAfterFinished' - } - - def __init__(self, active_deadline_seconds=None, backoff_limit=None, clean_pod_policy=None, tf_replica_specs=None, ttl_seconds_after_finished=None): # noqa: E501 - """V1TFJobSpec - a model defined in Swagger""" # noqa: E501 - - self._active_deadline_seconds = None - self._backoff_limit = None - self._clean_pod_policy = None - self._tf_replica_specs = None - self._ttl_seconds_after_finished = None - self.discriminator = None - - if active_deadline_seconds is not None: - self.active_deadline_seconds = active_deadline_seconds - if backoff_limit is not None: - self.backoff_limit = backoff_limit - if clean_pod_policy is not None: - self.clean_pod_policy = clean_pod_policy - self.tf_replica_specs = tf_replica_specs - if ttl_seconds_after_finished is not None: - self.ttl_seconds_after_finished = ttl_seconds_after_finished - - @property - def 
active_deadline_seconds(self): - """Gets the active_deadline_seconds of this V1TFJobSpec. # noqa: E501 - - Specifies the duration (in seconds) since startTime during which the job can remain active before it is terminated. Must be a positive integer. This setting applies only to pods where restartPolicy is OnFailure or Always. # noqa: E501 - - :return: The active_deadline_seconds of this V1TFJobSpec. # noqa: E501 - :rtype: int - """ - return self._active_deadline_seconds - - @active_deadline_seconds.setter - def active_deadline_seconds(self, active_deadline_seconds): - """Sets the active_deadline_seconds of this V1TFJobSpec. - - Specifies the duration (in seconds) since startTime during which the job can remain active before it is terminated. Must be a positive integer. This setting applies only to pods where restartPolicy is OnFailure or Always. # noqa: E501 - - :param active_deadline_seconds: The active_deadline_seconds of this V1TFJobSpec. # noqa: E501 - :type: int - """ - - self._active_deadline_seconds = active_deadline_seconds - - @property - def backoff_limit(self): - """Gets the backoff_limit of this V1TFJobSpec. # noqa: E501 - - Number of retries before marking this job as failed. # noqa: E501 - - :return: The backoff_limit of this V1TFJobSpec. # noqa: E501 - :rtype: int - """ - return self._backoff_limit - - @backoff_limit.setter - def backoff_limit(self, backoff_limit): - """Sets the backoff_limit of this V1TFJobSpec. - - Number of retries before marking this job as failed. # noqa: E501 - - :param backoff_limit: The backoff_limit of this V1TFJobSpec. # noqa: E501 - :type: int - """ - - self._backoff_limit = backoff_limit - - @property - def clean_pod_policy(self): - """Gets the clean_pod_policy of this V1TFJobSpec. # noqa: E501 - - Defines the policy for cleaning up pods after the TFJob completes. Defaults to Running. # noqa: E501 - - :return: The clean_pod_policy of this V1TFJobSpec. 
# noqa: E501 - :rtype: str - """ - return self._clean_pod_policy - - @clean_pod_policy.setter - def clean_pod_policy(self, clean_pod_policy): - """Sets the clean_pod_policy of this V1TFJobSpec. - - Defines the policy for cleaning up pods after the TFJob completes. Defaults to Running. # noqa: E501 - - :param clean_pod_policy: The clean_pod_policy of this V1TFJobSpec. # noqa: E501 - :type: str - """ - - self._clean_pod_policy = clean_pod_policy - - @property - def tf_replica_specs(self): - """Gets the tf_replica_specs of this V1TFJobSpec. # noqa: E501 - - A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 - - :return: The tf_replica_specs of this V1TFJobSpec. # noqa: E501 - :rtype: dict(str, V1ReplicaSpec) - """ - return self._tf_replica_specs - - @tf_replica_specs.setter - def tf_replica_specs(self, tf_replica_specs): - """Sets the tf_replica_specs of this V1TFJobSpec. - - A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 - - :param tf_replica_specs: The tf_replica_specs of this V1TFJobSpec. # noqa: E501 - :type: dict(str, V1ReplicaSpec) - """ - if tf_replica_specs is None: - raise ValueError("Invalid value for `tf_replica_specs`, must not be `None`") # noqa: E501 - - self._tf_replica_specs = tf_replica_specs - - @property - def ttl_seconds_after_finished(self): - """Gets the ttl_seconds_after_finished of this V1TFJobSpec. # noqa: E501 - - Defines the TTL for cleaning up finished TFJobs (temporary before kubernetes adds the cleanup controller). It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Defaults to infinite. # noqa: E501 - - :return: The ttl_seconds_after_finished of this V1TFJobSpec. 
# noqa: E501 - :rtype: int - """ - return self._ttl_seconds_after_finished - - @ttl_seconds_after_finished.setter - def ttl_seconds_after_finished(self, ttl_seconds_after_finished): - """Sets the ttl_seconds_after_finished of this V1TFJobSpec. - - Defines the TTL for cleaning up finished TFJobs (temporary before kubernetes adds the cleanup controller). It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Defaults to infinite. # noqa: E501 - - :param ttl_seconds_after_finished: The ttl_seconds_after_finished of this V1TFJobSpec. # noqa: E501 - :type: int - """ - - self._ttl_seconds_after_finished = ttl_seconds_after_finished - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.swagger_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - if issubclass(V1TFJobSpec, dict): - for key, value in self.items(): - result[key] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, V1TFJobSpec): - return False - - return self.__dict__ == other.__dict__ - - def __ne__(self, other): - """Returns true if both objects are not equal""" - return not self == other diff --git a/sdk/python/kubeflow/tfjob/models/v1_time.py b/sdk/python/kubeflow/tfjob/models/v1_time.py deleted file mode 100644 index d58e1cc5e7..0000000000 --- 
a/sdk/python/kubeflow/tfjob/models/v1_time.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -import pprint -import re # noqa: F401 - -import six - - -class V1Time(object): - """NOTE: This class is auto generated by the swagger code generator program. - - Do not edit the class manually. - """ - - """ - Attributes: - swagger_types (dict): The key is attribute name - and the value is attribute type. - attribute_map (dict): The key is attribute name - and the value is json key in definition. 
- """ - swagger_types = { - } - - attribute_map = { - } - - def __init__(self): # noqa: E501 - """V1Time - a model defined in Swagger""" # noqa: E501 - self.discriminator = None - - def to_dict(self): - """Returns the model properties as a dict""" - result = {} - - for attr, _ in six.iteritems(self.swagger_types): - value = getattr(self, attr) - if isinstance(value, list): - result[attr] = list(map( - lambda x: x.to_dict() if hasattr(x, "to_dict") else x, - value - )) - elif hasattr(value, "to_dict"): - result[attr] = value.to_dict() - elif isinstance(value, dict): - result[attr] = dict(map( - lambda item: (item[0], item[1].to_dict()) - if hasattr(item[1], "to_dict") else item, - value.items() - )) - else: - result[attr] = value - if issubclass(V1Time, dict): - for key, value in self.items(): - result[key] = value - - return result - - def to_str(self): - """Returns the string representation of the model""" - return pprint.pformat(self.to_dict()) - - def __repr__(self): - """For `print` and `pprint`""" - return self.to_str() - - def __eq__(self, other): - """Returns true if both objects are equal""" - if not isinstance(other, V1Time): - return False - - return self.__dict__ == other.__dict__ - - def __ne__(self, other): - """Returns true if both objects are not equal""" - return not self == other diff --git a/sdk/python/kubeflow/tfjob/utils/utils.py b/sdk/python/kubeflow/tfjob/utils/utils.py deleted file mode 100644 index e9db7c7f4f..0000000000 --- a/sdk/python/kubeflow/tfjob/utils/utils.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from kubeflow.tfjob.constants import constants - -def is_running_in_k8s(): - return os.path.isdir('/var/run/secrets/kubernetes.io/') - - -def get_current_k8s_namespace(): - with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'r') as f: - return f.readline() - - -def get_default_target_namespace(): - if not is_running_in_k8s(): - return 'default' - return get_current_k8s_namespace() - - -def set_tfjob_namespace(tfjob): - tfjob_namespace = tfjob.metadata.namespace - namespace = tfjob_namespace or get_default_target_namespace() - return namespace - - -def get_labels(name, master=False, replica_type=None, replica_index=None): - """ - Get labels according to speficed flags. - :param name: tfjob name - :param master: if need include label 'job-role: master'. - :param replica_type: User can specify one of 'worker, ps, chief to only' get one type pods. - :param replica_index: Can specfy replica index to get one pod of TFJob. - :return: Dict: Labels - """ - labels = { - constants.TFJOB_GROUP_LABEL: 'kubeflow.org', - constants.TFJOB_NAME_LABEL: name, - } - - if master: - labels[constants.TFJOB_ROLE_LABEL] = 'master' - - if replica_type: - labels[constants.TFJOB_TYPE_LABEL] = str.lower(replica_type) - - if replica_index: - labels[constants.TFJOB_INDEX_LABEL] = replica_index - - return labels - - -def to_selector(labels): - """ - Transfer Labels to selector. 
- """ - parts = [] - for key in labels.keys(): - parts.append("{0}={1}".format(key, labels[key])) - - return ",".join(parts) diff --git a/sdk/python/kubeflow/training/__init__.py b/sdk/python/kubeflow/training/__init__.py new file mode 100644 index 0000000000..027069720d --- /dev/null +++ b/sdk/python/kubeflow/training/__init__.py @@ -0,0 +1,48 @@ +# coding: utf-8 + +# flake8: noqa + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +__version__ = "1.3.0" + +# import apis into sdk package + +# import ApiClient +from kubeflow.training.api_client import ApiClient +from kubeflow.training.configuration import Configuration +from kubeflow.training.exceptions import OpenApiException +from kubeflow.training.exceptions import ApiTypeError +from kubeflow.training.exceptions import ApiValueError +from kubeflow.training.exceptions import ApiKeyError +from kubeflow.training.exceptions import ApiException +# import models into sdk package +from kubeflow.training.models.v1_job_condition import V1JobCondition +from kubeflow.training.models.v1_job_status import V1JobStatus +from kubeflow.training.models.v1_mx_job import V1MXJob +from kubeflow.training.models.v1_mx_job_list import V1MXJobList +from kubeflow.training.models.v1_mx_job_spec import V1MXJobSpec +from kubeflow.training.models.v1_py_torch_job import V1PyTorchJob +from kubeflow.training.models.v1_py_torch_job_list import V1PyTorchJobList +from kubeflow.training.models.v1_py_torch_job_spec import V1PyTorchJobSpec +from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec +from kubeflow.training.models.v1_replica_status import V1ReplicaStatus +from kubeflow.training.models.v1_run_policy import V1RunPolicy +from kubeflow.training.models.v1_scheduling_policy import V1SchedulingPolicy +from kubeflow.training.models.v1_tf_job import V1TFJob +from 
kubeflow.training.models.v1_tf_job_list import V1TFJobList +from kubeflow.training.models.v1_tf_job_spec import V1TFJobSpec +from kubeflow.training.models.v1_xg_boost_job import V1XGBoostJob +from kubeflow.training.models.v1_xg_boost_job_list import V1XGBoostJobList +from kubeflow.training.models.v1_xg_boost_job_spec import V1XGBoostJobSpec + diff --git a/sdk/python/kubeflow/training/api/__init__.py b/sdk/python/kubeflow/training/api/__init__.py new file mode 100644 index 0000000000..36dce7fe22 --- /dev/null +++ b/sdk/python/kubeflow/training/api/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +# flake8: noqa + +# import apis into api package diff --git a/sdk/python/kubeflow/training/api/py_torch_job_client.py b/sdk/python/kubeflow/training/api/py_torch_job_client.py new file mode 100644 index 0000000000..cf62fa8f1a --- /dev/null +++ b/sdk/python/kubeflow/training/api/py_torch_job_client.py @@ -0,0 +1,384 @@ +# Copyright 2019 The Kubeflow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class PyTorchJobClient(object):
    """Client for managing PyTorchJob custom resources on a Kubernetes cluster.

    Job objects are created, read, patched and deleted through the Kubernetes
    ``CustomObjectsApi``; pods and their logs are read through ``CoreV1Api``.
    Kubernetes ``ApiException`` errors are re-raised as ``RuntimeError``.
    """

    def __init__(self, config_file=None, context=None,  # pylint: disable=too-many-arguments
                 client_configuration=None, persist_config=True):
        """
        PyTorchJob client constructor
        :param config_file: kubeconfig file, defaults to ~/.kube/config
        :param context: kubernetes context
        :param client_configuration: kubernetes configuration object
        :param persist_config: passed through to ``config.load_kube_config``
        """
        # An explicit kubeconfig always wins; otherwise use the in-cluster
        # service account when running inside Kubernetes.
        if config_file or not utils.is_running_in_k8s():
            config.load_kube_config(
                config_file=config_file,
                context=context,
                client_configuration=client_configuration,
                persist_config=persist_config)
        else:
            config.load_incluster_config()

        self.custom_api = client.CustomObjectsApi()
        self.core_api = client.CoreV1Api()

    def create(self, pytorchjob, namespace=None):
        """
        Create the PyTorchJob
        :param pytorchjob: pytorchjob object
        :param namespace: defaults to current or default namespace
        :return: created pytorchjob
        :raises RuntimeError: if the Kubernetes API call fails
        """
        if namespace is None:
            namespace = utils.set_pytorchjob_namespace(pytorchjob)

        try:
            outputs = self.custom_api.create_namespaced_custom_object(
                constants.PYTORCHJOB_GROUP,
                constants.PYTORCHJOB_VERSION,
                namespace,
                constants.PYTORCHJOB_PLURAL,
                pytorchjob)
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->create_namespaced_custom_object: "
                "%s\n" % e) from e

        return outputs

    def get(self, name=None, namespace=None, watch=False,
            timeout_seconds=600):  # pylint: disable=inconsistent-return-statements
        """
        Get the pytorchjob
        :param name: existing pytorchjob name, if not defined, get all pytorchjobs in the namespace.
        :param namespace: defaults to current or default namespace
        :param watch: Watch the pytorchjob if `True`.
        :param timeout_seconds: How long to watch the pytorchjob.
        :return: pytorchjob (or the list response when no name is given);
                 None when `watch` is `True`.
        :raises RuntimeError: on timeout or any failure of the API call
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        if watch:
            # Watching only logs status rows; there is nothing to return.
            pytorchjob_watch(
                name=name or None,
                namespace=namespace,
                timeout_seconds=timeout_seconds)
            return None

        if name:
            thread = self.custom_api.get_namespaced_custom_object(
                constants.PYTORCHJOB_GROUP,
                constants.PYTORCHJOB_VERSION,
                namespace,
                constants.PYTORCHJOB_PLURAL,
                name,
                async_req=True)

            try:
                # The async request is awaited with a bounded timeout.
                return thread.get(constants.APISERVER_TIMEOUT)
            except multiprocessing.TimeoutError as e:
                raise RuntimeError("Timeout trying to get PyTorchJob.") from e
            except client.rest.ApiException as e:
                raise RuntimeError(
                    "Exception when calling CustomObjectsApi->get_namespaced_custom_object: "
                    "%s\n" % e) from e
            except Exception as e:
                raise RuntimeError(
                    "There was a problem to get PyTorchJob {0} in namespace {1}. "
                    "Exception: {2} ".format(name, namespace, e)) from e

        thread = self.custom_api.list_namespaced_custom_object(
            constants.PYTORCHJOB_GROUP,
            constants.PYTORCHJOB_VERSION,
            namespace,
            constants.PYTORCHJOB_PLURAL,
            async_req=True)

        try:
            return thread.get(constants.APISERVER_TIMEOUT)
        except multiprocessing.TimeoutError as e:
            raise RuntimeError("Timeout trying to get PyTorchJob.") from e
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->list_namespaced_custom_object: "
                "%s\n" % e) from e
        except Exception as e:
            raise RuntimeError(
                "There was a problem to List PyTorchJob in namespace {0}. "
                "Exception: {1} ".format(namespace, e)) from e

    def patch(self, name, pytorchjob, namespace=None):
        """
        Patch existing pytorchjob
        :param name: existing pytorchjob name
        :param pytorchjob: patched pytorchjob
        :param namespace: defaults to current or default namespace
        :return: patched pytorchjob
        :raises RuntimeError: if the Kubernetes API call fails
        """
        if namespace is None:
            namespace = utils.set_pytorchjob_namespace(pytorchjob)

        try:
            outputs = self.custom_api.patch_namespaced_custom_object(
                constants.PYTORCHJOB_GROUP,
                constants.PYTORCHJOB_VERSION,
                namespace,
                constants.PYTORCHJOB_PLURAL,
                name,
                pytorchjob)
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->patch_namespaced_custom_object: "
                "%s\n" % e) from e

        return outputs

    def delete(self, name, namespace=None):
        """
        Delete the pytorchjob
        :param name: pytorchjob name
        :param namespace: defaults to current or default namespace
        :return: the response of the delete call
        :raises RuntimeError: if the Kubernetes API call fails
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        try:
            # Keyword arguments keep the call independent of whether the
            # installed kubernetes client takes `body` positionally or only
            # as a keyword.
            return self.custom_api.delete_namespaced_custom_object(
                group=constants.PYTORCHJOB_GROUP,
                version=constants.PYTORCHJOB_VERSION,
                namespace=namespace,
                plural=constants.PYTORCHJOB_PLURAL,
                name=name,
                body=client.V1DeleteOptions())
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->delete_namespaced_custom_object: "
                "%s\n" % e) from e

    def wait_for_job(self, name,  # pylint: disable=inconsistent-return-statements
                     namespace=None,
                     watch=False,
                     timeout_seconds=600,
                     polling_interval=30,
                     status_callback=None):
        """Wait for the specified job to finish.

        :param name: Name of the PyTorchJob.
        :param namespace: defaults to current or default namespace.
        :param watch: If `True`, stream status rows via the watch helper
               instead of polling; nothing is returned in that case.
        :param timeout_seconds: How long to wait for the job.
        :param polling_interval: How often to poll for the status of the job.
        :param status_callback: (Optional): Callable. If supplied this callable is
               invoked after we poll the job. Callable takes a single argument which
               is the job.
        :return: the PyTorchJob once it is Succeeded or Failed (polling mode);
                 None in watch mode.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        if watch:
            pytorchjob_watch(
                name=name,
                namespace=namespace,
                timeout_seconds=timeout_seconds)
            return None

        return self.wait_for_condition(
            name,
            ["Succeeded", "Failed"],
            namespace=namespace,
            timeout_seconds=timeout_seconds,
            polling_interval=polling_interval,
            status_callback=status_callback)

    def wait_for_condition(self, name,
                           expected_condition,
                           namespace=None,
                           timeout_seconds=600,
                           polling_interval=30,
                           status_callback=None):
        """Waits until any of the specified conditions occur.

        The job is polled ``round(timeout_seconds / polling_interval)`` times,
        sleeping ``polling_interval`` seconds after each unsuccessful poll.

        :param name: Name of the job.
        :param expected_condition: A list of conditions. Function waits until any of the
               supplied conditions is reached.
        :param namespace: defaults to current or default namespace.
        :param timeout_seconds: How long to wait for the job.
        :param polling_interval: How often to poll for the status of the job.
        :param status_callback: (Optional): Callable. If supplied this callable is
               invoked after we poll the job. Callable takes a single argument which
               is the job.
        :return: Object: PyTorchJob
        :raises RuntimeError: if none of the conditions is reached in time; the
                last polled job (or None) is the exception's second argument.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        # Bound before the loop so the timeout error below can always refer
        # to it, even when the loop body never runs.
        pytorchjob = None

        for _ in range(round(timeout_seconds / polling_interval)):
            pytorchjob = self.get(name, namespace=namespace)

            if pytorchjob:
                if status_callback:
                    status_callback(pytorchjob)

                # If we poll the CRD quick enough status won't have been set yet.
                # Conditions might also have a value of None in status.
                conditions = pytorchjob.get("status", {}).get("conditions", []) or []
                for c in conditions:
                    if c.get("type", "") in expected_condition:
                        return pytorchjob

            time.sleep(polling_interval)

        raise RuntimeError(
            "Timeout waiting for PyTorchJob {0} in namespace {1} to enter one of the "
            "conditions {2}.".format(name, namespace, expected_condition), pytorchjob)

    def get_job_status(self, name, namespace=None):
        """Returns PyTorchJob status, such as Running, Failed or Succeeded.

        :param name: The PyTorchJob name.
        :param namespace: defaults to current or default namespace.
        :return: str: type of the job's last condition, or an empty string
                 when the job has no conditions yet.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        pytorchjob = self.get(name, namespace=namespace)
        # A freshly created job may have no status, or `conditions: None`.
        conditions = pytorchjob.get("status", {}).get("conditions") or []
        if not conditions:
            return ""
        return conditions[-1].get("type", "")

    def is_job_running(self, name, namespace=None):
        """Returns true if the PyTorchJob running; false otherwise.

        :param name: The PyTorchJob name.
        :param namespace: defaults to current or default namespace.
        :return: True or False
        """
        pytorchjob_status = self.get_job_status(name, namespace=namespace)
        return pytorchjob_status.lower() == "running"

    def is_job_succeeded(self, name, namespace=None):
        """Returns true if the PyTorchJob succeeded; false otherwise.

        :param name: The PyTorchJob name.
        :param namespace: defaults to current or default namespace.
        :return: True or False
        """
        pytorchjob_status = self.get_job_status(name, namespace=namespace)
        return pytorchjob_status.lower() == "succeeded"

    def get_pod_names(self, name, namespace=None, master=False,  # pylint: disable=inconsistent-return-statements
                      replica_type=None, replica_index=None):
        """
        Get pod names of PyTorchJob.
        :param name: PyTorchJob name
        :param namespace: defaults to current or default namespace.
        :param master: Only get pod with label 'job-role: master' pod if True.
        :param replica_type: User can specify one of 'master, worker' to only get one type pods.
               By default get all type pods.
        :param replica_index: User can specify replica index to get one pod of PyTorchJob.
        :return: set: pods name; None (after logging a warning) when no pod matches.
        :raises RuntimeError: if listing the pods fails
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        labels = utils.get_pytorchjob_labels(name, master=master,
                                             replica_type=replica_type,
                                             replica_index=replica_index)

        try:
            resp = self.core_api.list_namespaced_pod(
                namespace, label_selector=utils.to_selector(labels))
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CoreV1Api->list_namespaced_pod: %s\n" % e) from e

        pod_names = [pod.metadata.name for pod in resp.items
                     if pod.metadata and pod.metadata.name]

        if not pod_names:
            logging.warning("Not found Pods of the PyTorchJob %s with the labels %s.", name, labels)
            return None
        return set(pod_names)

    def get_logs(self, name, namespace=None, master=True,
                 replica_type=None, replica_index=None,
                 follow=False):
        """
        Get training logs of the PyTorchJob.
        By default only get the logs of Pod that has labels 'job-role: master'.
        The logs are written to the logger; nothing is returned.
        :param name: PyTorchJob name
        :param namespace: defaults to current or default namespace.
        :param master: By default get pod with label 'job-role: master' pod if True.
               If need to get more Pod Logs, set False.
        :param replica_type: User can specify one of 'master, worker' to only get one type pods.
               By default get all type pods.
        :param replica_index: User can specify replica index to get one pod of PyTorchJob.
        :param follow: Follow the log stream of the pod. Defaults to false.
        :raises RuntimeError: if no pod matches or reading a pod log fails
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        pod_names = self.get_pod_names(name, namespace=namespace,
                                       master=master,
                                       replica_type=replica_type,
                                       replica_index=replica_index)

        if not pod_names:
            raise RuntimeError("Not found Pods of the PyTorchJob {} "
                               "in namespace {}".format(name, namespace))

        for pod in pod_names:
            try:
                pod_logs = self.core_api.read_namespaced_pod_log(
                    pod, namespace, follow=follow)
                logging.info("The logs of Pod %s:\n %s", pod, pod_logs)
            except client.rest.ApiException as e:
                raise RuntimeError(
                    "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) from e
@retrying.retry(wait_fixed=1000, stop_max_attempt_number=20)
def watch(name=None, namespace=None, timeout_seconds=600):
    """Watch PyTorchJobs in the specified namespace and log their latest condition.

    One table row (name, condition type, last transition time) is written
    through ``tbl`` for every watch event that is not filtered out.

    :param name: if given, only events of this PyTorchJob are reported and the
           watch stops once that job's latest condition is Succeeded or Failed.
    :param namespace: defaults to current or default namespace.
    :param timeout_seconds: passed to the watch stream as its timeout.
    """
    if namespace is None:
        namespace = utils.get_default_target_namespace()

    stream = k8s_watch.Watch().stream(
        client.CustomObjectsApi().list_namespaced_custom_object,
        constants.PYTORCHJOB_GROUP,
        constants.PYTORCHJOB_VERSION,
        namespace,
        constants.PYTORCHJOB_PLURAL,
        timeout_seconds=timeout_seconds)

    for event in stream:
        pytorchjob = event['object']
        pytorchjob_name = pytorchjob['metadata']['name']
        if name and name != pytorchjob_name:
            continue

        # A job that was just created may not carry any condition yet (or
        # carry `conditions: None`); report it with an empty state instead
        # of failing and letting the retry decorator restart the watch.
        conditions = pytorchjob.get('status', {}).get('conditions') or []
        last_condition = conditions[-1] if conditions else {}
        status = last_condition.get('type', '')
        update_time = last_condition.get('lastTransitionTime', '')

        tbl(pytorchjob_name, status, update_time)

        if name == pytorchjob_name and status in ('Succeeded', 'Failed'):
            break
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import multiprocessing
import time
import logging
import threading
import queue

from kubernetes import client, config
from kubernetes import watch as k8s_watch

from kubeflow.training.constants import constants
from kubeflow.training.utils import utils

from .tf_job_watch import watch as tfjob_watch

logging.basicConfig(format='%(message)s')
logging.getLogger().setLevel(logging.INFO)


def wrap_log_stream(q, stream):
    """Copy every item of ``stream`` into ``q`` and finish with ``None``.

    Intended to run in a worker thread (see ``get_log_queue_pool``). The
    trailing ``None`` is the end-of-stream marker that the consumer in
    ``TFJobClient.get_logs`` waits for.

    :param q: queue receiving the log lines.
    :param stream: iterable producing log lines.
    :raises RuntimeError: if reading from ``stream`` fails.
    """
    try:
        for logline in stream:
            q.put(logline)
    except Exception as e:
        raise RuntimeError(
            "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) from e
    finally:
        # Publish the end marker on failure as well; otherwise the consumer
        # would keep polling this queue forever after the stream broke.
        q.put(None)


def get_log_queue_pool(streams):
    """Start one reader thread per stream and return their queues.

    :param streams: iterable of log streams.
    :return: list of ``queue.Queue`` objects (capacity 100 each), in the same
      order as ``streams``; each is fed by ``wrap_log_stream``.
    """
    pool = []
    for stream in streams:
        q = queue.Queue(maxsize=100)
        pool.append(q)
        threading.Thread(target=wrap_log_stream, args=(q, stream)).start()
    return pool


class TFJobClient(object):
    """Client for creating, inspecting and deleting TFJob custom resources."""

    def __init__(self, config_file=None, context=None,  # pylint: disable=too-many-arguments
                 client_configuration=None, persist_config=True):
        """
        TFJob client constructor
        :param config_file: kubeconfig file, defaults to ~/.kube/config
        :param context: kubernetes context
        :param client_configuration: kubernetes configuration object
        :param persist_config: forwarded to ``config.load_kube_config``
        """
        # An explicit kubeconfig always wins; the in-cluster configuration is
        # only used when none was given and we are running inside Kubernetes.
        if config_file or not utils.is_running_in_k8s():
            config.load_kube_config(
                config_file=config_file,
                context=context,
                client_configuration=client_configuration,
                persist_config=persist_config)
        else:
            config.load_incluster_config()

        self.custom_api = client.CustomObjectsApi()
        self.core_api = client.CoreV1Api()

    def create(self, tfjob, namespace=None):
        """
        Create the TFJob
        :param tfjob: tfjob object
        :param namespace: defaults to current or default namespace
        :return: created tfjob
        :raises RuntimeError: if the API server rejects the request.
        """

        if namespace is None:
            namespace = utils.set_tfjob_namespace(tfjob)

        try:
            outputs = self.custom_api.create_namespaced_custom_object(
                constants.TFJOB_GROUP,
                constants.TFJOB_VERSION,
                namespace,
                constants.TFJOB_PLURAL,
                tfjob)
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->create_namespaced_custom_object: "
                "%s\n" % e) from e

        return outputs

    def get(self, name=None, namespace=None, watch=False,
            timeout_seconds=600):  # pylint: disable=inconsistent-return-statements
        """
        Get the tfjob
        :param name: existing tfjob name, if not defined, then get all tfjobs in the namespace.
        :param namespace: defaults to current or default namespace
        :param watch: Watch the TFJob if `True`.
        :param timeout_seconds: How long to watch the job.
        :return: tfjob (or the list response when ``name`` is not given);
          ``None`` when ``watch`` is `True`, because watching only logs.
        :raises RuntimeError: on timeout or any API failure.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        if name:
            if watch:
                tfjob_watch(
                    name=name,
                    namespace=namespace,
                    timeout_seconds=timeout_seconds)
            else:
                thread = self.custom_api.get_namespaced_custom_object(
                    constants.TFJOB_GROUP,
                    constants.TFJOB_VERSION,
                    namespace,
                    constants.TFJOB_PLURAL,
                    name,
                    async_req=True)

                tfjob = None
                try:
                    # Bound the wait for the asynchronous request.
                    tfjob = thread.get(constants.APISERVER_TIMEOUT)
                except multiprocessing.TimeoutError as e:
                    raise RuntimeError("Timeout trying to get TFJob.") from e
                except client.rest.ApiException as e:
                    raise RuntimeError(
                        "Exception when calling CustomObjectsApi->get_namespaced_custom_object: "
                        "%s\n" % e) from e
                except Exception as e:
                    raise RuntimeError(
                        "There was a problem to get TFJob {0} in namespace {1}. Exception: "
                        "{2} ".format(name, namespace, e)) from e
                return tfjob
        else:
            if watch:
                tfjob_watch(
                    namespace=namespace,
                    timeout_seconds=timeout_seconds)
            else:
                thread = self.custom_api.list_namespaced_custom_object(
                    constants.TFJOB_GROUP,
                    constants.TFJOB_VERSION,
                    namespace,
                    constants.TFJOB_PLURAL,
                    async_req=True)

                tfjobs = None
                try:
                    tfjobs = thread.get(constants.APISERVER_TIMEOUT)
                except multiprocessing.TimeoutError as e:
                    raise RuntimeError("Timeout trying to get TFJob.") from e
                except client.rest.ApiException as e:
                    raise RuntimeError(
                        "Exception when calling CustomObjectsApi->list_namespaced_custom_object: "
                        "%s\n" % e) from e
                except Exception as e:
                    raise RuntimeError(
                        "There was a problem to list TFJobs in namespace {0}. "
                        "Exception: {1} ".format(namespace, e)) from e
                return tfjobs

    def patch(self, name, tfjob, namespace=None):
        """
        Patch existing tfjob
        :param name: existing tfjob name
        :param tfjob: patched tfjob
        :param namespace: defaults to current or default namespace
        :return: patched tfjob
        :raises RuntimeError: if the API server rejects the request.
        """
        if namespace is None:
            namespace = utils.set_tfjob_namespace(tfjob)

        try:
            outputs = self.custom_api.patch_namespaced_custom_object(
                constants.TFJOB_GROUP,
                constants.TFJOB_VERSION,
                namespace,
                constants.TFJOB_PLURAL,
                name,
                tfjob)
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->patch_namespaced_custom_object: "
                "%s\n" % e) from e

        return outputs

    def delete(self, name, namespace=None):
        """
        Delete the tfjob
        :param name: tfjob name
        :param namespace: defaults to current or default namespace
        :return: the response of ``delete_namespaced_custom_object``
        :raises RuntimeError: if the API server rejects the request.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        try:
            return self.custom_api.delete_namespaced_custom_object(
                group=constants.TFJOB_GROUP,
                version=constants.TFJOB_VERSION,
                namespace=namespace,
                plural=constants.TFJOB_PLURAL,
                name=name,
                body=client.V1DeleteOptions())
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CustomObjectsApi->delete_namespaced_custom_object: "
                "%s\n" % e) from e

    def wait_for_job(self, name,  # pylint: disable=inconsistent-return-statements
                     namespace=None,
                     timeout_seconds=600,
                     polling_interval=30,
                     watch=False,
                     status_callback=None):
        """Wait for the specified job to finish.

        :param name: Name of the TfJob.
        :param namespace: defaults to current or default namespace.
        :param timeout_seconds: How long to wait for the job.
        :param polling_interval: How often to poll for the status of the job.
        :param watch: Watch the TFJob if `True`.
        :param status_callback: (Optional): Callable. If supplied this callable is
          invoked after we poll the job. Callable takes a single argument which
          is the job.
        :return: the TFJob once it is ``Succeeded`` or ``Failed``; ``None``
          when ``watch`` is `True`.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        if watch:
            tfjob_watch(
                name=name,
                namespace=namespace,
                timeout_seconds=timeout_seconds)
        else:
            return self.wait_for_condition(
                name,
                ["Succeeded", "Failed"],
                namespace=namespace,
                timeout_seconds=timeout_seconds,
                polling_interval=polling_interval,
                status_callback=status_callback)

    def wait_for_condition(self, name,
                           expected_condition,
                           namespace=None,
                           timeout_seconds=600,
                           polling_interval=30,
                           status_callback=None):
        """Waits until any of the specified conditions occur.

        :param name: Name of the job.
        :param expected_condition: A list of conditions. Function waits until any of the
          supplied conditions is reached.
        :param namespace: defaults to current or default namespace.
        :param timeout_seconds: How long to wait for the job.
        :param polling_interval: How often to poll for the status of the job.
          Must be greater than 0.
        :param status_callback: (Optional): Callable. If supplied this callable is
          invoked after we poll the job. Callable takes a single argument which
          is the job.
        :return: Object TFJob status
        :raises ValueError: if ``polling_interval`` is not positive.
        :raises RuntimeError: if no expected condition is seen in time.
        """

        if namespace is None:
            namespace = utils.get_default_target_namespace()

        if polling_interval <= 0:
            raise ValueError("polling_interval must be greater than 0.")

        # Poll at least once, even when the timeout is shorter than half an
        # interval; otherwise the job would never be looked at.
        attempts = max(1, round(timeout_seconds / polling_interval))

        tfjob = None
        for attempt in range(attempts):
            tfjob = self.get(name, namespace=namespace)

            if tfjob:
                if status_callback:
                    status_callback(tfjob)

                # If we poll the CRD quick enough status won't have been set yet.
                # Conditions might also have a value of None in status.
                conditions = (tfjob.get("status") or {}).get("conditions") or []
                for c in conditions:
                    if c.get("type", "") in expected_condition:
                        return tfjob

            # No point in sleeping after the last poll; we are about to give up.
            if attempt < attempts - 1:
                time.sleep(polling_interval)

        raise RuntimeError(
            "Timeout waiting for TFJob {0} in namespace {1} to enter one of the "
            "conditions {2}.".format(name, namespace, expected_condition), tfjob)

    def get_job_status(self, name, namespace=None):
        """Returns TFJob status, such as Running, Failed or Succeeded.

        :param name: The TFJob name.
        :param namespace: defaults to current or default namespace.
        :return: Type of the most recent condition, or an empty string when
          the job has no conditions yet.
        """
        if namespace is None:
            namespace = utils.get_default_target_namespace()

        tfjob = self.get(name, namespace=namespace)
        # Status and conditions may be missing, empty or None right after
        # creation; treat all of those as "no condition yet".
        conditions = (tfjob.get("status") or {}).get("conditions") or [{}]
        return conditions[-1].get("type", "")

    def is_job_running(self, name, namespace=None):
        """Returns true if the TFJob running; false otherwise.

        :param name: The TFJob name.
        :param namespace: defaults to current or default namespace.
        :return: True or False
        """
        tfjob_status = self.get_job_status(name, namespace=namespace)
        return tfjob_status.lower() == "running"

    def is_job_succeeded(self, name, namespace=None):
        """Returns true if the TFJob succeeded; false otherwise.

        :param name: The TFJob name.
        :param namespace: defaults to current or default namespace.
        :return: True or False
        """
        tfjob_status = self.get_job_status(name, namespace=namespace)
        return tfjob_status.lower() == "succeeded"

    def get_pod_names(self, name, namespace=None, master=False,  # pylint: disable=inconsistent-return-statements
                      replica_type=None, replica_index=None):
        """
        Get pod names of TFJob.
        :param name: tfjob name
        :param namespace: defaults to current or default namespace.
        :param master: Only get pod with label 'job-role: master' pod if True.
        :param replica_type: User can specify one of 'worker, ps, chief' to only get one type pods.
          By default get all type pods.
        :param replica_index: User can specify replica index to get one pod of TFJob.
        :return: set: pods name; ``None`` (after logging a warning) when no
          pod matches.
        :raises RuntimeError: if listing the pods fails.
        """

        if namespace is None:
            namespace = utils.get_default_target_namespace()

        labels = utils.get_tfjob_labels(name, master=master,
                                        replica_type=replica_type,
                                        replica_index=replica_index)

        try:
            resp = self.core_api.list_namespaced_pod(
                namespace, label_selector=utils.to_selector(labels))
        except client.rest.ApiException as e:
            raise RuntimeError(
                "Exception when calling CoreV1Api->list_namespaced_pod: %s\n" % e) from e

        pod_names = [pod.metadata.name for pod in resp.items
                     if pod.metadata and pod.metadata.name]

        if not pod_names:
            logging.warning("Not found Pods of the TFJob %s with the labels %s.", name, labels)
        else:
            return set(pod_names)

    def get_logs(self, name, namespace=None, master=True,
                 replica_type=None, replica_index=None,
                 follow=False):
        """
        Get training logs of the TFJob.
        By default only get the logs of Pod that has labels 'job-role: master'.
        :param name: tfjob name
        :param namespace: defaults to current or default namespace.
        :param master: By default get pod with label 'job-role: master' pod if True.
                       If need to get more Pod Logs, set False.
        :param replica_type: User can specify one of 'worker, ps, chief' to only get one type pods.
          By default get all type pods.
        :param replica_index: User can specify replica index to get one pod of TFJob.
        :param follow: Follow the log stream of the pod. Defaults to false.
        :return: None; the logs are written through ``logging.info``.
        :raises RuntimeError: if no pod is found or reading a log fails.
        """

        if namespace is None:
            namespace = utils.get_default_target_namespace()

        # get_pod_names returns None when nothing matches; normalise that to an
        # empty list so the "not found" error below is actually reachable.
        # Sorting gives a stable pod order for the output.
        pod_names = sorted(self.get_pod_names(name, namespace=namespace,
                                              master=master,
                                              replica_type=replica_type,
                                              replica_index=replica_index) or [])
        if not pod_names:
            raise RuntimeError("Not found Pods of the TFJob {} "
                               "in namespace {}".format(name, namespace))

        if follow:
            log_streams = [
                k8s_watch.Watch().stream(self.core_api.read_namespaced_pod_log,
                                         name=pod, namespace=namespace)
                for pod in pod_names
            ]
            finished = [False for _ in log_streams]

            # create thread and queue per stream, for non-blocking iteration
            log_queue_pool = get_log_queue_pool(log_streams)

            # iterate over every watching pods' log queue
            while not all(finished):
                for index, log_queue in enumerate(log_queue_pool):
                    if finished[index]:
                        continue
                    # grouping the every 50 log lines of the same pod
                    for _ in range(50):
                        try:
                            logline = log_queue.get(timeout=1)
                        except queue.Empty:
                            # Nothing new from this pod right now; move on.
                            break
                        if logline is None:
                            finished[index] = True
                            break
                        logging.info("[Pod %s]: %s", pod_names[index], logline)
        else:
            for pod in pod_names:
                try:
                    pod_logs = self.core_api.read_namespaced_pod_log(pod, namespace)
                    logging.info("The logs of Pod %s:\n %s", pod, pod_logs)
                except client.rest.ApiException as e:
                    raise RuntimeError(
                        "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e) from e


# diff --git a/sdk/python/kubeflow/training/api/tf_job_watch.py b/sdk/python/kubeflow/training/api/tf_job_watch.py
# new file mode 100644
# index 0000000000..36c1fc56f4
# --- /dev/null
# +++ b/sdk/python/kubeflow/training/api/tf_job_watch.py
# @@ -0,0 +1,60 @@
# Copyright 2021 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import retrying
from kubernetes import client
from kubernetes import watch as k8s_watch
from table_logger import TableLogger

from kubeflow.training.constants import constants
from kubeflow.training.utils import utils

# Shared table printer used to emit one row per observed TFJob event.
tbl = TableLogger(
    columns='NAME,STATE,TIME',
    colwidth={'NAME': 30, 'STATE': 20, 'TIME': 30},
    border=False)


@retrying.retry(wait_fixed=1000, stop_max_attempt_number=20)
def watch(name=None, namespace=None, timeout_seconds=600):
    """Watch TFJobs in a namespace and print their latest condition.

    Every event received from the API server is printed as a
    ``NAME, STATE, TIME`` row through the module-level ``tbl`` logger.

    :param name: Optional TFJob name. When given, events of other jobs are
      ignored and watching stops as soon as this job reports a ``Succeeded``
      or ``Failed`` condition.
    :param namespace: Namespace to watch. Defaults to the value returned by
      ``utils.get_default_target_namespace()``.
    :param timeout_seconds: Passed through to the watch stream as its
      ``timeout_seconds`` argument.
    :return: None
    """
    if namespace is None:
        namespace = utils.get_default_target_namespace()

    stream = k8s_watch.Watch().stream(
        client.CustomObjectsApi().list_namespaced_custom_object,
        constants.TFJOB_GROUP,
        constants.TFJOB_VERSION,
        namespace,
        constants.TFJOB_PLURAL,
        timeout_seconds=timeout_seconds)

    for event in stream:
        tfjob = event['object']
        tfjob_name = tfjob['metadata']['name']
        if name and name != tfjob_name:
            continue

        # The [{}] default alone only covers a missing key. An empty list or
        # an explicit null would still fail on [-1], so normalise those too.
        conditions = (tfjob.get('status') or {}).get('conditions') or [{}]
        last_condition = conditions[-1]
        status = last_condition.get('type', '')
        update_time = last_condition.get('lastTransitionTime', '')

        tbl(tfjob_name, status, update_time)

        # Only a named watch has a natural end: the job's terminal state.
        if name == tfjob_name and status in ('Succeeded', 'Failed'):
            break


# diff --git a/sdk/python/kubeflow/tfjob/api_client.py b/sdk/python/kubeflow/training/api_client.py
# similarity index
79% rename from sdk/python/kubeflow/tfjob/api_client.py rename to sdk/python/kubeflow/training/api_client.py index 0696ff0294..5527be1f41 100644 --- a/sdk/python/kubeflow/tfjob/api_client.py +++ b/sdk/python/kubeflow/training/api_client.py @@ -1,31 +1,18 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ from __future__ import absolute_import +import atexit import datetime +from dateutil.parser import parse import json import mimetypes from multiprocessing.pool import ThreadPool @@ -37,21 +24,22 @@ import six from six.moves.urllib.parse import quote -from kubeflow.tfjob.configuration import Configuration -import kubeflow.tfjob.models -from kubeflow.tfjob import rest +from kubeflow.training.configuration import Configuration +import kubeflow.training.models +from kubeflow.training import rest +from kubeflow.training.exceptions import ApiValueError, ApiException class ApiClient(object): - """Generic API client for Swagger client library builds. + """Generic API client for OpenAPI client library builds. - Swagger generic API client. This client handles the client- + OpenAPI generic API client. 
This client handles the client- server communication, and is invariant across implementations. Specifics of - the methods and models for each application are generated from the Swagger + the methods and models for each application are generated from the OpenAPI templates. - NOTE: This class is auto generated by the swagger code generator program. - Ref: https://github.com/swagger-api/swagger-codegen + NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. :param configuration: .Configuration object for this client @@ -60,6 +48,8 @@ class ApiClient(object): the API. :param cookie: a cookie to include in the header when making calls to the API + :param pool_threads: The number of threads to use for async requests + to the API. More threads means more concurrent API requests. """ PRIMITIVE_TYPES = (float, bool, bytes, six.text_type) + six.integer_types @@ -73,32 +63,46 @@ class ApiClient(object): 'datetime': datetime.datetime, 'object': object, } + _pool = None def __init__(self, configuration=None, header_name=None, header_value=None, - cookie=None): + cookie=None, pool_threads=1): if configuration is None: - configuration = Configuration() + configuration = Configuration.get_default_copy() self.configuration = configuration + self.pool_threads = pool_threads - # Use the pool property to lazily initialize the ThreadPool. - self._pool = None self.rest_client = rest.RESTClientObject(configuration) self.default_headers = {} if header_name is not None: self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'Swagger-Codegen/0.1/python' + self.user_agent = 'OpenAPI-Generator/1.3.0/python' + self.client_side_validation = configuration.client_side_validation + + def __enter__(self): + return self - def __del__(self): - if self._pool is not None: + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def close(self): + if self._pool: self._pool.close() self._pool.join() + self._pool = None + if hasattr(atexit, 'unregister'): + atexit.unregister(self.close) @property def pool(self): + """Create thread pool on first request + avoids instantiating unused threadpool for blocking clients. + """ if self._pool is None: - self._pool = ThreadPool() + atexit.register(self.close) + self._pool = ThreadPool(self.pool_threads) return self._pool @property @@ -118,7 +122,7 @@ def __call_api( query_params=None, header_params=None, body=None, post_params=None, files=None, response_type=None, auth_settings=None, _return_http_data_only=None, collection_formats=None, - _preload_content=True, _request_timeout=None): + _preload_content=True, _request_timeout=None, _host=None): config = self.configuration @@ -152,10 +156,11 @@ def __call_api( # post parameters if post_params or files: - post_params = self.prepare_post_parameters(post_params, files) + post_params = post_params if post_params else [] post_params = self.sanitize_for_serialization(post_params) post_params = self.parameters_to_tuples(post_params, collection_formats) + post_params.extend(self.files_parameters(files)) # auth setting self.update_params_for_auth(header_params, query_params, auth_settings) @@ -165,24 +170,44 @@ def __call_api( body = self.sanitize_for_serialization(body) # request url - url = self.configuration.host + resource_path + if _host is None: + url = self.configuration.host + resource_path + else: + # use server/host defined in path or operation instead + url = _host + resource_path - # perform request and return response - response_data = self.request( - method, url, 
query_params=query_params, headers=header_params, - post_params=post_params, body=body, - _preload_content=_preload_content, - _request_timeout=_request_timeout) + try: + # perform request and return response + response_data = self.request( + method, url, query_params=query_params, headers=header_params, + post_params=post_params, body=body, + _preload_content=_preload_content, + _request_timeout=_request_timeout) + except ApiException as e: + e.body = e.body.decode('utf-8') if six.PY3 else e.body + raise e + + content_type = response_data.getheader('content-type') self.last_response = response_data return_data = response_data - if _preload_content: - # deserialize response data - if response_type: - return_data = self.deserialize(response_data, response_type) - else: - return_data = None + + if not _preload_content: + return return_data + + if six.PY3 and response_type not in ["file", "bytes"]: + match = None + if content_type is not None: + match = re.search(r"charset=([a-zA-Z\-\d]+)[\s\;]?", content_type) + encoding = match.group(1) if match else "utf-8" + response_data.data = response_data.data.decode(encoding) + + # deserialize response data + if response_type: + return_data = self.deserialize(response_data, response_type) + else: + return_data = None if _return_http_data_only: return (return_data) @@ -199,7 +224,7 @@ def sanitize_for_serialization(self, obj): convert to string in iso8601 format. If obj is list, sanitize each element in the list. If obj is dict, return the dict. - If obj is swagger model, return the properties dict. + If obj is OpenAPI model, return the properties dict. :param obj: The data to serialize. :return: The serialized form of data. @@ -221,12 +246,12 @@ def sanitize_for_serialization(self, obj): obj_dict = obj else: # Convert model obj to dict except - # attributes `swagger_types`, `attribute_map` + # attributes `openapi_types`, `attribute_map` # and attributes which value is not None. 
# Convert attribute name to json key in # model definition for request. obj_dict = {obj.attribute_map[attr]: getattr(obj, attr) - for attr, _ in six.iteritems(obj.swagger_types) + for attr, _ in six.iteritems(obj.openapi_types) if getattr(obj, attr) is not None} return {key: self.sanitize_for_serialization(val) @@ -280,7 +305,7 @@ def __deserialize(self, data, klass): if klass in self.NATIVE_TYPES_MAPPING: klass = self.NATIVE_TYPES_MAPPING[klass] else: - klass = getattr(tfjob.models, klass) + klass = getattr(kubeflow.training.models, klass) if klass in self.PRIMITIVE_TYPES: return self.__deserialize_primitive(data, klass) @@ -289,7 +314,7 @@ def __deserialize(self, data, klass): elif klass == datetime.date: return self.__deserialize_date(data) elif klass == datetime.datetime: - return self.__deserialize_datatime(data) + return self.__deserialize_datetime(data) else: return self.__deserialize_model(data, klass) @@ -298,10 +323,10 @@ def call_api(self, resource_path, method, body=None, post_params=None, files=None, response_type=None, auth_settings=None, async_req=None, _return_http_data_only=None, collection_formats=None, - _preload_content=True, _request_timeout=None): + _preload_content=True, _request_timeout=None, _host=None): """Makes the HTTP request (synchronous) and returns deserialized data. - To make an async request, set the async_req parameter. + To make an async_req request, set the async_req parameter. :param resource_path: Path to method endpoint. :param method: Method to call. 
@@ -341,17 +366,20 @@ def call_api(self, resource_path, method, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, - _preload_content, _request_timeout) - else: - thread = self.pool.apply_async(self.__call_api, (resource_path, - method, path_params, query_params, - header_params, body, - post_params, files, - response_type, auth_settings, - _return_http_data_only, - collection_formats, - _preload_content, _request_timeout)) - return thread + _preload_content, _request_timeout, _host) + + return self.pool.apply_async(self.__call_api, (resource_path, + method, path_params, + query_params, + header_params, body, + post_params, files, + response_type, + auth_settings, + _return_http_data_only, + collection_formats, + _preload_content, + _request_timeout, + _host)) def request(self, method, url, query_params=None, headers=None, post_params=None, body=None, _preload_content=True, @@ -373,10 +401,8 @@ def request(self, method, url, query_params=None, headers=None, return self.rest_client.OPTIONS(url, query_params=query_params, headers=headers, - post_params=post_params, _preload_content=_preload_content, - _request_timeout=_request_timeout, - body=body) + _request_timeout=_request_timeout) elif method == "POST": return self.rest_client.POST(url, query_params=query_params, @@ -409,7 +435,7 @@ def request(self, method, url, query_params=None, headers=None, _request_timeout=_request_timeout, body=body) else: - raise ValueError( + raise ApiValueError( "http method must be `GET`, `HEAD`, `OPTIONS`," " `POST`, `PATCH`, `PUT` or `DELETE`." ) @@ -444,18 +470,14 @@ def parameters_to_tuples(self, params, collection_formats): new_params.append((k, v)) return new_params - def prepare_post_parameters(self, post_params=None, files=None): + def files_parameters(self, files=None): """Builds form parameters. - :param post_params: Normal form parameters. :param files: File parameters. :return: Form parameters with files. 
""" params = [] - if post_params: - params = post_params - if files: for k, v in six.iteritems(files): if not v: @@ -517,14 +539,14 @@ def update_params_for_auth(self, headers, querys, auth_settings): for auth in auth_settings: auth_setting = self.configuration.auth_settings().get(auth) if auth_setting: - if not auth_setting['value']: - continue + if auth_setting['in'] == 'cookie': + headers['Cookie'] = auth_setting['value'] elif auth_setting['in'] == 'header': headers[auth_setting['key']] = auth_setting['value'] elif auth_setting['in'] == 'query': querys.append((auth_setting['key'], auth_setting['value'])) else: - raise ValueError( + raise ApiValueError( 'Authentication token must be in `query` or `header`' ) @@ -568,7 +590,7 @@ def __deserialize_primitive(self, data, klass): return data def __deserialize_object(self, value): - """Return a original value. + """Return an original value. :return: object. """ @@ -581,7 +603,6 @@ def __deserialize_date(self, string): :return: date. """ try: - from dateutil.parser import parse return parse(string).date() except ImportError: return string @@ -591,7 +612,7 @@ def __deserialize_date(self, string): reason="Failed to parse `{0}` as date object".format(string) ) - def __deserialize_datatime(self, string): + def __deserialize_datetime(self, string): """Deserializes string to datetime. The string should be in iso8601 datetime format. @@ -600,7 +621,6 @@ def __deserialize_datatime(self, string): :return: datetime. """ try: - from dateutil.parser import parse return parse(string) except ImportError: return string @@ -613,9 +633,6 @@ def __deserialize_datatime(self, string): ) ) - def __hasattr(self, object, name): - return name in object.__class__.__dict__ - def __deserialize_model(self, data, klass): """Deserializes list or dict to model. @@ -623,29 +640,26 @@ def __deserialize_model(self, data, klass): :param klass: class literal. :return: model object. 
""" + has_discriminator = False + if (hasattr(klass, 'get_real_child_model') + and klass.discriminator_value_class_map): + has_discriminator = True - if (not klass.swagger_types and - not self.__hasattr(klass, 'get_real_child_model')): + if not klass.openapi_types and has_discriminator is False: return data kwargs = {} - if klass.swagger_types is not None: - for attr, attr_type in six.iteritems(klass.swagger_types): - if (data is not None and - klass.attribute_map[attr] in data and - isinstance(data, (list, dict))): + if (data is not None and + klass.openapi_types is not None and + isinstance(data, (list, dict))): + for attr, attr_type in six.iteritems(klass.openapi_types): + if klass.attribute_map[attr] in data: value = data[klass.attribute_map[attr]] kwargs[attr] = self.__deserialize(value, attr_type) instance = klass(**kwargs) - if (isinstance(instance, dict) and - klass.swagger_types is not None and - isinstance(data, dict)): - for key, value in data.items(): - if key not in klass.swagger_types: - instance[key] = value - if self.__hasattr(instance, 'get_real_child_model'): + if has_discriminator: klass_name = instance.get_real_child_model(data) if klass_name: instance = self.__deserialize(data, klass_name) diff --git a/sdk/python/kubeflow/training/configuration.py b/sdk/python/kubeflow/training/configuration.py new file mode 100644 index 0000000000..f4a81b8208 --- /dev/null +++ b/sdk/python/kubeflow/training/configuration.py @@ -0,0 +1,376 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import copy +import logging +import multiprocessing +import sys +import urllib3 + +import six +from six.moves import http_client as httplib + + +class Configuration(object): + """NOTE: This class is auto generated by OpenAPI Generator + + Ref: https://openapi-generator.tech + Do not edit the class 
manually. + + :param host: Base url + :param api_key: Dict to store API key(s). + Each entry in the dict specifies an API key. + The dict key is the name of the security scheme in the OAS specification. + The dict value is the API key secret. + :param api_key_prefix: Dict to store API prefix (e.g. Bearer) + The dict key is the name of the security scheme in the OAS specification. + The dict value is an API key prefix when generating the auth data. + :param username: Username for HTTP basic authentication + :param password: Password for HTTP basic authentication + :param discard_unknown_keys: Boolean value indicating whether to discard + unknown properties. A server may send a response that includes additional + properties that are not known by the client in the following scenarios: + 1. The OpenAPI document is incomplete, i.e. it does not match the server + implementation. + 2. The client was generated using an older version of the OpenAPI document + and the server has been upgraded since then. + If a schema in the OpenAPI document defines the additionalProperties attribute, + then all undeclared properties received by the server are injected into the + additional properties map. In that case, there are undeclared properties, and + nothing to discard. + + """ + + _default = None + + def __init__(self, host="http://localhost", + api_key=None, api_key_prefix=None, + username=None, password=None, + discard_unknown_keys=False, + ): + """Constructor + """ + self.host = host + """Default Base url + """ + self.temp_folder_path = None + """Temp file folder for downloading files + """ + # Authentication Settings + self.api_key = {} + if api_key: + self.api_key = api_key + """dict to store API key(s) + """ + self.api_key_prefix = {} + if api_key_prefix: + self.api_key_prefix = api_key_prefix + """dict to store API prefix (e.g. 
Bearer) + """ + self.refresh_api_key_hook = None + """function hook to refresh API key if expired + """ + self.username = username + """Username for HTTP basic authentication + """ + self.password = password + """Password for HTTP basic authentication + """ + self.discard_unknown_keys = discard_unknown_keys + self.logger = {} + """Logging Settings + """ + self.logger["package_logger"] = logging.getLogger("kubeflow.training") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + self.logger_format = '%(asctime)s %(levelname)s %(message)s' + """Log format + """ + self.logger_stream_handler = None + """Log stream handler + """ + self.logger_file_handler = None + """Log file handler + """ + self.logger_file = None + """Debug file location + """ + self.debug = False + """Debug switch + """ + + self.verify_ssl = True + """SSL/TLS verification + Set this to false to skip verifying SSL certificate when calling API + from https server. + """ + self.ssl_ca_cert = None + """Set this to customize the certificate file to verify the peer. + """ + self.cert_file = None + """client certificate file + """ + self.key_file = None + """client key file + """ + self.assert_hostname = None + """Set this to True/False to enable/disable SSL hostname verification. + """ + + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + """urllib3 connection pool's maximum number of connections saved + per pool. urllib3 uses 1 connection as default value, but this is + not the best value when you are making a lot of possibly parallel + requests to the same host, which is often the case here. + cpu_count * 5 is used as default value to increase performance. 
+ """ + + self.proxy = None + """Proxy URL + """ + self.proxy_headers = None + """Proxy headers + """ + self.safe_chars_for_path_param = '' + """Safe chars for path_param + """ + self.retries = None + """Adding retries to override urllib3 default value 3 + """ + # Disable client side validation + self.client_side_validation = True + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k not in ('logger', 'logger_file_handler'): + setattr(result, k, copy.deepcopy(v, memo)) + # shallow copy of loggers + result.logger = copy.copy(self.logger) + # use setters to configure loggers + result.logger_file = self.logger_file + result.debug = self.debug + return result + + def __setattr__(self, name, value): + object.__setattr__(self, name, value) + + @classmethod + def set_default(cls, default): + """Set default instance of configuration. + + It stores default configuration, which can be + returned by get_default_copy method. + + :param default: object of Configuration + """ + cls._default = copy.deepcopy(default) + + @classmethod + def get_default_copy(cls): + """Return new instance of configuration. + + This method returns newly created, based on default constructor, + object of Configuration class or returns a copy of default + configuration passed by the set_default method. + + :return: The configuration object. + """ + if cls._default is not None: + return copy.deepcopy(cls._default) + return Configuration() + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. 
Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in six.iteritems(self.logger): + logger.addHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.DEBUG) + # turn on httplib debug + httplib.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in six.iteritems(self.logger): + logger.setLevel(logging.WARNING) + # turn off httplib debug + httplib.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. + :return: The token for api key authentication. 
+ """ + if self.refresh_api_key_hook is not None: + self.refresh_api_key_hook(self) + key = self.api_key.get(identifier) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def get_basic_auth_token(self): + """Gets HTTP basic authentication header (string). + + :return: The token for basic HTTP authentication. + """ + username = "" + if self.username is not None: + username = self.username + password = "" + if self.password is not None: + password = self.password + return urllib3.util.make_headers( + basic_auth=username + ':' + password + ).get('authorization') + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + auth = {} + return auth + + def to_debug_report(self): + """Gets the essential information for debugging. + + :return: The report for debugging. + """ + return "Python SDK Debug Report:\n"\ + "OS: {env}\n"\ + "Python Version: {pyversion}\n"\ + "Version of the API: v1.3.0\n"\ + "SDK Package Version: 1.3.0".\ + format(env=sys.platform, pyversion=sys.version) + + def get_host_settings(self): + """Gets an array of host settings + + :return: An array of host settings + """ + return [ + { + 'url': "/", + 'description': "No description provided", + } + ] + + def get_host_from_settings(self, index, variables=None): + """Gets host URL based on the index and variables + :param index: array index of the host settings + :param variables: hash of variable and the corresponding value + :return: URL based on host settings + """ + variables = {} if variables is None else variables + servers = self.get_host_settings() + + try: + server = servers[index] + except IndexError: + raise ValueError( + "Invalid index {0} when selecting the host settings. 
" + "Must be less than {1}".format(index, len(servers))) + + url = server['url'] + + # go through variables and replace placeholders + for variable_name, variable in server['variables'].items(): + used_value = variables.get( + variable_name, variable['default_value']) + + if 'enum_values' in variable \ + and used_value not in variable['enum_values']: + raise ValueError( + "The variable `{0}` in the host URL has invalid value " + "{1}. Must be {2}.".format( + variable_name, variables[variable_name], + variable['enum_values'])) + + url = url.replace("{" + variable_name + "}", used_value) + + return url diff --git a/sdk/python/kubeflow/tfjob/constants/__init__.py b/sdk/python/kubeflow/training/constants/__init__.py similarity index 100% rename from sdk/python/kubeflow/tfjob/constants/__init__.py rename to sdk/python/kubeflow/training/constants/__init__.py diff --git a/sdk/python/kubeflow/tfjob/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py similarity index 62% rename from sdk/python/kubeflow/tfjob/constants/constants.py rename to sdk/python/kubeflow/training/constants/constants.py index 3373e02d1b..1afdf6ffbf 100644 --- a/sdk/python/kubeflow/tfjob/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -1,4 +1,4 @@ -# Copyright 2019 kubeflow.org. +# Copyright 2021 kubeflow.org. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,6 +14,10 @@ import os +# General constants +# How long to wait in seconds for requests to the ApiServer +APISERVER_TIMEOUT = 120 + # TFJob K8S constants TFJOB_GROUP = 'kubeflow.org' TFJOB_KIND = 'TFJob' @@ -22,12 +26,25 @@ TFJOB_LOGLEVEL = os.environ.get('TFJOB_LOGLEVEL', 'INFO').upper() -# How long to wait in seconds for requests to the ApiServer -APISERVER_TIMEOUT = 120 - -# TFJob Labels Name +# TFJob Label Names TFJOB_GROUP_LABEL = 'group-name' TFJOB_NAME_LABEL = 'job-name' TFJOB_TYPE_LABEL = 'replica-type' TFJOB_INDEX_LABEL = 'replica-index' TFJOB_ROLE_LABEL = 'job-role' + +# PyTorchJob K8S constants +PYTORCHJOB_GROUP = 'kubeflow.org' +PYTORCHJOB_KIND = 'PyTorchJob' +PYTORCHJOB_PLURAL = 'pytorchjobs' +PYTORCHJOB_VERSION = os.environ.get('PYTORCHJOB_VERSION', 'v1') + +PYTORCH_LOGLEVEL = os.environ.get('PYTORCHJOB_LOGLEVEL', 'INFO').upper() + +# PyTorchJob Label Names +PYTORCHJOB_CONTROLLER_LABEL = 'controller-name' +PYTORCHJOB_GROUP_LABEL = 'group-name' +PYTORCHJOB_NAME_LABEL = 'pytorch-job-name' +PYTORCHJOB_TYPE_LABEL = 'pytorch-replica-type' +PYTORCHJOB_INDEX_LABEL = 'pytorch-replica-index' +PYTORCHJOB_ROLE_LABEL = 'job-role' diff --git a/sdk/python/kubeflow/training/exceptions.py b/sdk/python/kubeflow/training/exceptions.py new file mode 100644 index 0000000000..610ab08638 --- /dev/null +++ b/sdk/python/kubeflow/training/exceptions.py @@ -0,0 +1,120 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import six + + +class OpenApiException(Exception): + """The base exception class for all OpenAPIExceptions""" + + +class ApiTypeError(OpenApiException, TypeError): + def __init__(self, msg, path_to_item=None, valid_classes=None, + key_type=None): + """ Raises an exception for TypeErrors + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list): a list of keys an indices to get to the + 
current_item + None if unset + valid_classes (tuple): the primitive classes that current item + should be an instance of + None if unset + key_type (bool): False if our value is a value in a dict + True if it is a key in a dict + False if our item is an item in a list + None if unset + """ + self.path_to_item = path_to_item + self.valid_classes = valid_classes + self.key_type = key_type + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiTypeError, self).__init__(full_msg) + + +class ApiValueError(OpenApiException, ValueError): + def __init__(self, msg, path_to_item=None): + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list) the path to the exception in the + received_data dict. None if unset + """ + + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiValueError, self).__init__(full_msg) + + +class ApiKeyError(OpenApiException, KeyError): + def __init__(self, msg, path_to_item=None): + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiKeyError, self).__init__(full_msg) + + +class ApiException(OpenApiException): + + def __init__(self, status=None, reason=None, http_resp=None): + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\n"\ + "Reason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: 
{0}\n".format( + self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message + + +def render_path(path_to_item): + """Returns a string representation of a path""" + result = "" + for pth in path_to_item: + if isinstance(pth, six.integer_types): + result += "[{0}]".format(pth) + else: + result += "['{0}']".format(pth) + return result diff --git a/sdk/python/kubeflow/training/models/__init__.py b/sdk/python/kubeflow/training/models/__init__.py new file mode 100644 index 0000000000..f480b4e80e --- /dev/null +++ b/sdk/python/kubeflow/training/models/__init__.py @@ -0,0 +1,34 @@ +# coding: utf-8 + +# flake8: noqa +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +# import models into model package +from kubeflow.training.models.v1_job_condition import V1JobCondition +from kubeflow.training.models.v1_job_status import V1JobStatus +from kubeflow.training.models.v1_mx_job import V1MXJob +from kubeflow.training.models.v1_mx_job_list import V1MXJobList +from kubeflow.training.models.v1_mx_job_spec import V1MXJobSpec +from kubeflow.training.models.v1_py_torch_job import V1PyTorchJob +from kubeflow.training.models.v1_py_torch_job_list import V1PyTorchJobList +from kubeflow.training.models.v1_py_torch_job_spec import V1PyTorchJobSpec +from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec +from kubeflow.training.models.v1_replica_status import V1ReplicaStatus +from kubeflow.training.models.v1_run_policy import V1RunPolicy +from kubeflow.training.models.v1_scheduling_policy import V1SchedulingPolicy +from kubeflow.training.models.v1_tf_job import V1TFJob +from kubeflow.training.models.v1_tf_job_list import V1TFJobList +from kubeflow.training.models.v1_tf_job_spec import V1TFJobSpec +from kubeflow.training.models.v1_xg_boost_job import V1XGBoostJob 
+from kubeflow.training.models.v1_xg_boost_job_list import V1XGBoostJobList +from kubeflow.training.models.v1_xg_boost_job_spec import V1XGBoostJobSpec diff --git a/sdk/python/kubeflow/tfjob/models/v1_job_condition.py b/sdk/python/kubeflow/training/models/v1_job_condition.py similarity index 77% rename from sdk/python/kubeflow/tfjob/models/v1_job_condition.py rename to sdk/python/kubeflow/training/models/v1_job_condition.py index 77b3ff4267..f01aeaf221 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_job_condition.py +++ b/sdk/python/kubeflow/training/models/v1_job_condition.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,25 +15,26 @@ import six -from kubeflow.tfjob.models.v1_time import V1Time # noqa: F401,E501 +from kubeflow.training.configuration import Configuration class V1JobCondition(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. 
""" """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. """ - swagger_types = { - 'last_transition_time': 'V1Time', - 'last_update_time': 'V1Time', + openapi_types = { + 'last_transition_time': 'K8sIoApimachineryPkgApisMetaV1Time', + 'last_update_time': 'K8sIoApimachineryPkgApisMetaV1Time', 'message': 'str', 'reason': 'str', 'status': 'str', @@ -64,8 +50,11 @@ class V1JobCondition(object): 'type': 'type' } - def __init__(self, last_transition_time=None, last_update_time=None, message=None, reason=None, status=None, type=None): # noqa: E501 - """V1JobCondition - a model defined in Swagger""" # noqa: E501 + def __init__(self, last_transition_time=None, last_update_time=None, message=None, reason=None, status=None, type=None, local_vars_configuration=None): # noqa: E501 + """V1JobCondition - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._last_transition_time = None self._last_update_time = None @@ -90,10 +79,9 @@ def __init__(self, last_transition_time=None, last_update_time=None, message=Non def last_transition_time(self): """Gets the last_transition_time of this V1JobCondition. # noqa: E501 - Last time the condition transitioned from one status to another. # noqa: E501 :return: The last_transition_time of this V1JobCondition. # noqa: E501 - :rtype: V1Time + :rtype: K8sIoApimachineryPkgApisMetaV1Time """ return self._last_transition_time @@ -101,10 +89,9 @@ def last_transition_time(self): def last_transition_time(self, last_transition_time): """Sets the last_transition_time of this V1JobCondition. - Last time the condition transitioned from one status to another. 
# noqa: E501 :param last_transition_time: The last_transition_time of this V1JobCondition. # noqa: E501 - :type: V1Time + :type: K8sIoApimachineryPkgApisMetaV1Time """ self._last_transition_time = last_transition_time @@ -113,10 +100,9 @@ def last_transition_time(self, last_transition_time): def last_update_time(self): """Gets the last_update_time of this V1JobCondition. # noqa: E501 - The last time this condition was updated. # noqa: E501 :return: The last_update_time of this V1JobCondition. # noqa: E501 - :rtype: V1Time + :rtype: K8sIoApimachineryPkgApisMetaV1Time """ return self._last_update_time @@ -124,10 +110,9 @@ def last_update_time(self): def last_update_time(self, last_update_time): """Sets the last_update_time of this V1JobCondition. - The last time this condition was updated. # noqa: E501 :param last_update_time: The last_update_time of this V1JobCondition. # noqa: E501 - :type: V1Time + :type: K8sIoApimachineryPkgApisMetaV1Time """ self._last_update_time = last_update_time @@ -198,7 +183,7 @@ def status(self, status): :param status: The status of this V1JobCondition. # noqa: E501 :type: str """ - if status is None: + if self.local_vars_configuration.client_side_validation and status is None: # noqa: E501 raise ValueError("Invalid value for `status`, must not be `None`") # noqa: E501 self._status = status @@ -223,7 +208,7 @@ def type(self, type): :param type: The type of this V1JobCondition. 
# noqa: E501 :type: str """ - if type is None: + if self.local_vars_configuration.client_side_validation and type is None: # noqa: E501 raise ValueError("Invalid value for `type`, must not be `None`") # noqa: E501 self._type = type @@ -232,7 +217,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -249,9 +234,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1JobCondition, dict): - for key, value in self.items(): - result[key] = value return result @@ -268,8 +250,11 @@ def __eq__(self, other): if not isinstance(other, V1JobCondition): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1JobCondition): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/models/v1_job_status.py b/sdk/python/kubeflow/training/models/v1_job_status.py similarity index 65% rename from sdk/python/kubeflow/tfjob/models/v1_job_status.py rename to sdk/python/kubeflow/training/models/v1_job_status.py index 12105ddb0f..d890d110da 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_job_status.py +++ b/sdk/python/kubeflow/training/models/v1_job_status.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,30 +15,29 @@ import six -from kubernetes.client import V1JobCondition # noqa: F401,E501 -from kubeflow.tfjob.models.v1_replica_status import V1ReplicaStatus # noqa: F401,E501 -from kubeflow.tfjob.models.v1_time import V1Time # noqa: F401,E501 +from kubeflow.training.configuration import Configuration class V1JobStatus(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. """ """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. 
""" - swagger_types = { - 'completion_time': 'V1Time', + openapi_types = { + 'completion_time': 'K8sIoApimachineryPkgApisMetaV1Time', 'conditions': 'list[V1JobCondition]', - 'last_reconcile_time': 'V1Time', + 'last_reconcile_time': 'K8sIoApimachineryPkgApisMetaV1Time', 'replica_statuses': 'dict(str, V1ReplicaStatus)', - 'start_time': 'V1Time' + 'start_time': 'K8sIoApimachineryPkgApisMetaV1Time' } attribute_map = { @@ -64,8 +48,11 @@ class V1JobStatus(object): 'start_time': 'startTime' } - def __init__(self, completion_time=None, conditions=None, last_reconcile_time=None, replica_statuses=None, start_time=None): # noqa: E501 - """V1JobStatus - a model defined in Swagger""" # noqa: E501 + def __init__(self, completion_time=None, conditions=None, last_reconcile_time=None, replica_statuses=None, start_time=None, local_vars_configuration=None): # noqa: E501 + """V1JobStatus - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._completion_time = None self._conditions = None @@ -87,10 +74,9 @@ def __init__(self, completion_time=None, conditions=None, last_reconcile_time=No def completion_time(self): """Gets the completion_time of this V1JobStatus. # noqa: E501 - Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. # noqa: E501 :return: The completion_time of this V1JobStatus. # noqa: E501 - :rtype: V1Time + :rtype: K8sIoApimachineryPkgApisMetaV1Time """ return self._completion_time @@ -98,10 +84,9 @@ def completion_time(self): def completion_time(self, completion_time): """Sets the completion_time of this V1JobStatus. - Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. 
# noqa: E501 :param completion_time: The completion_time of this V1JobStatus. # noqa: E501 - :type: V1Time + :type: K8sIoApimachineryPkgApisMetaV1Time """ self._completion_time = completion_time @@ -126,7 +111,7 @@ def conditions(self, conditions): :param conditions: The conditions of this V1JobStatus. # noqa: E501 :type: list[V1JobCondition] """ - if conditions is None: + if self.local_vars_configuration.client_side_validation and conditions is None: # noqa: E501 raise ValueError("Invalid value for `conditions`, must not be `None`") # noqa: E501 self._conditions = conditions @@ -135,10 +120,9 @@ def conditions(self, conditions): def last_reconcile_time(self): """Gets the last_reconcile_time of this V1JobStatus. # noqa: E501 - Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. # noqa: E501 :return: The last_reconcile_time of this V1JobStatus. # noqa: E501 - :rtype: V1Time + :rtype: K8sIoApimachineryPkgApisMetaV1Time """ return self._last_reconcile_time @@ -146,10 +130,9 @@ def last_reconcile_time(self): def last_reconcile_time(self, last_reconcile_time): """Sets the last_reconcile_time of this V1JobStatus. - Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. # noqa: E501 :param last_reconcile_time: The last_reconcile_time of this V1JobStatus. # noqa: E501 - :type: V1Time + :type: K8sIoApimachineryPkgApisMetaV1Time """ self._last_reconcile_time = last_reconcile_time @@ -174,7 +157,7 @@ def replica_statuses(self, replica_statuses): :param replica_statuses: The replica_statuses of this V1JobStatus. 
# noqa: E501 :type: dict(str, V1ReplicaStatus) """ - if replica_statuses is None: + if self.local_vars_configuration.client_side_validation and replica_statuses is None: # noqa: E501 raise ValueError("Invalid value for `replica_statuses`, must not be `None`") # noqa: E501 self._replica_statuses = replica_statuses @@ -183,10 +166,9 @@ def replica_statuses(self, replica_statuses): def start_time(self): """Gets the start_time of this V1JobStatus. # noqa: E501 - Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. # noqa: E501 :return: The start_time of this V1JobStatus. # noqa: E501 - :rtype: V1Time + :rtype: K8sIoApimachineryPkgApisMetaV1Time """ return self._start_time @@ -194,10 +176,9 @@ def start_time(self): def start_time(self, start_time): """Sets the start_time of this V1JobStatus. - Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC. # noqa: E501 :param start_time: The start_time of this V1JobStatus. 
# noqa: E501 - :type: V1Time + :type: K8sIoApimachineryPkgApisMetaV1Time """ self._start_time = start_time @@ -206,7 +187,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -223,9 +204,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1JobStatus, dict): - for key, value in self.items(): - result[key] = value return result @@ -242,8 +220,11 @@ def __eq__(self, other): if not isinstance(other, V1JobStatus): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1JobStatus): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_mx_job.py b/sdk/python/kubeflow/training/models/v1_mx_job.py new file mode 100644 index 0000000000..e6bddc495f --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_mx_job.py @@ -0,0 +1,228 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1MXJob(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'api_version': 'str', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'spec': 'V1MXJobSpec', + 'status': 'V1JobStatus' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'kind': 'kind', + 'metadata': 'metadata', + 'spec': 'spec', + 'status': 'status' + } + + def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 + """V1MXJob - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._kind = None + self._metadata = None + self._spec = None + self._status = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + if spec is not None: + self.spec = spec + if status is not None: + self.status = status + + @property + def api_version(self): + """Gets the api_version of this V1MXJob. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1MXJob. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1MXJob. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1MXJob. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def kind(self): + """Gets the kind of this V1MXJob. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1MXJob. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1MXJob. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1MXJob. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1MXJob. # noqa: E501 + + + :return: The metadata of this V1MXJob. # noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1MXJob. + + + :param metadata: The metadata of this V1MXJob. # noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + + self._metadata = metadata + + @property + def spec(self): + """Gets the spec of this V1MXJob. # noqa: E501 + + + :return: The spec of this V1MXJob. # noqa: E501 + :rtype: V1MXJobSpec + """ + return self._spec + + @spec.setter + def spec(self, spec): + """Sets the spec of this V1MXJob. + + + :param spec: The spec of this V1MXJob. 
# noqa: E501 + :type: V1MXJobSpec + """ + + self._spec = spec + + @property + def status(self): + """Gets the status of this V1MXJob. # noqa: E501 + + + :return: The status of this V1MXJob. # noqa: E501 + :rtype: V1JobStatus + """ + return self._status + + @status.setter + def status(self, status): + """Sets the status of this V1MXJob. + + + :param status: The status of this V1MXJob. # noqa: E501 + :type: V1JobStatus + """ + + self._status = status + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1MXJob): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1MXJob): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_mx_job_list.py b/sdk/python/kubeflow/training/models/v1_mx_job_list.py new file mode 100644 index 0000000000..94ac5dc45a --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_mx_job_list.py @@ -0,0 +1,203 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: 
https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1MXJobList(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'api_version': 'str', + 'items': 'list[V1MXJob]', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'items': 'items', + 'kind': 'kind', + 'metadata': 'metadata' + } + + def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 + """V1MXJobList - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._items = None + self._kind = None + self._metadata = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + self.items = items + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + + @property + def api_version(self): + """Gets the api_version of this V1MXJobList. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1MXJobList. 
# noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1MXJobList. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1MXJobList. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def items(self): + """Gets the items of this V1MXJobList. # noqa: E501 + + + :return: The items of this V1MXJobList. # noqa: E501 + :rtype: list[V1MXJob] + """ + return self._items + + @items.setter + def items(self, items): + """Sets the items of this V1MXJobList. + + + :param items: The items of this V1MXJobList. # noqa: E501 + :type: list[V1MXJob] + """ + if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 + raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 + + self._items = items + + @property + def kind(self): + """Gets the kind of this V1MXJobList. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1MXJobList. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1MXJobList. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1MXJobList. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1MXJobList. # noqa: E501 + + + :return: The metadata of this V1MXJobList. # noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1MXJobList. + + + :param metadata: The metadata of this V1MXJobList. # noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + + self._metadata = metadata + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1MXJobList): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1MXJobList): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_mx_job_spec.py b/sdk/python/kubeflow/training/models/v1_mx_job_spec.py new file mode 100644 index 
0000000000..d3fd356a85 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_mx_job_spec.py @@ -0,0 +1,179 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1MXJobSpec(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'job_mode': 'str', + 'mx_replica_specs': 'dict(str, V1ReplicaSpec)', + 'run_policy': 'V1RunPolicy' + } + + attribute_map = { + 'job_mode': 'jobMode', + 'mx_replica_specs': 'mxReplicaSpecs', + 'run_policy': 'runPolicy' + } + + def __init__(self, job_mode=None, mx_replica_specs=None, run_policy=None, local_vars_configuration=None): # noqa: E501 + """V1MXJobSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._job_mode = None + self._mx_replica_specs = None + self._run_policy = None + self.discriminator = None + + self.job_mode = job_mode + self.mx_replica_specs = mx_replica_specs + self.run_policy = run_policy + + @property + def job_mode(self): + """Gets the job_mode of this V1MXJobSpec. # noqa: E501 + + JobMode specify the kind of MXjob to do. Different mode may have different MXReplicaSpecs request # noqa: E501 + + :return: The job_mode of this V1MXJobSpec. # noqa: E501 + :rtype: str + """ + return self._job_mode + + @job_mode.setter + def job_mode(self, job_mode): + """Sets the job_mode of this V1MXJobSpec. 
+ + JobMode specify the kind of MXjob to do. Different mode may have different MXReplicaSpecs request # noqa: E501 + + :param job_mode: The job_mode of this V1MXJobSpec. # noqa: E501 + :type: str + """ + if self.local_vars_configuration.client_side_validation and job_mode is None: # noqa: E501 + raise ValueError("Invalid value for `job_mode`, must not be `None`") # noqa: E501 + + self._job_mode = job_mode + + @property + def mx_replica_specs(self): + """Gets the mx_replica_specs of this V1MXJobSpec. # noqa: E501 + + MXReplicaSpecs is map of common.ReplicaType and common.ReplicaSpec specifies the MX replicas to run. For example, { \"Scheduler\": common.ReplicaSpec, \"Server\": common.ReplicaSpec, \"Worker\": common.ReplicaSpec, } # noqa: E501 + + :return: The mx_replica_specs of this V1MXJobSpec. # noqa: E501 + :rtype: dict(str, V1ReplicaSpec) + """ + return self._mx_replica_specs + + @mx_replica_specs.setter + def mx_replica_specs(self, mx_replica_specs): + """Sets the mx_replica_specs of this V1MXJobSpec. + + MXReplicaSpecs is map of common.ReplicaType and common.ReplicaSpec specifies the MX replicas to run. For example, { \"Scheduler\": common.ReplicaSpec, \"Server\": common.ReplicaSpec, \"Worker\": common.ReplicaSpec, } # noqa: E501 + + :param mx_replica_specs: The mx_replica_specs of this V1MXJobSpec. # noqa: E501 + :type: dict(str, V1ReplicaSpec) + """ + if self.local_vars_configuration.client_side_validation and mx_replica_specs is None: # noqa: E501 + raise ValueError("Invalid value for `mx_replica_specs`, must not be `None`") # noqa: E501 + + self._mx_replica_specs = mx_replica_specs + + @property + def run_policy(self): + """Gets the run_policy of this V1MXJobSpec. # noqa: E501 + + + :return: The run_policy of this V1MXJobSpec. # noqa: E501 + :rtype: V1RunPolicy + """ + return self._run_policy + + @run_policy.setter + def run_policy(self, run_policy): + """Sets the run_policy of this V1MXJobSpec. + + + :param run_policy: The run_policy of this V1MXJobSpec. 
# noqa: E501 + :type: V1RunPolicy + """ + if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 + raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 + + self._run_policy = run_policy + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1MXJobSpec): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1MXJobSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_py_torch_job.py b/sdk/python/kubeflow/training/models/v1_py_torch_job.py new file mode 100644 index 0000000000..99625ce159 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_py_torch_job.py @@ -0,0 +1,228 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1PyTorchJob(object): + """NOTE: 
This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'api_version': 'str', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'spec': 'V1PyTorchJobSpec', + 'status': 'V1JobStatus' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'kind': 'kind', + 'metadata': 'metadata', + 'spec': 'spec', + 'status': 'status' + } + + def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 + """V1PyTorchJob - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._kind = None + self._metadata = None + self._spec = None + self._status = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + if spec is not None: + self.spec = spec + if status is not None: + self.status = status + + @property + def api_version(self): + """Gets the api_version of this V1PyTorchJob. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1PyTorchJob. 
# noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1PyTorchJob. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1PyTorchJob. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def kind(self): + """Gets the kind of this V1PyTorchJob. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1PyTorchJob. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1PyTorchJob. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1PyTorchJob. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1PyTorchJob. # noqa: E501 + + + :return: The metadata of this V1PyTorchJob. # noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1PyTorchJob. + + + :param metadata: The metadata of this V1PyTorchJob. 
# noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + + self._metadata = metadata + + @property + def spec(self): + """Gets the spec of this V1PyTorchJob. # noqa: E501 + + + :return: The spec of this V1PyTorchJob. # noqa: E501 + :rtype: V1PyTorchJobSpec + """ + return self._spec + + @spec.setter + def spec(self, spec): + """Sets the spec of this V1PyTorchJob. + + + :param spec: The spec of this V1PyTorchJob. # noqa: E501 + :type: V1PyTorchJobSpec + """ + + self._spec = spec + + @property + def status(self): + """Gets the status of this V1PyTorchJob. # noqa: E501 + + + :return: The status of this V1PyTorchJob. # noqa: E501 + :rtype: V1JobStatus + """ + return self._status + + @status.setter + def status(self, status): + """Sets the status of this V1PyTorchJob. + + + :param status: The status of this V1PyTorchJob. # noqa: E501 + :type: V1JobStatus + """ + + self._status = status + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1PyTorchJob): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1PyTorchJob): + return True + 
+ return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py b/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py new file mode 100644 index 0000000000..581751da36 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py @@ -0,0 +1,205 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1PyTorchJobList(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'api_version': 'str', + 'items': 'list[V1PyTorchJob]', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'items': 'items', + 'kind': 'kind', + 'metadata': 'metadata' + } + + def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 + """V1PyTorchJobList - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._items = None + self._kind = None + self._metadata = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + self.items = items + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + + @property + def api_version(self): + """Gets the api_version of this V1PyTorchJobList. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1PyTorchJobList. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1PyTorchJobList. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1PyTorchJobList. 
# noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def items(self): + """Gets the items of this V1PyTorchJobList. # noqa: E501 + + List of PyTorchJobs. # noqa: E501 + + :return: The items of this V1PyTorchJobList. # noqa: E501 + :rtype: list[V1PyTorchJob] + """ + return self._items + + @items.setter + def items(self, items): + """Sets the items of this V1PyTorchJobList. + + List of PyTorchJobs. # noqa: E501 + + :param items: The items of this V1PyTorchJobList. # noqa: E501 + :type: list[V1PyTorchJob] + """ + if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 + raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 + + self._items = items + + @property + def kind(self): + """Gets the kind of this V1PyTorchJobList. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1PyTorchJobList. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1PyTorchJobList. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1PyTorchJobList. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1PyTorchJobList. # noqa: E501 + + + :return: The metadata of this V1PyTorchJobList. 
# noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1PyTorchJobList. + + + :param metadata: The metadata of this V1PyTorchJobList. # noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + + self._metadata = metadata + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1PyTorchJobList): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1PyTorchJobList): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_py_torch_job_spec.py b/sdk/python/kubeflow/training/models/v1_py_torch_job_spec.py new file mode 100644 index 0000000000..c7c219151e --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_py_torch_job_spec.py @@ -0,0 +1,150 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: 
F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1PyTorchJobSpec(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'pytorch_replica_specs': 'dict(str, V1ReplicaSpec)', + 'run_policy': 'V1RunPolicy' + } + + attribute_map = { + 'pytorch_replica_specs': 'pytorchReplicaSpecs', + 'run_policy': 'runPolicy' + } + + def __init__(self, pytorch_replica_specs=None, run_policy=None, local_vars_configuration=None): # noqa: E501 + """V1PyTorchJobSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._pytorch_replica_specs = None + self._run_policy = None + self.discriminator = None + + self.pytorch_replica_specs = pytorch_replica_specs + self.run_policy = run_policy + + @property + def pytorch_replica_specs(self): + """Gets the pytorch_replica_specs of this V1PyTorchJobSpec. # noqa: E501 + + A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } # noqa: E501 + + :return: The pytorch_replica_specs of this V1PyTorchJobSpec. # noqa: E501 + :rtype: dict(str, V1ReplicaSpec) + """ + return self._pytorch_replica_specs + + @pytorch_replica_specs.setter + def pytorch_replica_specs(self, pytorch_replica_specs): + """Sets the pytorch_replica_specs of this V1PyTorchJobSpec. + + A map of PyTorchReplicaType (type) to ReplicaSpec (value). Specifies the PyTorch cluster configuration. 
For example, { \"Master\": PyTorchReplicaSpec, \"Worker\": PyTorchReplicaSpec, } # noqa: E501 + + :param pytorch_replica_specs: The pytorch_replica_specs of this V1PyTorchJobSpec. # noqa: E501 + :type: dict(str, V1ReplicaSpec) + """ + if self.local_vars_configuration.client_side_validation and pytorch_replica_specs is None: # noqa: E501 + raise ValueError("Invalid value for `pytorch_replica_specs`, must not be `None`") # noqa: E501 + + self._pytorch_replica_specs = pytorch_replica_specs + + @property + def run_policy(self): + """Gets the run_policy of this V1PyTorchJobSpec. # noqa: E501 + + + :return: The run_policy of this V1PyTorchJobSpec. # noqa: E501 + :rtype: V1RunPolicy + """ + return self._run_policy + + @run_policy.setter + def run_policy(self, run_policy): + """Sets the run_policy of this V1PyTorchJobSpec. + + + :param run_policy: The run_policy of this V1PyTorchJobSpec. # noqa: E501 + :type: V1RunPolicy + """ + if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 + raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 + + self._run_policy = run_policy + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" 
+ if not isinstance(other, V1PyTorchJobSpec): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1PyTorchJobSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/models/v1_replica_spec.py b/sdk/python/kubeflow/training/models/v1_replica_spec.py similarity index 70% rename from sdk/python/kubeflow/tfjob/models/v1_replica_spec.py rename to sdk/python/kubeflow/training/models/v1_replica_spec.py index a73c0f9151..3026ea6513 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_replica_spec.py +++ b/sdk/python/kubeflow/training/models/v1_replica_spec.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,26 +15,27 @@ import six -from kubernetes.client import V1PodTemplateSpec # noqa: F401,E501 +from kubeflow.training.configuration import Configuration class V1ReplicaSpec(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. 
+ Ref: https://openapi-generator.tech Do not edit the class manually. """ """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. """ - swagger_types = { + openapi_types = { 'replicas': 'int', 'restart_policy': 'str', - 'template': 'V1PodTemplateSpec' + 'template': 'K8sIoApiCoreV1PodTemplateSpec' } attribute_map = { @@ -58,8 +44,11 @@ class V1ReplicaSpec(object): 'template': 'template' } - def __init__(self, replicas=None, restart_policy=None, template=None): # noqa: E501 - """V1ReplicaSpec - a model defined in Swagger""" # noqa: E501 + def __init__(self, replicas=None, restart_policy=None, template=None, local_vars_configuration=None): # noqa: E501 + """V1ReplicaSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._replicas = None self._restart_policy = None @@ -123,10 +112,9 @@ def restart_policy(self, restart_policy): def template(self): """Gets the template of this V1ReplicaSpec. # noqa: E501 - Template is the object that describes the pod that will be created for this replica. RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec # noqa: E501 :return: The template of this V1ReplicaSpec. # noqa: E501 - :rtype: V1PodTemplateSpec + :rtype: K8sIoApiCoreV1PodTemplateSpec """ return self._template @@ -134,10 +122,9 @@ def template(self): def template(self, template): """Sets the template of this V1ReplicaSpec. - Template is the object that describes the pod that will be created for this replica. RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec # noqa: E501 :param template: The template of this V1ReplicaSpec. 
# noqa: E501 - :type: V1PodTemplateSpec + :type: K8sIoApiCoreV1PodTemplateSpec """ self._template = template @@ -146,7 +133,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -163,9 +150,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1ReplicaSpec, dict): - for key, value in self.items(): - result[key] = value return result @@ -182,8 +166,11 @@ def __eq__(self, other): if not isinstance(other, V1ReplicaSpec): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1ReplicaSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/models/v1_replica_status.py b/sdk/python/kubeflow/training/models/v1_replica_status.py similarity index 75% rename from sdk/python/kubeflow/tfjob/models/v1_replica_status.py rename to sdk/python/kubeflow/training/models/v1_replica_status.py index 17c9ee6d2f..cc79ecaa5f 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_replica_status.py +++ b/sdk/python/kubeflow/training/models/v1_replica_status.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,21 +15,24 @@ import six +from kubeflow.training.configuration import Configuration + class V1ReplicaStatus(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. """ """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. """ - swagger_types = { + openapi_types = { 'active': 'int', 'failed': 'int', 'succeeded': 'int' @@ -56,8 +44,11 @@ class V1ReplicaStatus(object): 'succeeded': 'succeeded' } - def __init__(self, active=None, failed=None, succeeded=None): # noqa: E501 - """V1ReplicaStatus - a model defined in Swagger""" # noqa: E501 + def __init__(self, active=None, failed=None, succeeded=None, local_vars_configuration=None): # noqa: E501 + """V1ReplicaStatus - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._active = None self._failed = None @@ -144,7 +135,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -161,9 +152,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1ReplicaStatus, dict): - for key, value in self.items(): - 
result[key] = value return result @@ -180,8 +168,11 @@ def __eq__(self, other): if not isinstance(other, V1ReplicaStatus): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1ReplicaStatus): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_run_policy.py b/sdk/python/kubeflow/training/models/v1_run_policy.py new file mode 100644 index 0000000000..92f154223c --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_run_policy.py @@ -0,0 +1,232 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1RunPolicy(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'active_deadline_seconds': 'int', + 'backoff_limit': 'int', + 'clean_pod_policy': 'str', + 'scheduling_policy': 'V1SchedulingPolicy', + 'ttl_seconds_after_finished': 'int' + } + + attribute_map = { + 'active_deadline_seconds': 'activeDeadlineSeconds', + 'backoff_limit': 'backoffLimit', + 'clean_pod_policy': 'cleanPodPolicy', + 'scheduling_policy': 'schedulingPolicy', + 'ttl_seconds_after_finished': 'ttlSecondsAfterFinished' + } + + def __init__(self, active_deadline_seconds=None, backoff_limit=None, clean_pod_policy=None, scheduling_policy=None, ttl_seconds_after_finished=None, local_vars_configuration=None): # noqa: E501 + """V1RunPolicy - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._active_deadline_seconds = None + self._backoff_limit = None + self._clean_pod_policy = None + self._scheduling_policy = None + self._ttl_seconds_after_finished = None + self.discriminator = None + + if active_deadline_seconds is not None: + self.active_deadline_seconds = active_deadline_seconds + if backoff_limit is not None: + self.backoff_limit = backoff_limit + if clean_pod_policy is not None: + self.clean_pod_policy = clean_pod_policy + if scheduling_policy is not None: + self.scheduling_policy = scheduling_policy + if ttl_seconds_after_finished is not None: + self.ttl_seconds_after_finished = ttl_seconds_after_finished + + @property + def active_deadline_seconds(self): + """Gets the active_deadline_seconds of this V1RunPolicy. # noqa: E501 + + Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. # noqa: E501 + + :return: The active_deadline_seconds of this V1RunPolicy. 
# noqa: E501 + :rtype: int + """ + return self._active_deadline_seconds + + @active_deadline_seconds.setter + def active_deadline_seconds(self, active_deadline_seconds): + """Sets the active_deadline_seconds of this V1RunPolicy. + + Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer. # noqa: E501 + + :param active_deadline_seconds: The active_deadline_seconds of this V1RunPolicy. # noqa: E501 + :type: int + """ + + self._active_deadline_seconds = active_deadline_seconds + + @property + def backoff_limit(self): + """Gets the backoff_limit of this V1RunPolicy. # noqa: E501 + + Optional number of retries before marking this job failed. # noqa: E501 + + :return: The backoff_limit of this V1RunPolicy. # noqa: E501 + :rtype: int + """ + return self._backoff_limit + + @backoff_limit.setter + def backoff_limit(self, backoff_limit): + """Sets the backoff_limit of this V1RunPolicy. + + Optional number of retries before marking this job failed. # noqa: E501 + + :param backoff_limit: The backoff_limit of this V1RunPolicy. # noqa: E501 + :type: int + """ + + self._backoff_limit = backoff_limit + + @property + def clean_pod_policy(self): + """Gets the clean_pod_policy of this V1RunPolicy. # noqa: E501 + + CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. # noqa: E501 + + :return: The clean_pod_policy of this V1RunPolicy. # noqa: E501 + :rtype: str + """ + return self._clean_pod_policy + + @clean_pod_policy.setter + def clean_pod_policy(self, clean_pod_policy): + """Sets the clean_pod_policy of this V1RunPolicy. + + CleanPodPolicy defines the policy to kill pods after the job completes. Default to Running. # noqa: E501 + + :param clean_pod_policy: The clean_pod_policy of this V1RunPolicy. 
# noqa: E501 + :type: str + """ + + self._clean_pod_policy = clean_pod_policy + + @property + def scheduling_policy(self): + """Gets the scheduling_policy of this V1RunPolicy. # noqa: E501 + + + :return: The scheduling_policy of this V1RunPolicy. # noqa: E501 + :rtype: V1SchedulingPolicy + """ + return self._scheduling_policy + + @scheduling_policy.setter + def scheduling_policy(self, scheduling_policy): + """Sets the scheduling_policy of this V1RunPolicy. + + + :param scheduling_policy: The scheduling_policy of this V1RunPolicy. # noqa: E501 + :type: V1SchedulingPolicy + """ + + self._scheduling_policy = scheduling_policy + + @property + def ttl_seconds_after_finished(self): + """Gets the ttl_seconds_after_finished of this V1RunPolicy. # noqa: E501 + + TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. # noqa: E501 + + :return: The ttl_seconds_after_finished of this V1RunPolicy. # noqa: E501 + :rtype: int + """ + return self._ttl_seconds_after_finished + + @ttl_seconds_after_finished.setter + def ttl_seconds_after_finished(self, ttl_seconds_after_finished): + """Sets the ttl_seconds_after_finished of this V1RunPolicy. + + TTLSecondsAfterFinished is the TTL to clean up jobs. It may take extra ReconcilePeriod seconds for the cleanup, since reconcile gets called periodically. Default to infinite. # noqa: E501 + + :param ttl_seconds_after_finished: The ttl_seconds_after_finished of this V1RunPolicy. 
# noqa: E501 + :type: int + """ + + self._ttl_seconds_after_finished = ttl_seconds_after_finished + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1RunPolicy): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1RunPolicy): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_scheduling_policy.py b/sdk/python/kubeflow/training/models/v1_scheduling_policy.py new file mode 100644 index 0000000000..0f3ad232cf --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_scheduling_policy.py @@ -0,0 +1,198 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1SchedulingPolicy(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. 
+ """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'min_available': 'int', + 'min_resources': 'dict(str, K8sIoApimachineryPkgApiResourceQuantity)', + 'priority_class': 'str', + 'queue': 'str' + } + + attribute_map = { + 'min_available': 'minAvailable', + 'min_resources': 'minResources', + 'priority_class': 'priorityClass', + 'queue': 'queue' + } + + def __init__(self, min_available=None, min_resources=None, priority_class=None, queue=None, local_vars_configuration=None): # noqa: E501 + """V1SchedulingPolicy - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._min_available = None + self._min_resources = None + self._priority_class = None + self._queue = None + self.discriminator = None + + if min_available is not None: + self.min_available = min_available + if min_resources is not None: + self.min_resources = min_resources + if priority_class is not None: + self.priority_class = priority_class + if queue is not None: + self.queue = queue + + @property + def min_available(self): + """Gets the min_available of this V1SchedulingPolicy. # noqa: E501 + + + :return: The min_available of this V1SchedulingPolicy. # noqa: E501 + :rtype: int + """ + return self._min_available + + @min_available.setter + def min_available(self, min_available): + """Sets the min_available of this V1SchedulingPolicy. + + + :param min_available: The min_available of this V1SchedulingPolicy. # noqa: E501 + :type: int + """ + + self._min_available = min_available + + @property + def min_resources(self): + """Gets the min_resources of this V1SchedulingPolicy. # noqa: E501 + + + :return: The min_resources of this V1SchedulingPolicy. 
# noqa: E501 + :rtype: dict(str, K8sIoApimachineryPkgApiResourceQuantity) + """ + return self._min_resources + + @min_resources.setter + def min_resources(self, min_resources): + """Sets the min_resources of this V1SchedulingPolicy. + + + :param min_resources: The min_resources of this V1SchedulingPolicy. # noqa: E501 + :type: dict(str, K8sIoApimachineryPkgApiResourceQuantity) + """ + + self._min_resources = min_resources + + @property + def priority_class(self): + """Gets the priority_class of this V1SchedulingPolicy. # noqa: E501 + + + :return: The priority_class of this V1SchedulingPolicy. # noqa: E501 + :rtype: str + """ + return self._priority_class + + @priority_class.setter + def priority_class(self, priority_class): + """Sets the priority_class of this V1SchedulingPolicy. + + + :param priority_class: The priority_class of this V1SchedulingPolicy. # noqa: E501 + :type: str + """ + + self._priority_class = priority_class + + @property + def queue(self): + """Gets the queue of this V1SchedulingPolicy. # noqa: E501 + + + :return: The queue of this V1SchedulingPolicy. # noqa: E501 + :rtype: str + """ + return self._queue + + @queue.setter + def queue(self, queue): + """Sets the queue of this V1SchedulingPolicy. + + + :param queue: The queue of this V1SchedulingPolicy. 
# noqa: E501 + :type: str + """ + + self._queue = queue + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1SchedulingPolicy): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1SchedulingPolicy): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/models/v1_tf_job.py b/sdk/python/kubeflow/training/models/v1_tf_job.py similarity index 69% rename from sdk/python/kubeflow/tfjob/models/v1_tf_job.py rename to sdk/python/kubeflow/training/models/v1_tf_job.py index 6cf67bcb0c..5e03193959 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_tf_job.py +++ b/sdk/python/kubeflow/training/models/v1_tf_job.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,28 +15,27 @@ import six -from kubernetes.client import V1ObjectMeta # noqa: F401,E501 -from kubeflow.tfjob.models.v1_job_status import V1JobStatus # noqa: F401,E501 -from kubeflow.tfjob.models.v1_tf_job_spec import V1TFJobSpec # noqa: F401,E501 +from kubeflow.training.configuration import Configuration class V1TFJob(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. """ """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. 
""" - swagger_types = { + openapi_types = { 'api_version': 'str', 'kind': 'str', - 'metadata': 'V1ObjectMeta', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', 'spec': 'V1TFJobSpec', 'status': 'V1JobStatus' } @@ -64,8 +48,11 @@ class V1TFJob(object): 'status': 'status' } - def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None): # noqa: E501 - """V1TFJob - a model defined in Swagger""" # noqa: E501 + def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 + """V1TFJob - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._api_version = None self._kind = None @@ -89,7 +76,7 @@ def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status def api_version(self): """Gets the api_version of this V1TFJob. # noqa: E501 - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources # noqa: E501 + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 :return: The api_version of this V1TFJob. # noqa: E501 :rtype: str @@ -100,7 +87,7 @@ def api_version(self): def api_version(self, api_version): """Sets the api_version of this V1TFJob. - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources # noqa: E501 + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 :param api_version: The api_version of this V1TFJob. # noqa: E501 :type: str @@ -112,7 +99,7 @@ def api_version(self, api_version): def kind(self): """Gets the kind of this V1TFJob. # noqa: E501 - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds # noqa: E501 + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 :return: The kind of this V1TFJob. # noqa: E501 :rtype: str @@ -123,7 +110,7 @@ def kind(self): def kind(self, kind): """Sets the kind of this V1TFJob. - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds # noqa: E501 + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 :param kind: The kind of this V1TFJob. 
# noqa: E501 :type: str @@ -135,10 +122,9 @@ def kind(self, kind): def metadata(self): """Gets the metadata of this V1TFJob. # noqa: E501 - Standard Kubernetes object's metadata. # noqa: E501 :return: The metadata of this V1TFJob. # noqa: E501 - :rtype: V1ObjectMeta + :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta """ return self._metadata @@ -146,10 +132,9 @@ def metadata(self): def metadata(self, metadata): """Sets the metadata of this V1TFJob. - Standard Kubernetes object's metadata. # noqa: E501 :param metadata: The metadata of this V1TFJob. # noqa: E501 - :type: V1ObjectMeta + :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta """ self._metadata = metadata @@ -158,7 +143,6 @@ def metadata(self, metadata): def spec(self): """Gets the spec of this V1TFJob. # noqa: E501 - Specification of the desired state of the TFJob. # noqa: E501 :return: The spec of this V1TFJob. # noqa: E501 :rtype: V1TFJobSpec @@ -169,7 +153,6 @@ def spec(self): def spec(self, spec): """Sets the spec of this V1TFJob. - Specification of the desired state of the TFJob. # noqa: E501 :param spec: The spec of this V1TFJob. # noqa: E501 :type: V1TFJobSpec @@ -181,7 +164,6 @@ def spec(self, spec): def status(self): """Gets the status of this V1TFJob. # noqa: E501 - Most recently observed status of the TFJob. Read-only (modified by the system). # noqa: E501 :return: The status of this V1TFJob. # noqa: E501 :rtype: V1JobStatus @@ -192,7 +174,6 @@ def status(self): def status(self, status): """Sets the status of this V1TFJob. - Most recently observed status of the TFJob. Read-only (modified by the system). # noqa: E501 :param status: The status of this V1TFJob. 
# noqa: E501 :type: V1JobStatus @@ -204,7 +185,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -221,9 +202,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1TFJob, dict): - for key, value in self.items(): - result[key] = value return result @@ -240,8 +218,11 @@ def __eq__(self, other): if not isinstance(other, V1TFJob): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1TFJob): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/models/v1_tf_job_list.py b/sdk/python/kubeflow/training/models/v1_tf_job_list.py similarity index 71% rename from sdk/python/kubeflow/tfjob/models/v1_tf_job_list.py rename to sdk/python/kubeflow/training/models/v1_tf_job_list.py index 7dac03feea..4e8a50e282 100644 --- a/sdk/python/kubeflow/tfjob/models/v1_tf_job_list.py +++ b/sdk/python/kubeflow/training/models/v1_tf_job_list.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- # coding: utf-8 """ - tfjob + tensorflow - Python SDK for TF-Operator # noqa: E501 + Python SDK for tensorflow # noqa: E501 - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -30,28 +15,28 @@ import six -from kubernetes.client import V1ListMeta # noqa: F401,E501 -from kubeflow.tfjob.models.v1_tf_job import V1TFJob # noqa: F401,E501 +from kubeflow.training.configuration import Configuration class V1TFJobList(object): - """NOTE: This class is auto generated by the swagger code generator program. + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech Do not edit the class manually. """ """ Attributes: - swagger_types (dict): The key is attribute name + openapi_types (dict): The key is attribute name and the value is attribute type. attribute_map (dict): The key is attribute name and the value is json key in definition. """ - swagger_types = { + openapi_types = { 'api_version': 'str', 'items': 'list[V1TFJob]', 'kind': 'str', - 'metadata': 'V1ListMeta' + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' } attribute_map = { @@ -61,8 +46,11 @@ class V1TFJobList(object): 'metadata': 'metadata' } - def __init__(self, api_version=None, items=None, kind=None, metadata=None): # noqa: E501 - """V1TFJobList - a model defined in Swagger""" # noqa: E501 + def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 + """V1TFJobList - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration self._api_version = None self._items = None @@ -82,7 +70,7 @@ def __init__(self, api_version=None, items=None, kind=None, metadata=None): # n def api_version(self): """Gets the api_version of this V1TFJobList. 
# noqa: E501 - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources # noqa: E501 + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 :return: The api_version of this V1TFJobList. # noqa: E501 :rtype: str @@ -93,7 +81,7 @@ def api_version(self): def api_version(self, api_version): """Sets the api_version of this V1TFJobList. - APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources # noqa: E501 + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 :param api_version: The api_version of this V1TFJobList. # noqa: E501 :type: str @@ -121,7 +109,7 @@ def items(self, items): :param items: The items of this V1TFJobList. # noqa: E501 :type: list[V1TFJob] """ - if items is None: + if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 self._items = items @@ -130,7 +118,7 @@ def items(self, items): def kind(self): """Gets the kind of this V1TFJobList. # noqa: E501 - Kind is a string value representing the REST resource this object represents. 
Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds # noqa: E501 + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 :return: The kind of this V1TFJobList. # noqa: E501 :rtype: str @@ -141,7 +129,7 @@ def kind(self): def kind(self, kind): """Sets the kind of this V1TFJobList. - Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds # noqa: E501 + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 :param kind: The kind of this V1TFJobList. # noqa: E501 :type: str @@ -153,10 +141,9 @@ def kind(self, kind): def metadata(self): """Gets the metadata of this V1TFJobList. # noqa: E501 - Standard list metadata. # noqa: E501 :return: The metadata of this V1TFJobList. # noqa: E501 - :rtype: V1ListMeta + :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta """ return self._metadata @@ -164,10 +151,9 @@ def metadata(self): def metadata(self, metadata): """Sets the metadata of this V1TFJobList. - Standard list metadata. # noqa: E501 :param metadata: The metadata of this V1TFJobList. 
# noqa: E501 - :type: V1ListMeta + :type: K8sIoApimachineryPkgApisMetaV1ListMeta """ self._metadata = metadata @@ -176,7 +162,7 @@ def to_dict(self): """Returns the model properties as a dict""" result = {} - for attr, _ in six.iteritems(self.swagger_types): + for attr, _ in six.iteritems(self.openapi_types): value = getattr(self, attr) if isinstance(value, list): result[attr] = list(map( @@ -193,9 +179,6 @@ def to_dict(self): )) else: result[attr] = value - if issubclass(V1TFJobList, dict): - for key, value in self.items(): - result[key] = value return result @@ -212,8 +195,11 @@ def __eq__(self, other): if not isinstance(other, V1TFJobList): return False - return self.__dict__ == other.__dict__ + return self.to_dict() == other.to_dict() def __ne__(self, other): """Returns true if both objects are not equal""" - return not self == other + if not isinstance(other, V1TFJobList): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_tf_job_spec.py b/sdk/python/kubeflow/training/models/v1_tf_job_spec.py new file mode 100644 index 0000000000..1c347b4c2c --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_tf_job_spec.py @@ -0,0 +1,206 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1TFJobSpec(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + 'enable_dynamic_worker': 'bool', + 'run_policy': 'V1RunPolicy', + 'success_policy': 'str', + 'tf_replica_specs': 'dict(str, V1ReplicaSpec)' + } + + attribute_map = { + 'enable_dynamic_worker': 'enableDynamicWorker', + 'run_policy': 'runPolicy', + 'success_policy': 'successPolicy', + 'tf_replica_specs': 'tfReplicaSpecs' + } + + def __init__(self, enable_dynamic_worker=None, run_policy=None, success_policy=None, tf_replica_specs=None, local_vars_configuration=None): # noqa: E501 + """V1TFJobSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._enable_dynamic_worker = None + self._run_policy = None + self._success_policy = None + self._tf_replica_specs = None + self.discriminator = None + + if enable_dynamic_worker is not None: + self.enable_dynamic_worker = enable_dynamic_worker + self.run_policy = run_policy + if success_policy is not None: + self.success_policy = success_policy + self.tf_replica_specs = tf_replica_specs + + @property + def enable_dynamic_worker(self): + """Gets the enable_dynamic_worker of this V1TFJobSpec. # noqa: E501 + + A switch to enable dynamic worker # noqa: E501 + + :return: The enable_dynamic_worker of this V1TFJobSpec. # noqa: E501 + :rtype: bool + """ + return self._enable_dynamic_worker + + @enable_dynamic_worker.setter + def enable_dynamic_worker(self, enable_dynamic_worker): + """Sets the enable_dynamic_worker of this V1TFJobSpec. + + A switch to enable dynamic worker # noqa: E501 + + :param enable_dynamic_worker: The enable_dynamic_worker of this V1TFJobSpec. # noqa: E501 + :type: bool + """ + + self._enable_dynamic_worker = enable_dynamic_worker + + @property + def run_policy(self): + """Gets the run_policy of this V1TFJobSpec. # noqa: E501 + + + :return: The run_policy of this V1TFJobSpec. 
# noqa: E501 + :rtype: V1RunPolicy + """ + return self._run_policy + + @run_policy.setter + def run_policy(self, run_policy): + """Sets the run_policy of this V1TFJobSpec. + + + :param run_policy: The run_policy of this V1TFJobSpec. # noqa: E501 + :type: V1RunPolicy + """ + if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 + raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 + + self._run_policy = run_policy + + @property + def success_policy(self): + """Gets the success_policy of this V1TFJobSpec. # noqa: E501 + + SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. # noqa: E501 + + :return: The success_policy of this V1TFJobSpec. # noqa: E501 + :rtype: str + """ + return self._success_policy + + @success_policy.setter + def success_policy(self, success_policy): + """Sets the success_policy of this V1TFJobSpec. + + SuccessPolicy defines the policy to mark the TFJob as succeeded. Default to \"\", using the default rules. # noqa: E501 + + :param success_policy: The success_policy of this V1TFJobSpec. # noqa: E501 + :type: str + """ + + self._success_policy = success_policy + + @property + def tf_replica_specs(self): + """Gets the tf_replica_specs of this V1TFJobSpec. # noqa: E501 + + A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 + + :return: The tf_replica_specs of this V1TFJobSpec. # noqa: E501 + :rtype: dict(str, V1ReplicaSpec) + """ + return self._tf_replica_specs + + @tf_replica_specs.setter + def tf_replica_specs(self, tf_replica_specs): + """Sets the tf_replica_specs of this V1TFJobSpec. + + A map of TFReplicaType (type) to ReplicaSpec (value). Specifies the TF cluster configuration. 
For example, { \"PS\": ReplicaSpec, \"Worker\": ReplicaSpec, } # noqa: E501 + + :param tf_replica_specs: The tf_replica_specs of this V1TFJobSpec. # noqa: E501 + :type: dict(str, V1ReplicaSpec) + """ + if self.local_vars_configuration.client_side_validation and tf_replica_specs is None: # noqa: E501 + raise ValueError("Invalid value for `tf_replica_specs`, must not be `None`") # noqa: E501 + + self._tf_replica_specs = tf_replica_specs + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1TFJobSpec): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1TFJobSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_xg_boost_job.py b/sdk/python/kubeflow/training/models/v1_xg_boost_job.py new file mode 100644 index 0000000000..294d06c605 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_xg_boost_job.py @@ -0,0 +1,228 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: 
https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1XGBoostJob(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'api_version': 'str', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'spec': 'V1XGBoostJobSpec', + 'status': 'V1JobStatus' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'kind': 'kind', + 'metadata': 'metadata', + 'spec': 'spec', + 'status': 'status' + } + + def __init__(self, api_version=None, kind=None, metadata=None, spec=None, status=None, local_vars_configuration=None): # noqa: E501 + """V1XGBoostJob - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._kind = None + self._metadata = None + self._spec = None + self._status = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + if spec is not None: + self.spec = spec + if status is not None: + self.status = status + + @property + def api_version(self): + """Gets the api_version of this V1XGBoostJob. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1XGBoostJob. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1XGBoostJob. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1XGBoostJob. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def kind(self): + """Gets the kind of this V1XGBoostJob. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1XGBoostJob. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1XGBoostJob. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1XGBoostJob. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1XGBoostJob. # noqa: E501 + + + :return: The metadata of this V1XGBoostJob. 
# noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1XGBoostJob. + + + :param metadata: The metadata of this V1XGBoostJob. # noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + """ + + self._metadata = metadata + + @property + def spec(self): + """Gets the spec of this V1XGBoostJob. # noqa: E501 + + + :return: The spec of this V1XGBoostJob. # noqa: E501 + :rtype: V1XGBoostJobSpec + """ + return self._spec + + @spec.setter + def spec(self, spec): + """Sets the spec of this V1XGBoostJob. + + + :param spec: The spec of this V1XGBoostJob. # noqa: E501 + :type: V1XGBoostJobSpec + """ + + self._spec = spec + + @property + def status(self): + """Gets the status of this V1XGBoostJob. # noqa: E501 + + + :return: The status of this V1XGBoostJob. # noqa: E501 + :rtype: V1JobStatus + """ + return self._status + + @status.setter + def status(self, status): + """Sets the status of this V1XGBoostJob. + + + :param status: The status of this V1XGBoostJob. 
# noqa: E501 + :type: V1JobStatus + """ + + self._status = status + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1XGBoostJob): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1XGBoostJob): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py b/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py new file mode 100644 index 0000000000..5bed85c8a2 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py @@ -0,0 +1,203 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1XGBoostJobList(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. 
+ """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'api_version': 'str', + 'items': 'list[V1XGBoostJob]', + 'kind': 'str', + 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + } + + attribute_map = { + 'api_version': 'apiVersion', + 'items': 'items', + 'kind': 'kind', + 'metadata': 'metadata' + } + + def __init__(self, api_version=None, items=None, kind=None, metadata=None, local_vars_configuration=None): # noqa: E501 + """V1XGBoostJobList - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._api_version = None + self._items = None + self._kind = None + self._metadata = None + self.discriminator = None + + if api_version is not None: + self.api_version = api_version + self.items = items + if kind is not None: + self.kind = kind + if metadata is not None: + self.metadata = metadata + + @property + def api_version(self): + """Gets the api_version of this V1XGBoostJobList. # noqa: E501 + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :return: The api_version of this V1XGBoostJobList. # noqa: E501 + :rtype: str + """ + return self._api_version + + @api_version.setter + def api_version(self, api_version): + """Sets the api_version of this V1XGBoostJobList. + + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources # noqa: E501 + + :param api_version: The api_version of this V1XGBoostJobList. # noqa: E501 + :type: str + """ + + self._api_version = api_version + + @property + def items(self): + """Gets the items of this V1XGBoostJobList. # noqa: E501 + + + :return: The items of this V1XGBoostJobList. # noqa: E501 + :rtype: list[V1XGBoostJob] + """ + return self._items + + @items.setter + def items(self, items): + """Sets the items of this V1XGBoostJobList. + + + :param items: The items of this V1XGBoostJobList. # noqa: E501 + :type: list[V1XGBoostJob] + """ + if self.local_vars_configuration.client_side_validation and items is None: # noqa: E501 + raise ValueError("Invalid value for `items`, must not be `None`") # noqa: E501 + + self._items = items + + @property + def kind(self): + """Gets the kind of this V1XGBoostJobList. # noqa: E501 + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :return: The kind of this V1XGBoostJobList. # noqa: E501 + :rtype: str + """ + return self._kind + + @kind.setter + def kind(self, kind): + """Sets the kind of this V1XGBoostJobList. + + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds # noqa: E501 + + :param kind: The kind of this V1XGBoostJobList. # noqa: E501 + :type: str + """ + + self._kind = kind + + @property + def metadata(self): + """Gets the metadata of this V1XGBoostJobList. 
# noqa: E501 + + + :return: The metadata of this V1XGBoostJobList. # noqa: E501 + :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + return self._metadata + + @metadata.setter + def metadata(self, metadata): + """Sets the metadata of this V1XGBoostJobList. + + + :param metadata: The metadata of this V1XGBoostJobList. # noqa: E501 + :type: K8sIoApimachineryPkgApisMetaV1ListMeta + """ + + self._metadata = metadata + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1XGBoostJobList): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1XGBoostJobList): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/training/models/v1_xg_boost_job_spec.py b/sdk/python/kubeflow/training/models/v1_xg_boost_job_spec.py new file mode 100644 index 0000000000..4d90947bc5 --- /dev/null +++ b/sdk/python/kubeflow/training/models/v1_xg_boost_job_spec.py @@ -0,0 +1,148 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: 
https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kubeflow.training.configuration import Configuration + + +class V1XGBoostJobSpec(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'run_policy': 'V1RunPolicy', + 'xgb_replica_specs': 'dict(str, V1ReplicaSpec)' + } + + attribute_map = { + 'run_policy': 'runPolicy', + 'xgb_replica_specs': 'xgbReplicaSpecs' + } + + def __init__(self, run_policy=None, xgb_replica_specs=None, local_vars_configuration=None): # noqa: E501 + """V1XGBoostJobSpec - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._run_policy = None + self._xgb_replica_specs = None + self.discriminator = None + + self.run_policy = run_policy + self.xgb_replica_specs = xgb_replica_specs + + @property + def run_policy(self): + """Gets the run_policy of this V1XGBoostJobSpec. # noqa: E501 + + + :return: The run_policy of this V1XGBoostJobSpec. # noqa: E501 + :rtype: V1RunPolicy + """ + return self._run_policy + + @run_policy.setter + def run_policy(self, run_policy): + """Sets the run_policy of this V1XGBoostJobSpec. + + + :param run_policy: The run_policy of this V1XGBoostJobSpec. # noqa: E501 + :type: V1RunPolicy + """ + if self.local_vars_configuration.client_side_validation and run_policy is None: # noqa: E501 + raise ValueError("Invalid value for `run_policy`, must not be `None`") # noqa: E501 + + self._run_policy = run_policy + + @property + def xgb_replica_specs(self): + """Gets the xgb_replica_specs of this V1XGBoostJobSpec. 
# noqa: E501 + + + :return: The xgb_replica_specs of this V1XGBoostJobSpec. # noqa: E501 + :rtype: dict(str, V1ReplicaSpec) + """ + return self._xgb_replica_specs + + @xgb_replica_specs.setter + def xgb_replica_specs(self, xgb_replica_specs): + """Sets the xgb_replica_specs of this V1XGBoostJobSpec. + + + :param xgb_replica_specs: The xgb_replica_specs of this V1XGBoostJobSpec. # noqa: E501 + :type: dict(str, V1ReplicaSpec) + """ + if self.local_vars_configuration.client_side_validation and xgb_replica_specs is None: # noqa: E501 + raise ValueError("Invalid value for `xgb_replica_specs`, must not be `None`") # noqa: E501 + + self._xgb_replica_specs = xgb_replica_specs + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V1XGBoostJobSpec): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V1XGBoostJobSpec): + return True + + return self.to_dict() != other.to_dict() diff --git a/sdk/python/kubeflow/tfjob/rest.py b/sdk/python/kubeflow/training/rest.py similarity index 85% rename from sdk/python/kubeflow/tfjob/rest.py rename to 
sdk/python/kubeflow/training/rest.py index aad1ef56fe..29759aef11 100644 --- a/sdk/python/kubeflow/tfjob/rest.py +++ b/sdk/python/kubeflow/training/rest.py @@ -1,27 +1,12 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # coding: utf-8 """ - tfjob - - Python SDK for TF-Operator # noqa: E501 + tensorflow - OpenAPI spec version: v0.1 + Python SDK for tensorflow # noqa: E501 - Generated by: https://github.com/swagger-api/swagger-codegen.git + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech """ @@ -37,11 +22,9 @@ # python 2 and python 3 compatibility library import six from six.moves.urllib.parse import urlencode +import urllib3 -try: - import urllib3 -except ImportError: - raise ImportError('Swagger python client requires urllib3.') +from kubeflow.training.exceptions import ApiException, ApiValueError logger = logging.getLogger(__name__) @@ -90,6 +73,9 @@ def __init__(self, configuration, pools_size=4, maxsize=None): if configuration.assert_hostname is not None: addition_pool_args['assert_hostname'] = configuration.assert_hostname # noqa: E501 + if configuration.retries is not None: + addition_pool_args['retries'] = configuration.retries + if maxsize is None: if configuration.connection_pool_maxsize is not None: maxsize = configuration.connection_pool_maxsize @@ -106,6 +92,7 @@ def __init__(self, configuration, pools_size=4, maxsize=None): cert_file=configuration.cert_file, 
key_file=configuration.key_file, proxy_url=configuration.proxy, + proxy_headers=configuration.proxy_headers, **addition_pool_args ) else: @@ -145,7 +132,7 @@ def request(self, method, url, query_params=None, headers=None, 'PATCH', 'OPTIONS'] if post_params and body: - raise ValueError( + raise ApiValueError( "body parameter cannot be used with post_params parameter." ) @@ -202,7 +189,7 @@ def request(self, method, url, query_params=None, headers=None, # Pass a `string` parameter directly in the body to support # other content types than Json when `body` argument is # provided in serialized form - elif isinstance(body, str): + elif isinstance(body, str) or isinstance(body, bytes): request_body = body r = self.pool_manager.request( method, url, @@ -230,11 +217,6 @@ def request(self, method, url, query_params=None, headers=None, if _preload_content: r = RESTResponse(r) - # In the python 3, the response.data is bytes. - # we need to decode it to string. - if six.PY3: - r.data = r.data.decode('utf8') - # log response body logger.debug("response body: %s", r.data) @@ -307,31 +289,3 @@ def PATCH(self, url, headers=None, query_params=None, post_params=None, _preload_content=_preload_content, _request_timeout=_request_timeout, body=body) - - -class ApiException(Exception): - - def __init__(self, status=None, reason=None, http_resp=None): - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\n"\ - "Reason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format( - self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message diff --git 
a/sdk/python/kubeflow/tfjob/utils/__init__.py b/sdk/python/kubeflow/training/utils/__init__.py similarity index 100% rename from sdk/python/kubeflow/tfjob/utils/__init__.py rename to sdk/python/kubeflow/training/utils/__init__.py diff --git a/sdk/python/kubeflow/training/utils/utils.py b/sdk/python/kubeflow/training/utils/utils.py new file mode 100644 index 0000000000..e04f46bbbb --- /dev/null +++ b/sdk/python/kubeflow/training/utils/utils.py @@ -0,0 +1,108 @@ +# Copyright 2019 kubeflow.org. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from kubeflow.training.constants import constants + + +def is_running_in_k8s(): + return os.path.isdir('/var/run/secrets/kubernetes.io/') + + +def get_current_k8s_namespace(): + with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'r') as f: + return f.readline() + + +def get_default_target_namespace(): + if not is_running_in_k8s(): + return 'default' + return get_current_k8s_namespace() + + +def set_tfjob_namespace(tfjob): + tfjob_namespace = tfjob.metadata.namespace + namespace = tfjob_namespace or get_default_target_namespace() + return namespace + + +def set_pytorchjob_namespace(pytorchjob): + pytorchjob_namespace = pytorchjob.metadata.namespace + namespace = pytorchjob_namespace or get_default_target_namespace() + return namespace + + +def get_tfjob_labels(name, master=False, replica_type=None, replica_index=None): + """ + Get labels according to specified flags.
+ :param name: tfjob name + :param master: whether to include the label 'job-role: master'. + :param replica_type: User can specify one of 'worker, ps, chief' to only get one type of pods. + :param replica_index: Can specify replica index to get one pod of TFJob. + :return: Dict: Labels + """ + labels = { + constants.TFJOB_GROUP_LABEL: 'kubeflow.org', + constants.TFJOB_NAME_LABEL: name, + } + + if master: + labels[constants.TFJOB_ROLE_LABEL] = 'master' + + if replica_type: + labels[constants.TFJOB_TYPE_LABEL] = str.lower(replica_type) + + if replica_index: + labels[constants.TFJOB_INDEX_LABEL] = replica_index + + return labels + + +def get_pytorchjob_labels(name, master=False, replica_type=None, replica_index=None): + """ + Get labels according to specified flags. + :param name: PyTorchJob name + :param master: whether to include the label 'job-role: master'. + :param replica_type: User can specify one of 'worker, ps, chief' to only get one type of pods. + :param replica_index: Can specify replica index to get one pod of PyTorchJob. + :return: Dict: Labels + """ + labels = { + constants.PYTORCHJOB_GROUP_LABEL: 'kubeflow.org', + constants.PYTORCHJOB_CONTROLLER_LABEL: 'pytorch-operator', + constants.PYTORCHJOB_NAME_LABEL: name, + } + + if master: + labels[constants.PYTORCHJOB_ROLE_LABEL] = 'master' + + if replica_type: + labels[constants.PYTORCHJOB_TYPE_LABEL] = str.lower(replica_type) + + if replica_index: + labels[constants.PYTORCHJOB_INDEX_LABEL] = replica_index + + return labels + + +def to_selector(labels): + """ + Convert labels to a selector string.
+ """ + parts = [] + for key in labels.keys(): + parts.append("{0}={1}".format(key, labels[key])) + + return ",".join(parts) diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 7b0e0d448e..48731164d3 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -3,5 +3,5 @@ six>=1.10 python_dateutil>=2.5.3 setuptools>=21.0.0 urllib3>=1.15.1 -kubernetes==10.0.1 +kubernetes>=12.0.0 table_logger>=0.3.5 diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 3e4072430c..d2420133a8 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -24,14 +24,14 @@ REQUIRES = f.readlines() setuptools.setup( - name='kubeflow-tfjob', - version='0.1.4', + name='kubeflow-training', + version='1.3.0', author="Kubeflow Authors", author_email='hejinchi@cn.ibm.com', license="Apache License Version 2.0", url="https://github.com/kubeflow/tf-operator/sdk/python", - description="TFJob Python SDK", - long_description="TFJob Python SDK", + description="Training Operator Python SDK", + long_description="Training Operator Python SDK", packages=setuptools.find_packages( include=("kubeflow*")), package_data={}, diff --git a/sdk/python/test/e2e/__init__.py b/sdk/python/test/e2e/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/test/e2e/test_e2e_pytorchjob.py b/sdk/python/test/e2e/test_e2e_pytorchjob.py new file mode 100644 index 0000000000..f241b38a44 --- /dev/null +++ b/sdk/python/test/e2e/test_e2e_pytorchjob.py @@ -0,0 +1,82 @@ +# Copyright 2019 kubeflow.org. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1PodSpec +from kubernetes.client import V1Container + +from kubeflow.training.api.py_torch_job_client import PyTorchJobClient +from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec +from kubeflow.training.models.v1_py_torch_job import V1PyTorchJob +from kubeflow.training.models.v1_py_torch_job_spec import V1PyTorchJobSpec +from kubeflow.training.models.v1_run_policy import V1RunPolicy + +PYTORCH_CLIENT = PyTorchJobClient(config_file=os.getenv('KUBECONFIG', '~/.kube/config')) +SDK_TEST_NAMESPACE = 'default' + + +def test_sdk_e2e(): + container = V1Container( + name="pytorch", + image="gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0", + args=["--backend", "gloo"], + ) + + master = V1ReplicaSpec( + replicas=1, + restart_policy="OnFailure", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) + ) + + worker = V1ReplicaSpec( + replicas=1, + restart_policy="OnFailure", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) + ) + + pytorchjob = V1PyTorchJob( + api_version="kubeflow.org/v1", + kind="PyTorchJob", + metadata=V1ObjectMeta(name="pytorchjob-mnist-ci-test", namespace=SDK_TEST_NAMESPACE), + spec=V1PyTorchJobSpec( + run_policy=V1RunPolicy( + clean_pod_policy="None", + ), + pytorch_replica_specs={"Master": master, + "Worker": worker} + ) + ) + + PYTORCH_CLIENT.create(pytorchjob) + + PYTORCH_CLIENT.wait_for_job("pytorchjob-mnist-ci-test", namespace=SDK_TEST_NAMESPACE) + if not PYTORCH_CLIENT.is_job_succeeded("pytorchjob-mnist-ci-test", + namespace=SDK_TEST_NAMESPACE): + raise RuntimeError("The PyTorchJob is not succeeded.") + + PYTORCH_CLIENT.get_logs("pytorchjob-mnist-ci-test", namespace=SDK_TEST_NAMESPACE) + + 
PYTORCH_CLIENT.delete("pytorchjob-mnist-ci-test", namespace=SDK_TEST_NAMESPACE) diff --git a/sdk/python/test/e2e/test_e2e_tfjob.py b/sdk/python/test/e2e/test_e2e_tfjob.py new file mode 100644 index 0000000000..63ce69b29d --- /dev/null +++ b/sdk/python/test/e2e/test_e2e_tfjob.py @@ -0,0 +1,74 @@ +# Copyright 2019 kubeflow.org. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1PodSpec +from kubernetes.client import V1Container + +from kubeflow.training.api.tf_job_client import TFJobClient +from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec +from kubeflow.training.models.v1_run_policy import V1RunPolicy +from kubeflow.training.models.v1_tf_job import V1TFJob +from kubeflow.training.models.v1_tf_job_spec import V1TFJobSpec + +TFJOB_CLIENT = TFJobClient(config_file=os.getenv('KUBECONFIG')) +SDK_TEST_NAMESPACE = 'kubeflow' + + +def test_sdk_e2e(): + container = V1Container( + name="tensorflow", + image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", + command=[ + "python", + "/var/tf_mnist/mnist_with_summaries.py", + "--log_dir=/train/logs", "--learning_rate=0.01", + "--batch_size=150" + ] + ) + + worker = V1ReplicaSpec( + replicas=1, + restart_policy="Never", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) + ) + + tfjob = V1TFJob( + api_version="kubeflow.org/v1", + kind="TFJob", + 
metadata=V1ObjectMeta(name="mnist-ci-test", namespace=SDK_TEST_NAMESPACE), + spec=V1TFJobSpec( + run_policy=V1RunPolicy( + clean_pod_policy="None", + ), + tf_replica_specs={"Worker": worker} + ) + ) + + TFJOB_CLIENT.create(tfjob, namespace=SDK_TEST_NAMESPACE) + + TFJOB_CLIENT.wait_for_job("mnist-ci-test", namespace=SDK_TEST_NAMESPACE) + if not TFJOB_CLIENT.is_job_succeeded("mnist-ci-test", namespace=SDK_TEST_NAMESPACE): + raise RuntimeError("The TFJob is not succeeded.") + + TFJOB_CLIENT.get_logs("mnist-ci-test", master=False, namespace=SDK_TEST_NAMESPACE) + + TFJOB_CLIENT.delete("mnist-ci-test", namespace=SDK_TEST_NAMESPACE) diff --git a/sdk/python/test/models/__init__.py b/sdk/python/test/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/test/models/test_v1_job_condition.py b/sdk/python/test/models/test_v1_job_condition.py new file mode 100644 index 0000000000..a81a0f0341 --- /dev/null +++ b/sdk/python/test/models/test_v1_job_condition.py @@ -0,0 +1,59 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_job_condition import V1JobCondition # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1JobCondition(unittest.TestCase): + """V1JobCondition unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1JobCondition + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_job_condition.V1JobCondition() # noqa: E501 + if include_optional : + return V1JobCondition( + last_transition_time = None, + last_update_time = None, 
+ message = '0', + reason = '0', + status = '0', + type = '0' + ) + else : + return V1JobCondition( + status = '0', + type = '0', + ) + + def testV1JobCondition(self): + """Test V1JobCondition""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_job_status.py b/sdk/python/test/models/test_v1_job_status.py new file mode 100644 index 0000000000..841db2da67 --- /dev/null +++ b/sdk/python/test/models/test_v1_job_status.py @@ -0,0 +1,84 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_job_status import V1JobStatus # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1JobStatus(unittest.TestCase): + """V1JobStatus unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1JobStatus + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_job_status.V1JobStatus() # noqa: E501 + if include_optional : + return V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None + ) + else : + return V1JobStatus( + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = 
'0', + reason = '0', + status = '0', + type = '0', ) + ], + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + ) + + def testV1JobStatus(self): + """Test V1JobStatus""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_mx_job.py b/sdk/python/test/models/test_v1_mx_job.py new file mode 100644 index 0000000000..d2e2548c77 --- /dev/null +++ b/sdk/python/test/models/test_v1_mx_job.py @@ -0,0 +1,93 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_mx_job import V1MXJob # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1MXJob(unittest.TestCase): + """V1MXJob unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1MXJob + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_mx_job.V1MXJob() # noqa: E501 + if include_optional : + return V1MXJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1MXJobSpec( + job_mode = '0', + mx_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), 
), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ) + ) + else : + return V1MXJob( + ) + + def testV1MXJob(self): + """Test V1MXJob""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_mx_job_list.py b/sdk/python/test/models/test_v1_mx_job_list.py new file mode 100644 index 0000000000..da9c6cb845 --- /dev/null +++ b/sdk/python/test/models/test_v1_mx_job_list.py @@ -0,0 +1,144 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_mx_job_list import V1MXJobList # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1MXJobList(unittest.TestCase): + """V1MXJobList unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1MXJobList + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_mx_job_list.V1MXJobList() # noqa: E501 + if include_optional : + return V1MXJobList( + api_version = '0', + items = [ + V1MXJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1MXJobSpec( + job_mode = '0', + mx_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + 
restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + kind = '0', + metadata = None + ) + else : + return V1MXJobList( + items = [ + V1MXJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1MXJobSpec( + job_mode = '0', + mx_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + ) + + def testV1MXJobList(self): + """Test V1MXJobList""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_mx_job_spec.py 
b/sdk/python/test/models/test_v1_mx_job_spec.py new file mode 100644 index 0000000000..9b1c6c2ca9 --- /dev/null +++ b/sdk/python/test/models/test_v1_mx_job_spec.py @@ -0,0 +1,89 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_mx_job_spec import V1MXJobSpec # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1MXJobSpec(unittest.TestCase): + """V1MXJobSpec unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1MXJobSpec + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_mx_job_spec.V1MXJobSpec() # noqa: E501 + if include_optional : + return V1MXJobSpec( + job_mode = '0', + mx_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ) + ) + else : + return V1MXJobSpec( + job_mode = '0', + mx_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + ) + + def 
testV1MXJobSpec(self): + """Test V1MXJobSpec""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_py_torch_job.py b/sdk/python/test/models/test_v1_py_torch_job.py new file mode 100644 index 0000000000..3a6c90ffc9 --- /dev/null +++ b/sdk/python/test/models/test_v1_py_torch_job.py @@ -0,0 +1,92 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_py_torch_job import V1PyTorchJob # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1PyTorchJob(unittest.TestCase): + """V1PyTorchJob unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1PyTorchJob + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_py_torch_job.V1PyTorchJob() # noqa: E501 + if include_optional : + return V1PyTorchJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1PyTorchJobSpec( + pytorch_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + 
last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ) + ) + else : + return V1PyTorchJob( + ) + + def testV1PyTorchJob(self): + """Test V1PyTorchJob""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_py_torch_job_list.py b/sdk/python/test/models/test_v1_py_torch_job_list.py new file mode 100644 index 0000000000..8dbbc71dd8 --- /dev/null +++ b/sdk/python/test/models/test_v1_py_torch_job_list.py @@ -0,0 +1,142 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_py_torch_job_list import V1PyTorchJobList # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1PyTorchJobList(unittest.TestCase): + """V1PyTorchJobList unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1PyTorchJobList + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_py_torch_job_list.V1PyTorchJobList() # noqa: E501 + if include_optional : + return V1PyTorchJobList( + api_version = '0', + items = [ + V1PyTorchJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1PyTorchJobSpec( + pytorch_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy 
= V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + kind = '0', + metadata = None + ) + else : + return V1PyTorchJobList( + items = [ + V1PyTorchJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1PyTorchJobSpec( + pytorch_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + ) + + def testV1PyTorchJobList(self): + """Test V1PyTorchJobList""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_py_torch_job_spec.py 
b/sdk/python/test/models/test_v1_py_torch_job_spec.py new file mode 100644 index 0000000000..f0c5c9a7bd --- /dev/null +++ b/sdk/python/test/models/test_v1_py_torch_job_spec.py @@ -0,0 +1,87 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_py_torch_job_spec import V1PyTorchJobSpec # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1PyTorchJobSpec(unittest.TestCase): + """V1PyTorchJobSpec unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1PyTorchJobSpec + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_py_torch_job_spec.V1PyTorchJobSpec() # noqa: E501 + if include_optional : + return V1PyTorchJobSpec( + pytorch_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ) + ) + else : + return V1PyTorchJobSpec( + pytorch_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, 
), + ) + + def testV1PyTorchJobSpec(self): + """Test V1PyTorchJobSpec""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_replica_spec.py b/sdk/python/test/models/test_v1_replica_spec.py new file mode 100644 index 0000000000..6c5f42f1cc --- /dev/null +++ b/sdk/python/test/models/test_v1_replica_spec.py @@ -0,0 +1,54 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1ReplicaSpec(unittest.TestCase): + """V1ReplicaSpec unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1ReplicaSpec + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_replica_spec.V1ReplicaSpec() # noqa: E501 + if include_optional : + return V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None + ) + else : + return V1ReplicaSpec( + ) + + def testV1ReplicaSpec(self): + """Test V1ReplicaSpec""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_replica_status.py b/sdk/python/test/models/test_v1_replica_status.py new file mode 100644 index 0000000000..864e56b222 --- /dev/null +++ b/sdk/python/test/models/test_v1_replica_status.py @@ -0,0 +1,54 
@@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_replica_status import V1ReplicaStatus # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1ReplicaStatus(unittest.TestCase): + """V1ReplicaStatus unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1ReplicaStatus + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_replica_status.V1ReplicaStatus() # noqa: E501 + if include_optional : + return V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56 + ) + else : + return V1ReplicaStatus( + ) + + def testV1ReplicaStatus(self): + """Test V1ReplicaStatus""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_run_policy.py b/sdk/python/test/models/test_v1_run_policy.py new file mode 100644 index 0000000000..1052fa3227 --- /dev/null +++ b/sdk/python/test/models/test_v1_run_policy.py @@ -0,0 +1,62 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_run_policy import V1RunPolicy # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1RunPolicy(unittest.TestCase): + """V1RunPolicy 
unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1RunPolicy + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_run_policy.V1RunPolicy() # noqa: E501 + if include_optional : + return V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56 + ) + else : + return V1RunPolicy( + ) + + def testV1RunPolicy(self): + """Test V1RunPolicy""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_scheduling_policy.py b/sdk/python/test/models/test_v1_scheduling_policy.py new file mode 100644 index 0000000000..bc9c181c24 --- /dev/null +++ b/sdk/python/test/models/test_v1_scheduling_policy.py @@ -0,0 +1,57 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_scheduling_policy import V1SchedulingPolicy # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1SchedulingPolicy(unittest.TestCase): + """V1SchedulingPolicy unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1SchedulingPolicy + include_option is a boolean, when False only required + params are included, when True both required and + optional 
params are included """ + # model = kubeflow.training.models.v1_scheduling_policy.V1SchedulingPolicy() # noqa: E501 + if include_optional : + return V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0' + ) + else : + return V1SchedulingPolicy( + ) + + def testV1SchedulingPolicy(self): + """Test V1SchedulingPolicy""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_tf_job.py b/sdk/python/test/models/test_v1_tf_job.py new file mode 100644 index 0000000000..91a5cfa949 --- /dev/null +++ b/sdk/python/test/models/test_v1_tf_job.py @@ -0,0 +1,94 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_tf_job import V1TFJob # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1TFJob(unittest.TestCase): + """V1TFJob unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1TFJob + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_tf_job.V1TFJob() # noqa: E501 + if include_optional : + return V1TFJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1TFJobSpec( + enable_dynamic_worker = True, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = 
'0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + success_policy = '0', + tf_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ) + ) + else : + return V1TFJob( + ) + + def testV1TFJob(self): + """Test V1TFJob""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_tf_job_list.py b/sdk/python/test/models/test_v1_tf_job_list.py new file mode 100644 index 0000000000..956ee2403a --- /dev/null +++ b/sdk/python/test/models/test_v1_tf_job_list.py @@ -0,0 +1,146 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_tf_job_list import V1TFJobList # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1TFJobList(unittest.TestCase): + """V1TFJobList unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1TFJobList + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_tf_job_list.V1TFJobList() # noqa: E501 + if include_optional : + return V1TFJobList( + api_version = 
'0', + items = [ + V1TFJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1TFJobSpec( + enable_dynamic_worker = True, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + success_policy = '0', + tf_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + kind = '0', + metadata = None + ) + else : + return V1TFJobList( + items = [ + V1TFJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1TFJobSpec( + enable_dynamic_worker = True, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + success_policy = '0', + tf_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + ) + + def testV1TFJobList(self): + 
"""Test V1TFJobList""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_tf_job_spec.py b/sdk/python/test/models/test_v1_tf_job_spec.py new file mode 100644 index 0000000000..6099577b20 --- /dev/null +++ b/sdk/python/test/models/test_v1_tf_job_spec.py @@ -0,0 +1,89 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_tf_job_spec import V1TFJobSpec # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1TFJobSpec(unittest.TestCase): + """V1TFJobSpec unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1TFJobSpec + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_tf_job_spec.V1TFJobSpec() # noqa: E501 + if include_optional : + return V1TFJobSpec( + enable_dynamic_worker = True, + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + success_policy = '0', + tf_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + } + ) + else : + return V1TFJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + 
min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + tf_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + ) + + def testV1TFJobSpec(self): + """Test V1TFJobSpec""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_xg_boost_job.py b/sdk/python/test/models/test_v1_xg_boost_job.py new file mode 100644 index 0000000000..cb079d1936 --- /dev/null +++ b/sdk/python/test/models/test_v1_xg_boost_job.py @@ -0,0 +1,92 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_xg_boost_job import V1XGBoostJob # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1XGBoostJob(unittest.TestCase): + """V1XGBoostJob unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1XGBoostJob + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_xg_boost_job.V1XGBoostJob() # noqa: E501 + if include_optional : + return V1XGBoostJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1XGBoostJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + 
ttl_seconds_after_finished = 56, ), + xgb_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ) + ) + else : + return V1XGBoostJob( + ) + + def testV1XGBoostJob(self): + """Test V1XGBoostJob""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_xg_boost_job_list.py b/sdk/python/test/models/test_v1_xg_boost_job_list.py new file mode 100644 index 0000000000..f8e89e3245 --- /dev/null +++ b/sdk/python/test/models/test_v1_xg_boost_job_list.py @@ -0,0 +1,142 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_xg_boost_job_list import V1XGBoostJobList # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1XGBoostJobList(unittest.TestCase): + """V1XGBoostJobList unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1XGBoostJobList + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_xg_boost_job_list.V1XGBoostJobList() # noqa: E501 + if include_optional : + return 
V1XGBoostJobList( + api_version = '0', + items = [ + V1XGBoostJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1XGBoostJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + xgb_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + kind = '0', + metadata = None + ) + else : + return V1XGBoostJobList( + items = [ + V1XGBoostJob( + api_version = '0', + kind = '0', + metadata = None, + spec = V1XGBoostJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + xgb_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, ), + status = V1JobStatus( + completion_time = None, + conditions = [ + V1JobCondition( + last_transition_time = None, + last_update_time = None, + message = '0', + reason = '0', + status = '0', + type = '0', ) + ], + last_reconcile_time = None, + replica_statuses = { + 'key' : V1ReplicaStatus( + active = 56, + failed = 56, + succeeded = 56, ) + }, + start_time = None, ), ) + ], + ) + + def testV1XGBoostJobList(self): + """Test V1XGBoostJobList""" + inst_req_only = 
self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/models/test_v1_xg_boost_job_spec.py b/sdk/python/test/models/test_v1_xg_boost_job_spec.py new file mode 100644 index 0000000000..3b19e5ddb7 --- /dev/null +++ b/sdk/python/test/models/test_v1_xg_boost_job_spec.py @@ -0,0 +1,87 @@ +# coding: utf-8 + +""" + tensorflow + + Python SDK for tensorflow # noqa: E501 + + The version of the OpenAPI document: v1.3.0 + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +from kubeflow.training.models import * +from kubeflow.training.models.v1_xg_boost_job_spec import V1XGBoostJobSpec # noqa: E501 +from kubeflow.training.rest import ApiException + +class TestV1XGBoostJobSpec(unittest.TestCase): + """V1XGBoostJobSpec unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V1XGBoostJobSpec + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kubeflow.training.models.v1_xg_boost_job_spec.V1XGBoostJobSpec() # noqa: E501 + if include_optional : + return V1XGBoostJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + xgb_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + } + ) + else : + return V1XGBoostJobSpec( + run_policy = V1RunPolicy( + active_deadline_seconds = 56, + backoff_limit = 56, + clean_pod_policy = '0', + scheduling_policy = V1SchedulingPolicy( + min_available = 56, + 
min_resources = { + 'key' : None + }, + priority_class = '0', + queue = '0', ), + ttl_seconds_after_finished = 56, ), + xgb_replica_specs = { + 'key' : V1ReplicaSpec( + replicas = 56, + restart_policy = '0', + template = None, ) + }, + ) + + def testV1XGBoostJobSpec(self): + """Test V1XGBoostJobSpec""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/python/test/test_e2e.py b/sdk/python/test/test_e2e.py deleted file mode 100644 index 882785a795..0000000000 --- a/sdk/python/test/test_e2e.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container - -from kubeflow.tfjob import V1ReplicaSpec -from kubeflow.tfjob import V1TFJob -from kubeflow.tfjob import V1TFJobSpec -from kubeflow.tfjob import TFJobClient - - -TFJOB_CLIENT = TFJobClient(config_file=os.getenv('KUBECONFIG')) -SDK_TEST_NAMESPACE = 'kubeflow' - -def test_sdk_e2e(): - - container = V1Container( - name="tensorflow", - image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", - command=[ - "python", - "/var/tf_mnist/mnist_with_summaries.py", - "--log_dir=/train/logs", "--learning_rate=0.01", - "--batch_size=150" - ] - ) - - worker = V1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - spec=V1PodSpec( - containers=[container] - ) - ) - ) - - tfjob = V1TFJob( - api_version="kubeflow.org/v1", - kind="TFJob", - metadata=V1ObjectMeta(name="mnist-ci-test", namespace=SDK_TEST_NAMESPACE), - spec=V1TFJobSpec( - clean_pod_policy="None", - tf_replica_specs={"Worker": worker} - ) - ) - - - TFJOB_CLIENT.create(tfjob, namespace=SDK_TEST_NAMESPACE) - - TFJOB_CLIENT.wait_for_job("mnist-ci-test", namespace=SDK_TEST_NAMESPACE) - if not TFJOB_CLIENT.is_job_succeeded("mnist-ci-test", namespace=SDK_TEST_NAMESPACE): - raise RuntimeError("The TFJob is not succeeded.") - - TFJOB_CLIENT.get_logs("mnist-ci-test", master=False, namespace=SDK_TEST_NAMESPACE) - - TFJOB_CLIENT.delete("mnist-ci-test", namespace=SDK_TEST_NAMESPACE) diff --git a/sdk/python/test/test_v1_job_condition.py b/sdk/python/test/test_v1_job_condition.py deleted file mode 100644 index 8a2e431132..0000000000 --- a/sdk/python/test/test_v1_job_condition.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_job_condition import V1JobCondition # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1JobCondition(unittest.TestCase): - """V1JobCondition unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1JobCondition(self): - """Test V1JobCondition""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_job_condition.V1JobCondition() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_job_status.py b/sdk/python/test/test_v1_job_status.py deleted file mode 100644 index 1a17dab458..0000000000 --- a/sdk/python/test/test_v1_job_status.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_job_status import V1JobStatus # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1JobStatus(unittest.TestCase): - """V1JobStatus unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1JobStatus(self): - """Test V1JobStatus""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_job_status.V1JobStatus() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_replica_spec.py b/sdk/python/test/test_v1_replica_spec.py deleted file mode 100644 index 34efefeb4e..0000000000 --- a/sdk/python/test/test_v1_replica_spec.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_replica_spec import V1ReplicaSpec # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1ReplicaSpec(unittest.TestCase): - """V1ReplicaSpec unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1ReplicaSpec(self): - """Test V1ReplicaSpec""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_replica_spec.V1ReplicaSpec() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_replica_status.py b/sdk/python/test/test_v1_replica_status.py deleted file mode 100644 index 69f4d8f28c..0000000000 --- a/sdk/python/test/test_v1_replica_status.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_replica_status import V1ReplicaStatus # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1ReplicaStatus(unittest.TestCase): - """V1ReplicaStatus unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1ReplicaStatus(self): - """Test V1ReplicaStatus""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_replica_status.V1ReplicaStatus() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_tf_job.py b/sdk/python/test/test_v1_tf_job.py deleted file mode 100644 index e08d94681c..0000000000 --- a/sdk/python/test/test_v1_tf_job.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_tf_job import V1TFJob # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1TFJob(unittest.TestCase): - """V1TFJob unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1TFJob(self): - """Test V1TFJob""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_tf_job.V1TFJob() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_tf_job_list.py b/sdk/python/test/test_v1_tf_job_list.py deleted file mode 100644 index 49c7db6001..0000000000 --- a/sdk/python/test/test_v1_tf_job_list.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_tf_job_list import V1TFJobList # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1TFJobList(unittest.TestCase): - """V1TFJobList unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1TFJobList(self): - """Test V1TFJobList""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_tf_job_list.V1TFJobList() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/test/test_v1_tf_job_spec.py b/sdk/python/test/test_v1_tf_job_spec.py deleted file mode 100644 index f0ef7db79a..0000000000 --- a/sdk/python/test/test_v1_tf_job_spec.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2019 kubeflow.org. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -""" - tfjob - - Python SDK for TF-Operator # noqa: E501 - - OpenAPI spec version: v0.1 - - Generated by: https://github.com/swagger-api/swagger-codegen.git -""" - - -from __future__ import absolute_import - -import unittest - -from kubeflow import tfjob -from kubeflow.tfjob.models.v1_tf_job_spec import V1TFJobSpec # noqa: E501 -from kubeflow.tfjob.rest import ApiException - - -class TestV1TFJobSpec(unittest.TestCase): - """V1TFJobSpec unit test stubs""" - - def setUp(self): - pass - - def tearDown(self): - pass - - def testV1TFJobSpec(self): - """Test V1TFJobSpec""" - # FIXME: construct object with mandatory attributes with example values - # model = tfjob.models.v1_tf_job_spec.V1TFJobSpec() # noqa: E501 - pass - - -if __name__ == '__main__': - unittest.main() From e80031fc3a72fd35aa327724f2303d6b2cd6ae66 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Thu, 23 Sep 2021 00:12:05 +0200 Subject: [PATCH 2/8] Update example notebooks --- .../examples/kubeflow-pytorchjob-sdk.ipynb | 533 ++++++++++++++++++ sdk/python/examples/kubeflow-tfjob-sdk.ipynb | 144 +++-- 2 files changed, 616 insertions(+), 61 deletions(-) create mode 100644 sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb diff --git a/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb b/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb new file mode 100644 index 0000000000..bd7b073e03 --- /dev/null +++ b/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb @@ -0,0 +1,533 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sample for Kubeflow PyTorchJob SDK" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a sample for Kubeflow PyTorchJob SDK `kubeflow-pytorchjob`.\n", + "\n", + "The notebook shows how to use Kubeflow PyTorchJob SDK to create, get, wait, check and delete PyTorchJob." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from kubernetes.client import V1PodTemplateSpec\n", + "from kubernetes.client import V1ObjectMeta\n", + "from kubernetes.client import V1PodSpec\n", + "from kubernetes.client import V1Container\n", + "from kubernetes.client import V1ResourceRequirements\n", + "\n", + "from kubeflow.training import constants\n", + "from kubeflow.training.utils import utils\n", + "from kubeflow.training import V1ReplicaSpec\n", + "from kubeflow.training import V1PyTorchJob\n", + "from kubeflow.training import V1PyTorchJobSpec\n", + "from kubeflow.training import V1RunPolicy\n", + "from kubeflow.training.api.py_torch_job_client import PyTorchJobClient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define namespace where pytorchjob needs to be created to. If not specified, below function defines namespace to the current one where SDK is running in the cluster, otherwise it will deploy to default namespace." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "namespace = utils.get_default_target_namespace()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define PyTorchJob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The demo only creates a worker of PyTorchJob to run mnist sample." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "container = V1Container(\n", + " name=\"pytorch\",\n", + " image=\"gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0\",\n", + " args=[\"--backend\",\"gloo\"]\n", + ")\n", + "\n", + "master = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " spec=V1PodSpec(\n", + " containers=[container]\n", + " )\n", + " )\n", + ")\n", + "\n", + "worker = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " spec=V1PodSpec(\n", + " containers=[container]\n", + " )\n", + " )\n", + ")\n", + "\n", + "pytorchjob = V1PyTorchJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"PyTorchJob\",\n", + " metadata=V1ObjectMeta(name=\"pytorch-dist-mnist-gloo\",namespace=namespace),\n", + " spec=V1PyTorchJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " pytorch_replica_specs={\"Master\": master,\n", + " \"Worker\": worker}\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create PyTorchJob" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'apiVersion': 'kubeflow.org/v1',\n", + " 'kind': 'PyTorchJob',\n", + " 'metadata': {'creationTimestamp': '2021-09-22T21:39:22Z',\n", + " 'generation': 1,\n", + " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:spec': {'.': {},\n", + " 'f:pytorchReplicaSpecs': {'.': {},\n", + " 'f:Master': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", + " 'f:Worker': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}},\n", + " 
'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}}}},\n", + " 'manager': 'OpenAPI-Generator',\n", + " 'operation': 'Update',\n", + " 'time': '2021-09-22T21:39:22Z'}],\n", + " 'name': 'pytorch-dist-mnist-gloo',\n", + " 'namespace': 'default',\n", + " 'resourceVersion': '605918',\n", + " 'uid': '00588811-222f-4c06-a07d-e29d9d01bb77'},\n", + " 'spec': {'pytorchReplicaSpecs': {'Master': {'replicas': 1,\n", + " 'restartPolicy': 'OnFailure',\n", + " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", + " 'image': 'gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0',\n", + " 'name': 'pytorch'}]}}},\n", + " 'Worker': {'replicas': 1,\n", + " 'restartPolicy': 'OnFailure',\n", + " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", + " 'image': 'gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0',\n", + " 'name': 'pytorch'}]}}}},\n", + " 'runPolicy': {'cleanPodPolicy': 'None'}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pytorchjob_client = PyTorchJobClient()\n", + "pytorchjob_client.create(pytorchjob)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the created PyTorchJob " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'apiVersion': 'kubeflow.org/v1',\n", + " 'kind': 'PyTorchJob',\n", + " 'metadata': {'creationTimestamp': '2021-09-22T21:39:22Z',\n", + " 'generation': 1,\n", + " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:spec': {'.': {},\n", + " 'f:pytorchReplicaSpecs': {'.': {},\n", + " 'f:Master': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {}}},\n", + " 'f:Worker': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {}}}}}},\n", + " 'manager': 
'OpenAPI-Generator',\n", + " 'operation': 'Update',\n", + " 'time': '2021-09-22T21:39:22Z'},\n", + " {'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:spec': {'f:cleanPodPolicy': {},\n", + " 'f:pytorchReplicaSpecs': {'f:Master': {'f:template': {'f:metadata': {'.': {},\n", + " 'f:creationTimestamp': {}},\n", + " 'f:spec': {'f:containers': {}}}},\n", + " 'f:Worker': {'f:template': {'f:metadata': {'.': {},\n", + " 'f:creationTimestamp': {}},\n", + " 'f:spec': {'f:containers': {}}}}}},\n", + " 'f:status': {'.': {},\n", + " 'f:conditions': {},\n", + " 'f:replicaStatuses': {'.': {}, 'f:Master': {}, 'f:Worker': {}},\n", + " 'f:startTime': {}}},\n", + " 'manager': 'pytorch-operator.v1',\n", + " 'operation': 'Update',\n", + " 'time': '2021-09-22T21:39:22Z'}],\n", + " 'name': 'pytorch-dist-mnist-gloo',\n", + " 'namespace': 'default',\n", + " 'resourceVersion': '605956',\n", + " 'uid': '00588811-222f-4c06-a07d-e29d9d01bb77'},\n", + " 'spec': {'pytorchReplicaSpecs': {'Master': {'replicas': 1,\n", + " 'restartPolicy': 'OnFailure',\n", + " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", + " 'image': 'gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0',\n", + " 'name': 'pytorch'}]}}},\n", + " 'Worker': {'replicas': 1,\n", + " 'restartPolicy': 'OnFailure',\n", + " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", + " 'image': 'gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0',\n", + " 'name': 'pytorch'}]}}}},\n", + " 'runPolicy': {'cleanPodPolicy': 'None'}},\n", + " 'status': {'conditions': [{'lastTransitionTime': '2021-09-22T21:39:22Z',\n", + " 'lastUpdateTime': '2021-09-22T21:39:22Z',\n", + " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is created.',\n", + " 'reason': 'PyTorchJobCreated',\n", + " 'status': 'True',\n", + " 'type': 'Created'}],\n", + " 'replicaStatuses': {'Master': {}, 'Worker': {}},\n", + " 'startTime': '2021-09-22T21:39:22Z'}}" + ] + }, + "execution_count": 13, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "pytorchjob_client.get('pytorch-dist-mnist-gloo')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the PyTorchJob status, check if the PyTorchJob has been started." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Created'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pytorchjob_client.get_job_status('pytorch-dist-mnist-gloo', namespace=namespace)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wait for the specified PyTorchJob to finish" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NAME STATE TIME \n", + "pytorch-dist-mnist-gloo Created 2021-09-22T21:39:22Z \n", + "pytorch-dist-mnist-gloo Running 2021-09-22T21:40:29Z \n", + "pytorch-dist-mnist-gloo Running 2021-09-22T21:40:29Z \n" + ] + } + ], + "source": [ + "pytorchjob_client.wait_for_job('pytorch-dist-mnist-gloo', namespace=namespace, watch=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check if the PyTorchJob succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pytorchjob_client.is_job_succeeded('pytorch-dist-mnist-gloo', namespace=namespace)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the PyTorchJob training logs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The logs of Pod pytorch-dist-mnist-gloo-master-0:\n", + " Using distributed PyTorch with gloo backend\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n", + "Processing...\n", + "Done!\n", + "Train Epoch: 1 [0/60000 (0%)]\tloss=2.3000\n", + "Train Epoch: 1 [640/60000 (1%)]\tloss=2.2135\n", + "Train Epoch: 1 [1280/60000 (2%)]\tloss=2.1704\n", + "Train Epoch: 1 [1920/60000 (3%)]\tloss=2.0766\n", + "Train Epoch: 1 [2560/60000 (4%)]\tloss=1.8679\n", + "Train Epoch: 1 [3200/60000 (5%)]\tloss=1.4135\n", + "Train Epoch: 1 [3840/60000 (6%)]\tloss=1.0003\n", + "Train Epoch: 1 [4480/60000 (7%)]\tloss=0.7762\n", + "Train Epoch: 1 [5120/60000 (9%)]\tloss=0.4598\n", + "Train Epoch: 1 [5760/60000 (10%)]\tloss=0.4860\n", + "Train Epoch: 1 [6400/60000 (11%)]\tloss=0.4389\n", + "Train Epoch: 1 [7040/60000 (12%)]\tloss=0.4084\n", + "Train Epoch: 1 [7680/60000 (13%)]\tloss=0.4602\n", + "Train Epoch: 1 [8320/60000 (14%)]\tloss=0.4289\n", + "Train Epoch: 1 [8960/60000 (15%)]\tloss=0.3990\n", + "Train Epoch: 1 [9600/60000 (16%)]\tloss=0.3850\n", + "Train Epoch: 1 [10240/60000 (17%)]\tloss=0.2985\n", + "Train Epoch: 1 [10880/60000 (18%)]\tloss=0.5031\n", + "Train Epoch: 1 [11520/60000 (19%)]\tloss=0.5235\n", + "Train Epoch: 1 [12160/60000 (20%)]\tloss=0.3379\n", + "Train Epoch: 1 [12800/60000 (21%)]\tloss=0.3667\n", + "Train Epoch: 1 [13440/60000 (22%)]\tloss=0.4503\n", + "Train Epoch: 1 [14080/60000 (23%)]\tloss=0.3043\n", + "Train Epoch: 1 [14720/60000 (25%)]\tloss=0.3589\n", + "Train Epoch: 1 [15360/60000 (26%)]\tloss=0.3320\n", + "Train Epoch: 1 [16000/60000 
(27%)]\tloss=0.4406\n", + "Train Epoch: 1 [16640/60000 (28%)]\tloss=0.3641\n", + "Train Epoch: 1 [17280/60000 (29%)]\tloss=0.3170\n", + "Train Epoch: 1 [17920/60000 (30%)]\tloss=0.2014\n", + "Train Epoch: 1 [18560/60000 (31%)]\tloss=0.4985\n", + "Train Epoch: 1 [19200/60000 (32%)]\tloss=0.3264\n", + "Train Epoch: 1 [19840/60000 (33%)]\tloss=0.1198\n", + "Train Epoch: 1 [20480/60000 (34%)]\tloss=0.1904\n", + "Train Epoch: 1 [21120/60000 (35%)]\tloss=0.1424\n", + "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3143\n", + "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1494\n", + "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2901\n", + "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4670\n", + "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2151\n", + "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1521\n", + "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2240\n", + "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2629\n", + "Train Epoch: 1 [26880/60000 (45%)]\tloss=0.2330\n", + "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2630\n", + "Train Epoch: 1 [28160/60000 (47%)]\tloss=0.2126\n", + "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1327\n", + "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2789\n", + "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0947\n", + "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1280\n", + "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2458\n", + "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3394\n", + "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1527\n", + "Train Epoch: 1 [33280/60000 (55%)]\tloss=0.0901\n", + "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1451\n", + "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1994\n", + "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2171\n", + "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0633\n", + "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1369\n", + "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1160\n", + "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2355\n", + "Train Epoch: 1 [38400/60000 (64%)]\tloss=0.0634\n", + "Train Epoch: 1 [39040/60000 
(65%)]\tloss=0.1062\n", + "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1608\n", + "Train Epoch: 1 [40320/60000 (67%)]\tloss=0.1101\n", + "Train Epoch: 1 [40960/60000 (68%)]\tloss=0.1775\n", + "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2285\n", + "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0737\n", + "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1562\n", + "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2775\n", + "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1418\n", + "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1163\n", + "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1221\n", + "Train Epoch: 1 [46080/60000 (77%)]\tloss=0.0768\n", + "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1950\n", + "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0706\n", + "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2091\n", + "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1380\n", + "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0950\n", + "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1070\n", + "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1194\n", + "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1447\n", + "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0662\n", + "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0239\n", + "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2622\n", + "Train Epoch: 1 [53760/60000 (90%)]\tloss=0.0928\n", + "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1297\n", + "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1907\n", + "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0347\n", + "Train Epoch: 1 [56320/60000 (94%)]\tloss=0.0354\n", + "Train Epoch: 1 [56960/60000 (95%)]\tloss=0.0770\n", + "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1175\n", + "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1919\n", + "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2053\n", + "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0639\n", + "\n", + "accuracy=0.9664\n", + "\n", + "\n" + ] + } + ], + "source": [ + "pytorchjob_client.get_logs('pytorch-dist-mnist-gloo', namespace=namespace)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Delete the PyTorchJob" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'kind': 'Status',\n", + " 'apiVersion': 'v1',\n", + " 'metadata': {},\n", + " 'status': 'Success',\n", + " 'details': {'name': 'pytorch-dist-mnist-gloo',\n", + " 'group': 'kubeflow.org',\n", + " 'kind': 'pytorchjobs',\n", + " 'uid': '47f9dc9a-36af-11ea-beb5-00163e01f7d2'}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pytorchjob_client.delete('pytorch-dist-mnist-gloo')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/sdk/python/examples/kubeflow-tfjob-sdk.ipynb b/sdk/python/examples/kubeflow-tfjob-sdk.ipynb index 6b5a7ce923..6d4e8a857b 100644 --- a/sdk/python/examples/kubeflow-tfjob-sdk.ipynb +++ b/sdk/python/examples/kubeflow-tfjob-sdk.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -27,12 +27,13 @@ "from kubernetes.client import V1PodSpec\n", "from kubernetes.client import V1Container\n", "\n", - "from kubeflow.tfjob import constants\n", - "from kubeflow.tfjob import utils\n", - "from kubeflow.tfjob import V1ReplicaSpec\n", - "from kubeflow.tfjob import V1TFJob\n", - "from kubeflow.tfjob import V1TFJobSpec\n", - "from kubeflow.tfjob import TFJobClient" + "from kubeflow.training import constants\n", + "from kubeflow.training.utils import 
utils\n", + "from kubeflow.training import V1ReplicaSpec\n", + "from kubeflow.training import V1TFJob\n", + "from kubeflow.training import V1TFJobSpec\n", + "from kubeflow.training import V1RunPolicy\n", + "from kubeflow.training.api.tf_job_client import TFJobClient" ] }, { @@ -44,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -117,7 +118,7 @@ " kind=\"TFJob\",\n", " metadata=V1ObjectMeta(name=\"mnist\",namespace=namespace),\n", " spec=V1TFJobSpec(\n", - " clean_pod_policy=\"None\",\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", " tf_replica_specs={\"Worker\": worker,\n", " \"Chief\": chief,\n", " \"PS\": ps}\n", @@ -134,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -142,14 +143,33 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'TFJob',\n", - " 'metadata': {'creationTimestamp': '2020-01-10T06:05:17Z',\n", + " 'metadata': {'creationTimestamp': '2021-09-22T21:27:46Z',\n", " 'generation': 1,\n", + " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:spec': {'.': {},\n", + " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}},\n", + " 'f:tfReplicaSpecs': {'.': {},\n", + " 'f:Chief': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", + " 'f:PS': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", + " 'f:Worker': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", + " 'manager': 'OpenAPI-Generator',\n", + " 
'operation': 'Update',\n", + " 'time': '2021-09-22T21:27:46Z'}],\n", " 'name': 'mnist',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '24815779',\n", - " 'selfLink': '/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist',\n", - " 'uid': '2d0ad671-336f-11ea-b6a8-00000a1001ee'},\n", - " 'spec': {'cleanPodPolicy': 'None',\n", + " 'resourceVersion': '594847',\n", + " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'},\n", + " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", " 'restartPolicy': 'Never',\n", " 'template': {'spec': {'containers': [{'command': ['python',\n", @@ -179,7 +199,7 @@ " 'name': 'tensorflow'}]}}}}}}" ] }, - "execution_count": 4, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -198,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -206,14 +226,33 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'TFJob',\n", - " 'metadata': {'creationTimestamp': '2020-01-10T06:05:17Z',\n", + " 'metadata': {'creationTimestamp': '2021-09-22T21:27:46Z',\n", " 'generation': 1,\n", + " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:spec': {'.': {},\n", + " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}},\n", + " 'f:tfReplicaSpecs': {'.': {},\n", + " 'f:Chief': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", + " 'f:PS': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", + " 'f:Worker': {'.': {},\n", + " 'f:replicas': {},\n", + " 'f:restartPolicy': {},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", + " 'manager': 'OpenAPI-Generator',\n", + " 'operation': 'Update',\n", + " 'time': 
'2021-09-22T21:27:46Z'}],\n", " 'name': 'mnist',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '24815814',\n", - " 'selfLink': '/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist',\n", - " 'uid': '2d0ad671-336f-11ea-b6a8-00000a1001ee'},\n", - " 'spec': {'cleanPodPolicy': 'None',\n", + " 'resourceVersion': '594847',\n", + " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'},\n", + " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", " 'restartPolicy': 'Never',\n", " 'template': {'spec': {'containers': [{'command': ['python',\n", @@ -240,18 +279,10 @@ " '--learning_rate=0.01',\n", " '--batch_size=150'],\n", " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", - " 'name': 'tensorflow'}]}}}}},\n", - " 'status': {'conditions': [{'lastTransitionTime': '2020-01-10T06:05:17Z',\n", - " 'lastUpdateTime': '2020-01-10T06:05:17Z',\n", - " 'message': 'TFJob mnist is created.',\n", - " 'reason': 'TFJobCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'}],\n", - " 'replicaStatuses': {'Chief': {}, 'PS': {}, 'Worker': {}},\n", - " 'startTime': '2020-01-10T06:05:18Z'}}" + " 'name': 'tensorflow'}]}}}}}}" ] }, - "execution_count": 5, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -269,16 +300,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Created'" + "''" ] }, - "execution_count": 6, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -296,22 +327,20 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "NAME STATE TIME \n", - "mnist Created 2020-01-10T06:05:17Z \n", - "mnist Running 2020-01-10T06:05:29Z \n", - "mnist Running 2020-01-10T06:05:29Z \n", - "mnist Running 2020-01-10T06:05:29Z \n", - "mnist Running 2020-01-10T06:05:29Z \n", - "mnist 
Running 2020-01-10T06:05:29Z \n", - "mnist Running 2020-01-10T06:05:29Z \n", - "mnist Succeeded 2020-01-10T06:07:49Z \n" + "mnist \n", + "mnist Created 2021-09-22T21:27:46Z \n", + "mnist Created 2021-09-22T21:27:46Z \n", + "mnist Created 2021-09-22T21:27:46Z \n", + "mnist Running 2021-09-22T21:27:51Z \n", + "mnist Running 2021-09-22T21:27:51Z \n", + "mnist Succeeded 2021-09-22T21:29:38Z \n" ] } ], @@ -328,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 23, "metadata": { "scrolled": true }, @@ -339,7 +368,7 @@ "True" ] }, - "execution_count": 8, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -357,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -519,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -532,10 +561,10 @@ " 'details': {'name': 'mnist',\n", " 'group': 'kubeflow.org',\n", " 'kind': 'tfjobs',\n", - " 'uid': '2d0ad671-336f-11ea-b6a8-00000a1001ee'}}" + " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'}}" ] }, - "execution_count": 10, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -543,13 +572,6 @@ "source": [ "tfjob_client.delete('mnist', namespace=namespace)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -568,9 +590,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.7.3" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 556db102074aa6f8cfc03c52b4c4d24767dd0550 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Wed, 29 Sep 2021 15:21:14 +0200 Subject: [PATCH 3/8] Update SDK generation tooling and docs --- README.md | 9 ++++++++ docs/development/developer_guide.md | 15 +++++++++++++ hack/python-sdk/gen-sdk.sh | 34 +++++++++++++++++++---------- 
hack/python-sdk/main.go | 3 +++ hack/python-sdk/post_gen.py | 11 +++++----- hack/python-sdk/swagger.json | 18 +++++++-------- 6 files changed, 64 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index f74baa0eb5..448f4fd770 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,15 @@ For users who prefer to use original tensorflow controllers, please checkout v1. kubectl apply -k "github.com/kubeflow/tf-operator.git/manifests/overlays/standalone?ref=v1.2.0" ``` +### Python SDK for Kubeflow Training Operator + +Training Operator provides Python SDK for the custom resources. More docs are available in [sdk/python](sdk/python) folder. + +Use `pip install` command to install the latest release of the SDK: +``` +pip install kubeflow-training +``` + ## Quick Start Please refer to the [quick-start-v1.md](docs/quick-start-v1.md) and [Kubeflow Training User Guide](https://www.kubeflow.org/docs/guides/components/tftraining/) for more information. diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md index 34eda7cc50..db79ee89d4 100644 --- a/docs/development/developer_guide.md +++ b/docs/development/developer_guide.md @@ -80,6 +80,21 @@ kubectl create -f ./tf_job_mnist.yaml On ubuntu the default go package appears to be gccgo-go which has problems see [issue](https://github.com/golang/go/issues/15429) golang-go package is also really old so install from golang tarballs instead. +## Generate Python SDK + +To generate Python SDK for the operator, run: +``` +.hack/python-sdk/gen-sdk.sh +``` +This command will re-generate the api and model files together with the documentation and model tests. 
+The following files/folders in `sdk/python` are auto-generated and should not be modified directly: +``` +docs +kubeflow/training/models +kubeflow/training/*.py +test/*.py +``` + ## Code Style ### Python diff --git a/hack/python-sdk/gen-sdk.sh b/hack/python-sdk/gen-sdk.sh index 86cbc789bc..5288104478 100755 --- a/hack/python-sdk/gen-sdk.sh +++ b/hack/python-sdk/gen-sdk.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright 2019 The Kubeflow Authors. +# Copyright 2021 The Kubeflow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,10 +18,12 @@ set -o errexit set -o nounset set -o pipefail +repo_root="$(realpath "$(dirname "$0")/../..")" + SWAGGER_JAR_URL="https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/4.3.1/openapi-generator-cli-4.3.1.jar" -SWAGGER_CODEGEN_JAR="hack/python-sdk/openapi-generator-cli.jar" -SWAGGER_CODEGEN_CONF="hack/python-sdk/swagger_config.json" -SDK_OUTPUT_PATH="/tmp/sdk/python" +SWAGGER_CODEGEN_JAR="${repo_root}/hack/python-sdk/openapi-generator-cli.jar" +SWAGGER_CODEGEN_CONF="${repo_root}/hack/python-sdk/swagger_config.json" +SDK_OUTPUT_PATH="${repo_root}/sdk/python" FRAMEWORKS=(tensorflow pytorch mxnet xgboost) VERSION=1.3.0 @@ -32,14 +34,16 @@ fi echo "Generating OpenAPI specification ..." echo "./hack/update-codegen.sh already help us generate openapi specs ..." -echo "Downloading the swagger-codegen JAR package ..." -wget -O ${SWAGGER_CODEGEN_JAR} ${SWAGGER_JAR_URL} +if [[ ! -f "$SWAGGER_CODEGEN_JAR" ]]; then + echo "Downloading the swagger-codegen JAR package ..." + wget -O "${SWAGGER_CODEGEN_JAR}" ${SWAGGER_JAR_URL} +fi for FRAMEWORK in ${FRAMEWORKS[@]}; do SWAGGER_CODEGEN_FILE="pkg/apis/${FRAMEWORK}/v1/swagger.json" echo "Generating swagger file for ${FRAMEWORK} ..." 
- go run hack/python-sdk/main.go ${FRAMEWORK} ${VERSION} > ${SWAGGER_CODEGEN_FILE} + go run "${repo_root}"/hack/python-sdk/main.go "${FRAMEWORK}" ${VERSION} > "${SWAGGER_CODEGEN_FILE}" done echo "Merging swagger files from different frameworks into one" @@ -50,11 +54,17 @@ chmod +x /tmp/swagger # it will report warning like 'v1.SchedulingPolicy' already exists in primary or higher priority mixin, skipping # error code is not 0 but t's acceptable. -/tmp/swagger mixin pkg/apis/tensorflow/v1/swagger.json pkg/apis/pytorch/v1/swagger.json pkg/apis/mxnet/v1/swagger.json pkg/apis/xgboost/v1/swagger.json \ ---output hack/python-sdk/swagger.json --quiet || true +/tmp/swagger mixin "${repo_root}"/pkg/apis/tensorflow/v1/swagger.json "${repo_root}"/pkg/apis/pytorch/v1/swagger.json \ + "${repo_root}"/pkg/apis/mxnet/v1/swagger.json "${repo_root}"/pkg/apis/xgboost/v1/swagger.json \ + --output "${repo_root}"/hack/python-sdk/swagger.json --quiet || true -echo "Generating Python SDK for ${FRAMEWORK} ..." -java -jar ${SWAGGER_CODEGEN_JAR} generate -i hack/python-sdk/swagger.json -g python -o ${SDK_OUTPUT_PATH} -c ${SWAGGER_CODEGEN_CONF} +echo "Removing previously generated files ..." +rm -rf "${SDK_OUTPUT_PATH}"/docs "${SDK_OUTPUT_PATH}"/kubeflow/training/models "${SDK_OUTPUT_PATH}"/kubeflow/training/*.py "${SDK_OUTPUT_PATH}"/test/*.py +echo "Generating Python SDK for Training Operator ..." +java -jar "${SWAGGER_CODEGEN_JAR}" generate -i "${repo_root}"/hack/python-sdk/swagger.json -g python -o "${SDK_OUTPUT_PATH}" -c "${SWAGGER_CODEGEN_CONF}" echo "Kubeflow Training Operator Python SDK is generated successfully to folder ${SDK_OUTPUT_PATH}/." -rm /tmp/swagger \ No newline at end of file +rm /tmp/swagger + +echo "Running post-generation script ..." 
+"${repo_root}"/hack/python-sdk/post_gen.py diff --git a/hack/python-sdk/main.go b/hack/python-sdk/main.go index 7b0b2411db..4389f727c0 100644 --- a/hack/python-sdk/main.go +++ b/hack/python-sdk/main.go @@ -90,6 +90,9 @@ func main() { func swaggify(name, framework string) string { name = strings.Replace(name, fmt.Sprintf("github.com/kubeflow/tf-operator/pkg/apis/%s/", framework), "", -1) name = strings.Replace(name, "github.com/kubeflow/common/pkg/apis/common/", "", -1) + name = strings.Replace(name, "k8s.io/api/core/", "", -1) + name = strings.Replace(name, "k8s.io/apimachinery/pkg/apis/meta/", "", -1) + name = strings.Replace(name, "k8s.io/apimachinery/pkg/api/resource", "", -1) name = strings.Replace(name, "/", ".", -1) return name } diff --git a/hack/python-sdk/post_gen.py b/hack/python-sdk/post_gen.py index ae737fd8fd..a0e516807a 100755 --- a/hack/python-sdk/post_gen.py +++ b/hack/python-sdk/post_gen.py @@ -25,13 +25,14 @@ def fix_test_files() -> None: Fix invalid model imports in generated model tests """ os.path.realpath(__file__) - test_folder_dir = os.path.join(sdk_dir, "test/models") + test_folder_dir = os.path.join(sdk_dir, "test") test_files = os.listdir(test_folder_dir) for test_file in test_files: - print(test_file) - with fileinput.FileInput(os.path.join(test_folder_dir, test_file), inplace=True) as file: - for line in file: - print(_apply_regex(line), end='') + print(f"Precessing file {test_file}") + if test_file.endswith(".py"): + with fileinput.FileInput(os.path.join(test_folder_dir, test_file), inplace=True) as file: + for line in file: + print(_apply_regex(line), end='') def _apply_regex(input_str: str) -> str: diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json index f55dbf33c9..3f201e1764 100644 --- a/hack/python-sdk/swagger.json +++ b/hack/python-sdk/swagger.json @@ -62,7 +62,7 @@ }, "lastReconcileTime": { "description": "Represents last time when the job was reconciled. 
It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "replicaStatuses": { "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", @@ -73,7 +73,7 @@ }, "startTime": { "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" } } }, @@ -90,7 +90,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "$ref": "#/definitions/v1.MXJobSpec" @@ -122,7 +122,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, @@ -169,7 +169,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "description": "Specification of the desired state of the PyTorchJob.", @@ -205,7 +205,7 @@ }, "metadata": { "description": "Standard list metadata.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, @@ -245,7 +245,7 @@ }, "template": { "description": "Template is the object that describes the pod that will be created for this replica. 
RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "$ref": "#/definitions/k8s.io.api.core.v1.PodTemplateSpec" + "$ref": "#/definitions/v1.PodTemplateSpec" } } }, @@ -310,7 +310,7 @@ "minResources": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.api.resource.Quantity" + "$ref": "#/definitions/.Quantity" } }, "priorityClass": { @@ -334,7 +334,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "description": "Specification of the desired state of the TFJob.", From 83c1453b453e3c09956ea5fd44701d250267ef13 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Wed, 29 Sep 2021 15:23:41 +0200 Subject: [PATCH 4/8] Re-generate SDK --- hack/python-sdk/swagger.json | 2 +- pkg/apis/mxnet/v1/swagger.json | 18 +- pkg/apis/pytorch/v1/swagger.json | 18 +- pkg/apis/tensorflow/v1/swagger.json | 18 +- pkg/apis/xgboost/v1/swagger.json | 18 +- sdk/python/.openapi-generator-ignore | 33 ++ sdk/python/README.md | 1 - sdk/python/docs/TFJobClient.md | 378 ------------------ sdk/python/docs/V1JobCondition.md | 4 +- sdk/python/docs/V1JobStatus.md | 6 +- sdk/python/docs/V1MXJob.md | 2 +- sdk/python/docs/V1MXJobList.md | 2 +- sdk/python/docs/V1PyTorchJob.md | 2 +- sdk/python/docs/V1PyTorchJobList.md | 2 +- sdk/python/docs/V1ReplicaSpec.md | 2 +- sdk/python/docs/V1SchedulingPolicy.md | 2 +- sdk/python/docs/V1TFJob.md | 2 +- sdk/python/docs/V1TFJobList.md | 2 +- sdk/python/docs/V1Time.md | 9 - sdk/python/docs/V1XGBoostJob.md | 2 +- sdk/python/docs/V1XGBoostJobList.md | 2 +- sdk/python/kubeflow/__init__.py | 1 - .../training/models/v1_job_condition.py | 12 +- .../kubeflow/training/models/v1_job_status.py | 18 +- .../kubeflow/training/models/v1_mx_job.py | 6 +- .../training/models/v1_mx_job_list.py | 6 +- .../training/models/v1_py_torch_job.py | 6 +- .../training/models/v1_py_torch_job_list.py | 6 +- 
.../training/models/v1_replica_spec.py | 6 +- .../training/models/v1_scheduling_policy.py | 6 +- .../kubeflow/training/models/v1_tf_job.py | 6 +- .../training/models/v1_tf_job_list.py | 6 +- .../training/models/v1_xg_boost_job.py | 6 +- .../training/models/v1_xg_boost_job_list.py | 6 +- sdk/python/test/e2e/test_e2e_pytorchjob.py | 3 +- sdk/python/test/e2e/test_e2e_tfjob.py | 2 +- sdk/python/test/models/__init__.py | 0 .../{models => }/test_v1_job_condition.py | 0 .../test/{models => }/test_v1_job_status.py | 0 .../test/{models => }/test_v1_mx_job.py | 0 .../test/{models => }/test_v1_mx_job_list.py | 0 .../test/{models => }/test_v1_mx_job_spec.py | 0 .../test/{models => }/test_v1_py_torch_job.py | 0 .../{models => }/test_v1_py_torch_job_list.py | 0 .../{models => }/test_v1_py_torch_job_spec.py | 0 .../test/{models => }/test_v1_replica_spec.py | 0 .../{models => }/test_v1_replica_status.py | 0 .../test/{models => }/test_v1_run_policy.py | 0 .../{models => }/test_v1_scheduling_policy.py | 0 .../test/{models => }/test_v1_tf_job.py | 0 .../test/{models => }/test_v1_tf_job_list.py | 0 .../test/{models => }/test_v1_tf_job_spec.py | 0 .../test/{models => }/test_v1_xg_boost_job.py | 0 .../{models => }/test_v1_xg_boost_job_list.py | 0 .../{models => }/test_v1_xg_boost_job_spec.py | 0 55 files changed, 132 insertions(+), 489 deletions(-) create mode 100644 sdk/python/.openapi-generator-ignore delete mode 100644 sdk/python/docs/TFJobClient.md delete mode 100644 sdk/python/docs/V1Time.md delete mode 100644 sdk/python/test/models/__init__.py rename sdk/python/test/{models => }/test_v1_job_condition.py (100%) rename sdk/python/test/{models => }/test_v1_job_status.py (100%) rename sdk/python/test/{models => }/test_v1_mx_job.py (100%) rename sdk/python/test/{models => }/test_v1_mx_job_list.py (100%) rename sdk/python/test/{models => }/test_v1_mx_job_spec.py (100%) rename sdk/python/test/{models => }/test_v1_py_torch_job.py (100%) rename sdk/python/test/{models => 
}/test_v1_py_torch_job_list.py (100%) rename sdk/python/test/{models => }/test_v1_py_torch_job_spec.py (100%) rename sdk/python/test/{models => }/test_v1_replica_spec.py (100%) rename sdk/python/test/{models => }/test_v1_replica_status.py (100%) rename sdk/python/test/{models => }/test_v1_run_policy.py (100%) rename sdk/python/test/{models => }/test_v1_scheduling_policy.py (100%) rename sdk/python/test/{models => }/test_v1_tf_job.py (100%) rename sdk/python/test/{models => }/test_v1_tf_job_list.py (100%) rename sdk/python/test/{models => }/test_v1_tf_job_spec.py (100%) rename sdk/python/test/{models => }/test_v1_xg_boost_job.py (100%) rename sdk/python/test/{models => }/test_v1_xg_boost_job_list.py (100%) rename sdk/python/test/{models => }/test_v1_xg_boost_job_spec.py (100%) diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json index 3f201e1764..a6d6cdb056 100644 --- a/hack/python-sdk/swagger.json +++ b/hack/python-sdk/swagger.json @@ -473,4 +473,4 @@ } } } -} \ No newline at end of file +} diff --git a/pkg/apis/mxnet/v1/swagger.json b/pkg/apis/mxnet/v1/swagger.json index 6aab64c795..2e4a7065cd 100644 --- a/pkg/apis/mxnet/v1/swagger.json +++ b/pkg/apis/mxnet/v1/swagger.json @@ -17,11 +17,11 @@ "properties": { "lastTransitionTime": { "description": "Last time the condition transitioned from one status to another.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "lastUpdateTime": { "description": "The last time this condition was updated.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "message": { "description": "A human readable message indicating details about the transition.", @@ -51,7 +51,7 @@ "properties": { "completionTime": { "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. 
It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "conditions": { "description": "Conditions is an array of current observed job conditions.", @@ -62,7 +62,7 @@ }, "lastReconcileTime": { "description": "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "replicaStatuses": { "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", @@ -73,7 +73,7 @@ }, "startTime": { "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" } } }, @@ -90,7 +90,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "$ref": "#/definitions/v1.MXJobSpec" @@ -122,7 +122,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, @@ -171,7 +171,7 @@ }, "template": { "description": "Template is the object that describes the pod that will be created for this replica. 
RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "$ref": "#/definitions/k8s.io.api.core.v1.PodTemplateSpec" + "$ref": "#/definitions/v1.PodTemplateSpec" } } }, @@ -236,7 +236,7 @@ "minResources": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.api.resource.Quantity" + "$ref": "#/definitions/.Quantity" } }, "priorityClass": { diff --git a/pkg/apis/pytorch/v1/swagger.json b/pkg/apis/pytorch/v1/swagger.json index 72dcd27593..25470edb33 100644 --- a/pkg/apis/pytorch/v1/swagger.json +++ b/pkg/apis/pytorch/v1/swagger.json @@ -17,11 +17,11 @@ "properties": { "lastTransitionTime": { "description": "Last time the condition transitioned from one status to another.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "lastUpdateTime": { "description": "The last time this condition was updated.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "message": { "description": "A human readable message indicating details about the transition.", @@ -51,7 +51,7 @@ "properties": { "completionTime": { "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "conditions": { "description": "Conditions is an array of current observed job conditions.", @@ -62,7 +62,7 @@ }, "lastReconcileTime": { "description": "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. 
It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "replicaStatuses": { "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", @@ -73,7 +73,7 @@ }, "startTime": { "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" } } }, @@ -90,7 +90,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "description": "Specification of the desired state of the PyTorchJob.", @@ -126,7 +126,7 @@ }, "metadata": { "description": "Standard list metadata.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, @@ -166,7 +166,7 @@ }, "template": { "description": "Template is the object that describes the pod that will be created for this replica. 
RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "$ref": "#/definitions/k8s.io.api.core.v1.PodTemplateSpec" + "$ref": "#/definitions/v1.PodTemplateSpec" } } }, @@ -231,7 +231,7 @@ "minResources": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.api.resource.Quantity" + "$ref": "#/definitions/.Quantity" } }, "priorityClass": { diff --git a/pkg/apis/tensorflow/v1/swagger.json b/pkg/apis/tensorflow/v1/swagger.json index fe021c1976..5adcb1c8d8 100644 --- a/pkg/apis/tensorflow/v1/swagger.json +++ b/pkg/apis/tensorflow/v1/swagger.json @@ -17,11 +17,11 @@ "properties": { "lastTransitionTime": { "description": "Last time the condition transitioned from one status to another.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "lastUpdateTime": { "description": "The last time this condition was updated.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "message": { "description": "A human readable message indicating details about the transition.", @@ -51,7 +51,7 @@ "properties": { "completionTime": { "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "conditions": { "description": "Conditions is an array of current observed job conditions.", @@ -62,7 +62,7 @@ }, "lastReconcileTime": { "description": "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. 
It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "replicaStatuses": { "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", @@ -73,7 +73,7 @@ }, "startTime": { "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" } } }, @@ -92,7 +92,7 @@ }, "template": { "description": "Template is the object that describes the pod that will be created for this replica. RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "$ref": "#/definitions/k8s.io.api.core.v1.PodTemplateSpec" + "$ref": "#/definitions/v1.PodTemplateSpec" } } }, @@ -157,7 +157,7 @@ "minResources": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.api.resource.Quantity" + "$ref": "#/definitions/.Quantity" } }, "priorityClass": { @@ -181,7 +181,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "description": "Specification of the desired state of the TFJob.", @@ -217,7 +217,7 @@ }, "metadata": { "description": "Standard list metadata.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, diff --git a/pkg/apis/xgboost/v1/swagger.json b/pkg/apis/xgboost/v1/swagger.json index f1a170fdc8..5203230a88 100644 --- a/pkg/apis/xgboost/v1/swagger.json +++ b/pkg/apis/xgboost/v1/swagger.json @@ -17,11 +17,11 @@ "properties": { "lastTransitionTime": { "description": "Last time the condition transitioned from one status to another.", - "$ref": 
"#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "lastUpdateTime": { "description": "The last time this condition was updated.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "message": { "description": "A human readable message indicating details about the transition.", @@ -51,7 +51,7 @@ "properties": { "completionTime": { "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "conditions": { "description": "Conditions is an array of current observed job conditions.", @@ -62,7 +62,7 @@ }, "lastReconcileTime": { "description": "Represents last time when the job was reconciled. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "replicaStatuses": { "description": "ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica.", @@ -73,7 +73,7 @@ }, "startTime": { "description": "Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" } } }, @@ -92,7 +92,7 @@ }, "template": { "description": "Template is the object that describes the pod that will be created for this replica. 
RestartPolicy in PodTemplateSpec will be overide by RestartPolicy in ReplicaSpec", - "$ref": "#/definitions/k8s.io.api.core.v1.PodTemplateSpec" + "$ref": "#/definitions/v1.PodTemplateSpec" } } }, @@ -157,7 +157,7 @@ "minResources": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.api.resource.Quantity" + "$ref": "#/definitions/.Quantity" } }, "priorityClass": { @@ -181,7 +181,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "$ref": "#/definitions/v1.XGBoostJobSpec" @@ -213,7 +213,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, diff --git a/sdk/python/.openapi-generator-ignore b/sdk/python/.openapi-generator-ignore new file mode 100644 index 0000000000..c0ae241a85 --- /dev/null +++ b/sdk/python/.openapi-generator-ignore @@ -0,0 +1,33 @@ +# OpenAPI Generator Ignore +# Generated by openapi-generator https://github.com/openapitools/openapi-generator + +# Use this file to prevent files from being overwritten by the generator. +# The patterns follow closely to .gitignore or .dockerignore. + +# As an example, the C# client generator defines ApiClient.cs. +# You can make changes and tell OpenAPI Generator to ignore just this file by uncommenting the following line: +#ApiClient.cs + +# You can match any string of characters against a directory, file or extension with a single asterisk (*): +#foo/*/qux +# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux + +# You can recursively match patterns against a directory, file or extension with a double asterisk (**): +#foo/**/qux +# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux + +# You can also negate patterns with an exclamation (!). 
+# For example, you can ignore all files in a docs folder with the file extension .md: +#docs/*.md +# Then explicitly reverse the ignore rule for a single file: +#!docs/README.md + +.gitignore +OWNERS +README.md +requirements.txt +setup.py +.gitlab-ci.yml +setup.cfg +git_push.sh +.travis.yml diff --git a/sdk/python/README.md b/sdk/python/README.md index 31a128b911..7f865f7a61 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -57,7 +57,6 @@ Class | Method | Description [PyTorchJobClient](docs/PyTorchJobClient.md) | [is_job_succeeded](docs/PyTorchJobClient.md#is_job_succeeded) | Check if the PyTorchJob Succeeded | [PyTorchJobClient](docs/PyTorchJobClient.md) | [get_pod_names](docs/PyTorchJobClient.md#get_pod_names) | Get pod names of PyTorchJob | [PyTorchJobClient](docs/PyTorchJobClient.md)| [get_logs](docs/PyTorchJobClient.md#get_logs) | Get training logs of the PyTorchJob | -## Documentation For Models ## Documentation For Models diff --git a/sdk/python/docs/TFJobClient.md b/sdk/python/docs/TFJobClient.md deleted file mode 100644 index d1d3a3336a..0000000000 --- a/sdk/python/docs/TFJobClient.md +++ /dev/null @@ -1,378 +0,0 @@ -# TFJobClient - -> TFJobClient(config_file=None, context=None, client_configuration=None, persist_config=True) - -User can loads authentication and cluster information from kube-config file and stores them in kubernetes.client.configuration. Parameters are as following: - -parameter | Description ------------- | ------------- -config_file | Name of the kube-config file. Defaults to `~/.kube/config`. Note that for the case that the SDK is running in cluster and you want to operate tfjob in another remote cluster, user must set `config_file` to load kube-config file explicitly, e.g. `TFJobClient(config_file="~/.kube/config")`. | -context |Set the active context. 
If is set to None, current_context from config file will be used.| -client_configuration | The kubernetes.client.Configuration to set configs to.| -persist_config | If True, config file will be updated when changed (e.g GCP token refresh).| - - -The APIs for TFJobClient are as following: - -Class | Method | Description ------------- | ------------- | ------------- -TFJobClient| [create](#create) | Create TFJob| -TFJobClient | [get](#get) | Get the specified TFJob or all TFJob in the namespace | -TFJobClient | [patch](#patch) | Patch the specified TFJob| -TFJobClient | [delete](#delete) | Delete the specified TFJob | -TFJobClient | [wait_for_job](#wait_for_job) | Wait for the specified job to finish | -TFJobClient | [wait_for_condition](#wait_for_condition) | Waits until any of the specified conditions occur | -TFJobClient | [get_job_status](#get_job_status) | Get the TFJob status| -TFJobClient | [is_job_running](#is_job_running) | Check if the TFJob status is running | -TFJobClient | [is_job_succeeded](#is_job_succeeded) | Check if the TFJob status is Succeeded | -TFJobClient | [get_pod_names](#get_pod_names) | Get pod names of TFJob | -TFJobClient | [get_logs](#get_logs) | Get training logs of the TFJob | - - -## create -> create(tfjob, namespace=None) - -Create the provided tfjob in the specified namespace - -### Example - -```python -from kubernetes.client import V1PodTemplateSpec -from kubernetes.client import V1ObjectMeta -from kubernetes.client import V1PodSpec -from kubernetes.client import V1Container - -from kubeflow.tfjob import constants -from kubeflow.tfjob import utils -from kubeflow.tfjob import V1ReplicaSpec -from kubeflow.tfjob import V1TFJob -from kubeflow.tfjob import V1TFJobList -from kubeflow.tfjob import V1TFJobSpec -from kubeflow.tfjob import TFJobClient - - -container = V1Container( - name="tensorflow", - image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", - command=[ - "python", - "/var/tf_mnist/mnist_with_summaries.py", - 
"--log_dir=/train/logs", "--learning_rate=0.01", - "--batch_size=150" - ] -) - -worker = V1ReplicaSpec( - replicas=1, - restart_policy="Never", - template=V1PodTemplateSpec( - spec=V1PodSpec( - containers=[container] - ) - ) -) - -tfjob = V1TFJob( - api_version="kubeflow.org/v1", - kind="TFJob", - metadata=V1ObjectMeta(name="mnist",namespace=namespace), - spec=V1TFJobSpec( - clean_pod_policy="None", - tf_replica_specs={"Worker": worker} - ) -) - - -tfjob_client = TFJobClient() -tfjob_client.create(tfjob) - -``` - - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -tfjob | [V1TFJob](V1TFJob.md) | tfjob defination| Required | -namespace | str | Namespace for tfjob deploying to. If the `namespace` is not defined, will align with tfjob definition, or use current or default namespace if namespace is not specified in tfjob definition. | Optional | - -### Return type -object - -## get -> get(name=None, namespace=None, watch=False, timeout_seconds=600) - -Get the created tfjob in the specified namespace - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.get('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name. If the `name` is not specified, it will get all tfjobs in the namespace.| Optional. | -namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | -watch | bool | Watch the created TFJob if `True`, otherwise will return the created TFJob object. Stop watching if TFJob reaches the optional specified `timeout_seconds` or once the TFJob status `Succeeded` or `Failed`. | Optional | -timeout_seconds | int | Timeout seconds for watching. Defaults to 600. 
| Optional | - -### Return type -object - - -## patch -> patch(name, tfjob, namespace=None) - -Patch the created tfjob in the specified namespace. - -Note that if you want to set the field from existing value to `None`, `patch` API may not work, you need to use [replace](#replace) API to remove the field value. - -### Example - -```python - -tfjob = V1TFJob( - api_version="kubeflow.org/v1", - ... #update something in TFJob spec -) - -tfjob_client = TFJobClient() -tfjob_client.patch('mnist', isvc) - -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -tfjob | [V1TFJob](V1TFJob.md) | tfjob defination| Required | -namespace | str | The tfjob's namespace for patching. If the `namespace` is not defined, will align with tfjob definition, or use current or default namespace if namespace is not specified in tfjob definition. | Optional| - -### Return type -object - - -## delete -> delete(name, namespace=None) - -Delete the created tfjob in the specified namespace - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.delete('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| - -### Return type -object - - -## wait_for_job -> wait_for_job(name, -> namespace=None, -> timeout_seconds=600, -> polling_interval=30, -> watch=False, -> status_callback=None): - -Wait for the specified job to finish. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.wait_for_job('mnist', namespace='kubeflow') - -# The API also supports watching the TFJob status till it's Succeeded or Failed. 
-tfjob_client.wait_for_job('mnist', namespace=namespace, watch=True) -NAME STATE TIME -mnist Created 2019-12-31T09:20:07Z -mnist Running 2019-12-31T09:20:19Z -mnist Running 2019-12-31T09:20:19Z -mnist Succeeded 2019-12-31T09:22:04Z -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| -timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. | Optional| -polling_interval | int | How often to poll for the status of the job.| Optional| -status_callback | str | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the tfjob.| Optional| -watch | bool | Watch the TFJob if `True`. Stop watching if TFJob reaches the optional specified `timeout_seconds` or once the TFJob status `Succeeded` or `Failed`. | Optional | - -### Return type -object - - -## wait_for_condition -> wait_for_condition(name, -> expected_condition, -> namespace=None, -> timeout_seconds=600, -> polling_interval=30, -> status_callback=None): - - -Waits until any of the specified conditions occur. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.wait_for_condition('mnist', expected_condition=["Succeeded", "Failed"], namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -expected_condition |List |A list of conditions. Function waits until any of the supplied conditions is reached.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| -timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. 
| Optional| -polling_interval | int | How often to poll for the status of the job.| Optional| -status_callback | str | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the tfjob.| Optional| - -### Return type -object - -## get_job_status -> get_job_status(name, namespace=None) - -Returns TFJob status, such as Running, Failed or Succeeded. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.get_job_status('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name. | | -namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | - -### Return type -Str - -## is_job_running -> is_job_running(name, namespace=None) - -Returns True if the TFJob running; false otherwise. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.is_job_running('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | - -### Return type -Bool - -## is_job_succeeded -> is_job_succeeded(name, namespace=None) - -Returns True if the TFJob succeeded; false otherwise. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.is_job_succeeded('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. 
Defaults to current or default namespace.| Optional | - -### Return type -Bool - - -## get_pod_names -> get_pod_names(name, namespace=None, master=False, replica_type=None, replica_index=None) - -Get pod names of the TFJob. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.get_pod_names('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | -master | bool | Only get pod with label 'job-role: master' pod if True. | | -replica_type | str | User can specify one of 'worker, ps, chief' to only get one type pods. By default get all type pods.| | -replica_index | str | User can specfy replica index to get one pod of the TFJob. | | - -### Return type -Set - - -## get_logs -> get_logs(name, namespace=None, master=True, replica_type=None, replica_index=None, follow=False) - -Get training logs of the TFJob. By default only get the logs of Pod that has labels 'job-role: master', to get all pods logs, specfy the `master=False`. - -### Example - -```python -from kubeflow.tfjob import TFJobClient - -tfjob_client = TFJobClient() -tfjob_client.get_logs('mnist', namespace='kubeflow') -``` - -### Parameters -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- -name | str | The TFJob name.| | -namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | -master | bool | Only get pod with label 'job-role: master' pod if True. | | -replica_type | str | User can specify one of 'worker, ps, chief' to only get one type pods. By default get all type pods.| | -replica_index | str | User can specfy replica index to get one pod of the TFJob. | | -follow | bool | Follow the log stream of the pod. Defaults to false. 
| | - -### Return type -Str \ No newline at end of file diff --git a/sdk/python/docs/V1JobCondition.md b/sdk/python/docs/V1JobCondition.md index 37f0f6f094..47093ac9cb 100644 --- a/sdk/python/docs/V1JobCondition.md +++ b/sdk/python/docs/V1JobCondition.md @@ -4,8 +4,8 @@ JobCondition describes the state of the job at a certain point. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**last_transition_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] -**last_update_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] +**last_transition_time** | [**V1Time**](V1Time.md) | | [optional] +**last_update_time** | [**V1Time**](V1Time.md) | | [optional] **message** | **str** | A human readable message indicating details about the transition. | [optional] **reason** | **str** | The reason for the condition's last transition. | [optional] **status** | **str** | Status of the condition, one of True, False, Unknown. | diff --git a/sdk/python/docs/V1JobStatus.md b/sdk/python/docs/V1JobStatus.md index fad337e8b6..937145406a 100644 --- a/sdk/python/docs/V1JobStatus.md +++ b/sdk/python/docs/V1JobStatus.md @@ -4,11 +4,11 @@ JobStatus represents the current observed state of the training Job. ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**completion_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] +**completion_time** | [**V1Time**](V1Time.md) | | [optional] **conditions** | [**list[V1JobCondition]**](V1JobCondition.md) | Conditions is an array of current observed job conditions. 
| -**last_reconcile_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] +**last_reconcile_time** | [**V1Time**](V1Time.md) | | [optional] **replica_statuses** | [**dict(str, V1ReplicaStatus)**](V1ReplicaStatus.md) | ReplicaStatuses is map of ReplicaType and ReplicaStatus, specifies the status of each replica. | -**start_time** | [**K8sIoApimachineryPkgApisMetaV1Time**](K8sIoApimachineryPkgApisMetaV1Time.md) | | [optional] +**start_time** | [**V1Time**](V1Time.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1MXJob.md b/sdk/python/docs/V1MXJob.md index 4485f9da78..b2ae3fa303 100644 --- a/sdk/python/docs/V1MXJob.md +++ b/sdk/python/docs/V1MXJob.md @@ -6,7 +6,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] **spec** | [**V1MXJobSpec**](V1MXJobSpec.md) | | [optional] **status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] diff --git a/sdk/python/docs/V1MXJobList.md b/sdk/python/docs/V1MXJobList.md index b31f61d39d..4e0c0cc0d0 100644 --- a/sdk/python/docs/V1MXJobList.md +++ b/sdk/python/docs/V1MXJobList.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **items** | [**list[V1MXJob]**](V1MXJob.md) | | **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] +**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1PyTorchJob.md b/sdk/python/docs/V1PyTorchJob.md index dde8ebeb00..ca1f734194 100644 --- a/sdk/python/docs/V1PyTorchJob.md +++ b/sdk/python/docs/V1PyTorchJob.md @@ -6,7 +6,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] **spec** | [**V1PyTorchJobSpec**](V1PyTorchJobSpec.md) | | [optional] **status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] diff --git a/sdk/python/docs/V1PyTorchJobList.md b/sdk/python/docs/V1PyTorchJobList.md index 61f8e2ec08..9a1073c34e 100644 --- a/sdk/python/docs/V1PyTorchJobList.md +++ b/sdk/python/docs/V1PyTorchJobList.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **items** | [**list[V1PyTorchJob]**](V1PyTorchJob.md) | List of PyTorchJobs. | **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] +**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1ReplicaSpec.md b/sdk/python/docs/V1ReplicaSpec.md index b8ac81f92f..fd5c6c6831 100644 --- a/sdk/python/docs/V1ReplicaSpec.md +++ b/sdk/python/docs/V1ReplicaSpec.md @@ -6,7 +6,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **replicas** | **int** | Replicas is the desired number of replicas of the given template. If unspecified, defaults to 1. | [optional] **restart_policy** | **str** | Restart policy for all replicas within the job. One of Always, OnFailure, Never and ExitCode. Default to Never. 
| [optional] -**template** | [**K8sIoApiCoreV1PodTemplateSpec**](K8sIoApiCoreV1PodTemplateSpec.md) | | [optional] +**template** | [**V1PodTemplateSpec**](V1PodTemplateSpec.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1SchedulingPolicy.md b/sdk/python/docs/V1SchedulingPolicy.md index 3f64a74764..0832b1de97 100644 --- a/sdk/python/docs/V1SchedulingPolicy.md +++ b/sdk/python/docs/V1SchedulingPolicy.md @@ -5,7 +5,7 @@ SchedulingPolicy encapsulates various scheduling policies of the distributed tra Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **min_available** | **int** | | [optional] -**min_resources** | [**dict(str, K8sIoApimachineryPkgApiResourceQuantity)**](K8sIoApimachineryPkgApiResourceQuantity.md) | | [optional] +**min_resources** | [**dict(str, Quantity)**](Quantity.md) | | [optional] **priority_class** | **str** | | [optional] **queue** | **str** | | [optional] diff --git a/sdk/python/docs/V1TFJob.md b/sdk/python/docs/V1TFJob.md index eca12a4459..28c7b4657d 100644 --- a/sdk/python/docs/V1TFJob.md +++ b/sdk/python/docs/V1TFJob.md @@ -6,7 +6,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] **spec** | [**V1TFJobSpec**](V1TFJobSpec.md) | | [optional] **status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] diff --git a/sdk/python/docs/V1TFJobList.md b/sdk/python/docs/V1TFJobList.md index eafb589e7e..3e48e2ba82 100644 --- a/sdk/python/docs/V1TFJobList.md +++ b/sdk/python/docs/V1TFJobList.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **items** | [**list[V1TFJob]**](V1TFJob.md) | List of TFJobs. | **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] +**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/docs/V1Time.md b/sdk/python/docs/V1Time.md deleted file mode 100644 index 63a5fb595d..0000000000 --- a/sdk/python/docs/V1Time.md +++ /dev/null @@ -1,9 +0,0 @@ -# V1Time - -## Properties -Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- - -[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) - - diff --git a/sdk/python/docs/V1XGBoostJob.md b/sdk/python/docs/V1XGBoostJob.md index d6cab1bb67..072c8d803f 100644 --- a/sdk/python/docs/V1XGBoostJob.md +++ b/sdk/python/docs/V1XGBoostJob.md @@ -6,7 +6,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ObjectMeta**](K8sIoApimachineryPkgApisMetaV1ObjectMeta.md) | | [optional] +**metadata** | [**V1ObjectMeta**](V1ObjectMeta.md) | | [optional] **spec** | [**V1XGBoostJobSpec**](V1XGBoostJobSpec.md) | | [optional] **status** | [**V1JobStatus**](V1JobStatus.md) | | [optional] diff --git a/sdk/python/docs/V1XGBoostJobList.md b/sdk/python/docs/V1XGBoostJobList.md index 0ba29ba98d..0672446d82 100644 --- a/sdk/python/docs/V1XGBoostJobList.md +++ b/sdk/python/docs/V1XGBoostJobList.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **api_version** | **str** | APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | [optional] **items** | [**list[V1XGBoostJob]**](V1XGBoostJob.md) | | **kind** | **str** | Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | [optional] -**metadata** | [**K8sIoApimachineryPkgApisMetaV1ListMeta**](K8sIoApimachineryPkgApisMetaV1ListMeta.md) | | [optional] +**metadata** | [**V1ListMeta**](V1ListMeta.md) | | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/sdk/python/kubeflow/__init__.py b/sdk/python/kubeflow/__init__.py index 69e3be50da..e69de29bb2 100644 --- a/sdk/python/kubeflow/__init__.py +++ b/sdk/python/kubeflow/__init__.py @@ -1 +0,0 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/sdk/python/kubeflow/training/models/v1_job_condition.py b/sdk/python/kubeflow/training/models/v1_job_condition.py index f01aeaf221..985bfd45d5 100644 --- a/sdk/python/kubeflow/training/models/v1_job_condition.py +++ b/sdk/python/kubeflow/training/models/v1_job_condition.py @@ -33,8 +33,8 @@ class V1JobCondition(object): and the value is json key in definition. """ openapi_types = { - 'last_transition_time': 'K8sIoApimachineryPkgApisMetaV1Time', - 'last_update_time': 'K8sIoApimachineryPkgApisMetaV1Time', + 'last_transition_time': 'V1Time', + 'last_update_time': 'V1Time', 'message': 'str', 'reason': 'str', 'status': 'str', @@ -81,7 +81,7 @@ def last_transition_time(self): :return: The last_transition_time of this V1JobCondition. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1Time + :rtype: V1Time """ return self._last_transition_time @@ -91,7 +91,7 @@ def last_transition_time(self, last_transition_time): :param last_transition_time: The last_transition_time of this V1JobCondition. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1Time + :type: V1Time """ self._last_transition_time = last_transition_time @@ -102,7 +102,7 @@ def last_update_time(self): :return: The last_update_time of this V1JobCondition. 
# noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1Time + :rtype: V1Time """ return self._last_update_time @@ -112,7 +112,7 @@ def last_update_time(self, last_update_time): :param last_update_time: The last_update_time of this V1JobCondition. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1Time + :type: V1Time """ self._last_update_time = last_update_time diff --git a/sdk/python/kubeflow/training/models/v1_job_status.py b/sdk/python/kubeflow/training/models/v1_job_status.py index d890d110da..fcfa548c07 100644 --- a/sdk/python/kubeflow/training/models/v1_job_status.py +++ b/sdk/python/kubeflow/training/models/v1_job_status.py @@ -33,11 +33,11 @@ class V1JobStatus(object): and the value is json key in definition. """ openapi_types = { - 'completion_time': 'K8sIoApimachineryPkgApisMetaV1Time', + 'completion_time': 'V1Time', 'conditions': 'list[V1JobCondition]', - 'last_reconcile_time': 'K8sIoApimachineryPkgApisMetaV1Time', + 'last_reconcile_time': 'V1Time', 'replica_statuses': 'dict(str, V1ReplicaStatus)', - 'start_time': 'K8sIoApimachineryPkgApisMetaV1Time' + 'start_time': 'V1Time' } attribute_map = { @@ -76,7 +76,7 @@ def completion_time(self): :return: The completion_time of this V1JobStatus. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1Time + :rtype: V1Time """ return self._completion_time @@ -86,7 +86,7 @@ def completion_time(self, completion_time): :param completion_time: The completion_time of this V1JobStatus. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1Time + :type: V1Time """ self._completion_time = completion_time @@ -122,7 +122,7 @@ def last_reconcile_time(self): :return: The last_reconcile_time of this V1JobStatus. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1Time + :rtype: V1Time """ return self._last_reconcile_time @@ -132,7 +132,7 @@ def last_reconcile_time(self, last_reconcile_time): :param last_reconcile_time: The last_reconcile_time of this V1JobStatus. 
# noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1Time + :type: V1Time """ self._last_reconcile_time = last_reconcile_time @@ -168,7 +168,7 @@ def start_time(self): :return: The start_time of this V1JobStatus. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1Time + :rtype: V1Time """ return self._start_time @@ -178,7 +178,7 @@ def start_time(self, start_time): :param start_time: The start_time of this V1JobStatus. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1Time + :type: V1Time """ self._start_time = start_time diff --git a/sdk/python/kubeflow/training/models/v1_mx_job.py b/sdk/python/kubeflow/training/models/v1_mx_job.py index e6bddc495f..6410f609cb 100644 --- a/sdk/python/kubeflow/training/models/v1_mx_job.py +++ b/sdk/python/kubeflow/training/models/v1_mx_job.py @@ -35,7 +35,7 @@ class V1MXJob(object): openapi_types = { 'api_version': 'str', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'metadata': 'V1ObjectMeta', 'spec': 'V1MXJobSpec', 'status': 'V1JobStatus' } @@ -124,7 +124,7 @@ def metadata(self): :return: The metadata of this V1MXJob. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :rtype: V1ObjectMeta """ return self._metadata @@ -134,7 +134,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1MXJob. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :type: V1ObjectMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_mx_job_list.py b/sdk/python/kubeflow/training/models/v1_mx_job_list.py index 94ac5dc45a..bd32e52793 100644 --- a/sdk/python/kubeflow/training/models/v1_mx_job_list.py +++ b/sdk/python/kubeflow/training/models/v1_mx_job_list.py @@ -36,7 +36,7 @@ class V1MXJobList(object): 'api_version': 'str', 'items': 'list[V1MXJob]', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + 'metadata': 'V1ListMeta' } attribute_map = { @@ -141,7 +141,7 @@ def metadata(self): :return: The metadata of this V1MXJobList. 
# noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + :rtype: V1ListMeta """ return self._metadata @@ -151,7 +151,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1MXJobList. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ListMeta + :type: V1ListMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_py_torch_job.py b/sdk/python/kubeflow/training/models/v1_py_torch_job.py index 99625ce159..9480ab6c3b 100644 --- a/sdk/python/kubeflow/training/models/v1_py_torch_job.py +++ b/sdk/python/kubeflow/training/models/v1_py_torch_job.py @@ -35,7 +35,7 @@ class V1PyTorchJob(object): openapi_types = { 'api_version': 'str', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'metadata': 'V1ObjectMeta', 'spec': 'V1PyTorchJobSpec', 'status': 'V1JobStatus' } @@ -124,7 +124,7 @@ def metadata(self): :return: The metadata of this V1PyTorchJob. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :rtype: V1ObjectMeta """ return self._metadata @@ -134,7 +134,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1PyTorchJob. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :type: V1ObjectMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py b/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py index 581751da36..a1f35526ab 100644 --- a/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py +++ b/sdk/python/kubeflow/training/models/v1_py_torch_job_list.py @@ -36,7 +36,7 @@ class V1PyTorchJobList(object): 'api_version': 'str', 'items': 'list[V1PyTorchJob]', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + 'metadata': 'V1ListMeta' } attribute_map = { @@ -143,7 +143,7 @@ def metadata(self): :return: The metadata of this V1PyTorchJobList. 
# noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + :rtype: V1ListMeta """ return self._metadata @@ -153,7 +153,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1PyTorchJobList. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ListMeta + :type: V1ListMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_replica_spec.py b/sdk/python/kubeflow/training/models/v1_replica_spec.py index 3026ea6513..e6dc355259 100644 --- a/sdk/python/kubeflow/training/models/v1_replica_spec.py +++ b/sdk/python/kubeflow/training/models/v1_replica_spec.py @@ -35,7 +35,7 @@ class V1ReplicaSpec(object): openapi_types = { 'replicas': 'int', 'restart_policy': 'str', - 'template': 'K8sIoApiCoreV1PodTemplateSpec' + 'template': 'V1PodTemplateSpec' } attribute_map = { @@ -114,7 +114,7 @@ def template(self): :return: The template of this V1ReplicaSpec. # noqa: E501 - :rtype: K8sIoApiCoreV1PodTemplateSpec + :rtype: V1PodTemplateSpec """ return self._template @@ -124,7 +124,7 @@ def template(self, template): :param template: The template of this V1ReplicaSpec. # noqa: E501 - :type: K8sIoApiCoreV1PodTemplateSpec + :type: V1PodTemplateSpec """ self._template = template diff --git a/sdk/python/kubeflow/training/models/v1_scheduling_policy.py b/sdk/python/kubeflow/training/models/v1_scheduling_policy.py index 0f3ad232cf..c85da15d0e 100644 --- a/sdk/python/kubeflow/training/models/v1_scheduling_policy.py +++ b/sdk/python/kubeflow/training/models/v1_scheduling_policy.py @@ -34,7 +34,7 @@ class V1SchedulingPolicy(object): """ openapi_types = { 'min_available': 'int', - 'min_resources': 'dict(str, K8sIoApimachineryPkgApiResourceQuantity)', + 'min_resources': 'dict(str, Quantity)', 'priority_class': 'str', 'queue': 'str' } @@ -94,7 +94,7 @@ def min_resources(self): :return: The min_resources of this V1SchedulingPolicy. 
# noqa: E501 - :rtype: dict(str, K8sIoApimachineryPkgApiResourceQuantity) + :rtype: dict(str, Quantity) """ return self._min_resources @@ -104,7 +104,7 @@ def min_resources(self, min_resources): :param min_resources: The min_resources of this V1SchedulingPolicy. # noqa: E501 - :type: dict(str, K8sIoApimachineryPkgApiResourceQuantity) + :type: dict(str, Quantity) """ self._min_resources = min_resources diff --git a/sdk/python/kubeflow/training/models/v1_tf_job.py b/sdk/python/kubeflow/training/models/v1_tf_job.py index 5e03193959..1aec0ae151 100644 --- a/sdk/python/kubeflow/training/models/v1_tf_job.py +++ b/sdk/python/kubeflow/training/models/v1_tf_job.py @@ -35,7 +35,7 @@ class V1TFJob(object): openapi_types = { 'api_version': 'str', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'metadata': 'V1ObjectMeta', 'spec': 'V1TFJobSpec', 'status': 'V1JobStatus' } @@ -124,7 +124,7 @@ def metadata(self): :return: The metadata of this V1TFJob. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :rtype: V1ObjectMeta """ return self._metadata @@ -134,7 +134,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1TFJob. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :type: V1ObjectMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_tf_job_list.py b/sdk/python/kubeflow/training/models/v1_tf_job_list.py index 4e8a50e282..6511006ea3 100644 --- a/sdk/python/kubeflow/training/models/v1_tf_job_list.py +++ b/sdk/python/kubeflow/training/models/v1_tf_job_list.py @@ -36,7 +36,7 @@ class V1TFJobList(object): 'api_version': 'str', 'items': 'list[V1TFJob]', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + 'metadata': 'V1ListMeta' } attribute_map = { @@ -143,7 +143,7 @@ def metadata(self): :return: The metadata of this V1TFJobList. 
# noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + :rtype: V1ListMeta """ return self._metadata @@ -153,7 +153,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1TFJobList. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ListMeta + :type: V1ListMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_xg_boost_job.py b/sdk/python/kubeflow/training/models/v1_xg_boost_job.py index 294d06c605..2ef448d317 100644 --- a/sdk/python/kubeflow/training/models/v1_xg_boost_job.py +++ b/sdk/python/kubeflow/training/models/v1_xg_boost_job.py @@ -35,7 +35,7 @@ class V1XGBoostJob(object): openapi_types = { 'api_version': 'str', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ObjectMeta', + 'metadata': 'V1ObjectMeta', 'spec': 'V1XGBoostJobSpec', 'status': 'V1JobStatus' } @@ -124,7 +124,7 @@ def metadata(self): :return: The metadata of this V1XGBoostJob. # noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :rtype: V1ObjectMeta """ return self._metadata @@ -134,7 +134,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1XGBoostJob. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ObjectMeta + :type: V1ObjectMeta """ self._metadata = metadata diff --git a/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py b/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py index 5bed85c8a2..11e53b0de3 100644 --- a/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py +++ b/sdk/python/kubeflow/training/models/v1_xg_boost_job_list.py @@ -36,7 +36,7 @@ class V1XGBoostJobList(object): 'api_version': 'str', 'items': 'list[V1XGBoostJob]', 'kind': 'str', - 'metadata': 'K8sIoApimachineryPkgApisMetaV1ListMeta' + 'metadata': 'V1ListMeta' } attribute_map = { @@ -141,7 +141,7 @@ def metadata(self): :return: The metadata of this V1XGBoostJobList. 
# noqa: E501 - :rtype: K8sIoApimachineryPkgApisMetaV1ListMeta + :rtype: V1ListMeta """ return self._metadata @@ -151,7 +151,7 @@ def metadata(self, metadata): :param metadata: The metadata of this V1XGBoostJobList. # noqa: E501 - :type: K8sIoApimachineryPkgApisMetaV1ListMeta + :type: V1ListMeta """ self._metadata = metadata diff --git a/sdk/python/test/e2e/test_e2e_pytorchjob.py b/sdk/python/test/e2e/test_e2e_pytorchjob.py index f241b38a44..e5f6170c1c 100644 --- a/sdk/python/test/e2e/test_e2e_pytorchjob.py +++ b/sdk/python/test/e2e/test_e2e_pytorchjob.py @@ -1,4 +1,4 @@ -# Copyright 2019 kubeflow.org. +# Copyright 2021 kubeflow.org. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ import os -import pytest from kubernetes.client import V1PodTemplateSpec from kubernetes.client import V1ObjectMeta from kubernetes.client import V1PodSpec diff --git a/sdk/python/test/e2e/test_e2e_tfjob.py b/sdk/python/test/e2e/test_e2e_tfjob.py index 63ce69b29d..efade31bab 100644 --- a/sdk/python/test/e2e/test_e2e_tfjob.py +++ b/sdk/python/test/e2e/test_e2e_tfjob.py @@ -1,4 +1,4 @@ -# Copyright 2019 kubeflow.org. +# Copyright 2021 kubeflow.org. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/sdk/python/test/models/__init__.py b/sdk/python/test/models/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/test/models/test_v1_job_condition.py b/sdk/python/test/test_v1_job_condition.py similarity index 100% rename from sdk/python/test/models/test_v1_job_condition.py rename to sdk/python/test/test_v1_job_condition.py diff --git a/sdk/python/test/models/test_v1_job_status.py b/sdk/python/test/test_v1_job_status.py similarity index 100% rename from sdk/python/test/models/test_v1_job_status.py rename to sdk/python/test/test_v1_job_status.py diff --git a/sdk/python/test/models/test_v1_mx_job.py b/sdk/python/test/test_v1_mx_job.py similarity index 100% rename from sdk/python/test/models/test_v1_mx_job.py rename to sdk/python/test/test_v1_mx_job.py diff --git a/sdk/python/test/models/test_v1_mx_job_list.py b/sdk/python/test/test_v1_mx_job_list.py similarity index 100% rename from sdk/python/test/models/test_v1_mx_job_list.py rename to sdk/python/test/test_v1_mx_job_list.py diff --git a/sdk/python/test/models/test_v1_mx_job_spec.py b/sdk/python/test/test_v1_mx_job_spec.py similarity index 100% rename from sdk/python/test/models/test_v1_mx_job_spec.py rename to sdk/python/test/test_v1_mx_job_spec.py diff --git a/sdk/python/test/models/test_v1_py_torch_job.py b/sdk/python/test/test_v1_py_torch_job.py similarity index 100% rename from sdk/python/test/models/test_v1_py_torch_job.py rename to sdk/python/test/test_v1_py_torch_job.py diff --git a/sdk/python/test/models/test_v1_py_torch_job_list.py b/sdk/python/test/test_v1_py_torch_job_list.py similarity index 100% rename from sdk/python/test/models/test_v1_py_torch_job_list.py rename to sdk/python/test/test_v1_py_torch_job_list.py diff --git a/sdk/python/test/models/test_v1_py_torch_job_spec.py b/sdk/python/test/test_v1_py_torch_job_spec.py similarity index 100% rename from sdk/python/test/models/test_v1_py_torch_job_spec.py rename to 
sdk/python/test/test_v1_py_torch_job_spec.py diff --git a/sdk/python/test/models/test_v1_replica_spec.py b/sdk/python/test/test_v1_replica_spec.py similarity index 100% rename from sdk/python/test/models/test_v1_replica_spec.py rename to sdk/python/test/test_v1_replica_spec.py diff --git a/sdk/python/test/models/test_v1_replica_status.py b/sdk/python/test/test_v1_replica_status.py similarity index 100% rename from sdk/python/test/models/test_v1_replica_status.py rename to sdk/python/test/test_v1_replica_status.py diff --git a/sdk/python/test/models/test_v1_run_policy.py b/sdk/python/test/test_v1_run_policy.py similarity index 100% rename from sdk/python/test/models/test_v1_run_policy.py rename to sdk/python/test/test_v1_run_policy.py diff --git a/sdk/python/test/models/test_v1_scheduling_policy.py b/sdk/python/test/test_v1_scheduling_policy.py similarity index 100% rename from sdk/python/test/models/test_v1_scheduling_policy.py rename to sdk/python/test/test_v1_scheduling_policy.py diff --git a/sdk/python/test/models/test_v1_tf_job.py b/sdk/python/test/test_v1_tf_job.py similarity index 100% rename from sdk/python/test/models/test_v1_tf_job.py rename to sdk/python/test/test_v1_tf_job.py diff --git a/sdk/python/test/models/test_v1_tf_job_list.py b/sdk/python/test/test_v1_tf_job_list.py similarity index 100% rename from sdk/python/test/models/test_v1_tf_job_list.py rename to sdk/python/test/test_v1_tf_job_list.py diff --git a/sdk/python/test/models/test_v1_tf_job_spec.py b/sdk/python/test/test_v1_tf_job_spec.py similarity index 100% rename from sdk/python/test/models/test_v1_tf_job_spec.py rename to sdk/python/test/test_v1_tf_job_spec.py diff --git a/sdk/python/test/models/test_v1_xg_boost_job.py b/sdk/python/test/test_v1_xg_boost_job.py similarity index 100% rename from sdk/python/test/models/test_v1_xg_boost_job.py rename to sdk/python/test/test_v1_xg_boost_job.py diff --git a/sdk/python/test/models/test_v1_xg_boost_job_list.py 
b/sdk/python/test/test_v1_xg_boost_job_list.py similarity index 100% rename from sdk/python/test/models/test_v1_xg_boost_job_list.py rename to sdk/python/test/test_v1_xg_boost_job_list.py diff --git a/sdk/python/test/models/test_v1_xg_boost_job_spec.py b/sdk/python/test/test_v1_xg_boost_job_spec.py similarity index 100% rename from sdk/python/test/models/test_v1_xg_boost_job_spec.py rename to sdk/python/test/test_v1_xg_boost_job_spec.py From ac8ca57ea5c1126e8ee69e494fd5bd81d8122a7f Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Wed, 29 Sep 2021 15:24:31 +0200 Subject: [PATCH 5/8] Allow to specify container name in 'get_logs' methods --- .../training/api/py_torch_job_client.py | 17 +++++++++-------- .../kubeflow/training/api/tf_job_client.py | 7 ++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sdk/python/kubeflow/training/api/py_torch_job_client.py b/sdk/python/kubeflow/training/api/py_torch_job_client.py index cf62fa8f1a..0a033bb8e4 100644 --- a/sdk/python/kubeflow/training/api/py_torch_job_client.py +++ b/sdk/python/kubeflow/training/api/py_torch_job_client.py @@ -183,12 +183,12 @@ def delete(self, name, namespace=None): try: return self.custom_api.delete_namespaced_custom_object( - constants.PYTORCHJOB_GROUP, - constants.PYTORCHJOB_VERSION, - namespace, - constants.PYTORCHJOB_PLURAL, - name, - client.V1DeleteOptions()) + group=constants.PYTORCHJOB_GROUP, + version=constants.PYTORCHJOB_VERSION, + namespace=namespace, + plural=constants.PYTORCHJOB_PLURAL, + name=name, + body=client.V1DeleteOptions()) except client.rest.ApiException as e: raise RuntimeError( "Exception when calling CustomObjectsApi->delete_namespaced_custom_object:\ @@ -347,10 +347,11 @@ def get_pod_names(self, name, namespace=None, master=False, # pylint: disable=i def get_logs(self, name, namespace=None, master=True, replica_type=None, replica_index=None, - follow=False): + follow=False, container="pytorch"): """ Get training logs of the PyTorchJob. 
By default only get the logs of Pod that has labels 'job-role: master'. + :param container: container name :param name: PyTorchJob name :param namespace: defaults to current or default namespace. :param master: By default get pod with label 'job-role: master' pod if True. @@ -374,7 +375,7 @@ def get_logs(self, name, namespace=None, master=True, for pod in pod_names: try: pod_logs = self.core_api.read_namespaced_pod_log( - pod, namespace, follow=follow) + pod, namespace, follow=follow, container=container) logging.info("The logs of Pod %s:\n %s", pod, pod_logs) except client.rest.ApiException as e: raise RuntimeError( diff --git a/sdk/python/kubeflow/training/api/tf_job_client.py b/sdk/python/kubeflow/training/api/tf_job_client.py index a9d8cf4802..542891930c 100644 --- a/sdk/python/kubeflow/training/api/tf_job_client.py +++ b/sdk/python/kubeflow/training/api/tf_job_client.py @@ -371,10 +371,11 @@ def get_pod_names(self, name, namespace=None, master=False, # pylint: disable=i def get_logs(self, name, namespace=None, master=True, replica_type=None, replica_index=None, - follow=False): + follow=False, container="tensorflow"): """ Get training logs of the TFJob. By default only get the logs of Pod that has labels 'job-role: master'. + :param container: container name :param name: tfjob name :param namespace: defaults to current or default namespace. :param master: By default get pod with label 'job-role: master' pod if True. 
@@ -398,7 +399,7 @@ def get_logs(self, name, namespace=None, master=True, log_streams = [] for pod in pod_names: log_streams.append(k8s_watch.Watch().stream(self.core_api.read_namespaced_pod_log, - name=pod, namespace=namespace)) + name=pod, namespace=namespace, container=container)) finished = [False for _ in log_streams] # create thread and queue per stream, for non-blocking iteration @@ -424,7 +425,7 @@ def get_logs(self, name, namespace=None, master=True, else: for pod in pod_names: try: - pod_logs = self.core_api.read_namespaced_pod_log(pod, namespace) + pod_logs = self.core_api.read_namespaced_pod_log(pod, namespace, container=container) logging.info("The logs of Pod %s:\n %s", pod, pod_logs) except client.rest.ApiException as e: raise RuntimeError( From 70866c59a6664ab35f6dccf8f7587c2484ba0051 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Thu, 30 Sep 2021 00:14:13 +0200 Subject: [PATCH 6/8] Generalize job labels --- .../training/api/py_torch_job_client.py | 2 +- .../kubeflow/training/api/tf_job_client.py | 6 +-- .../kubeflow/training/constants/constants.py | 20 +++----- sdk/python/kubeflow/training/utils/utils.py | 47 ++++--------------- 4 files changed, 20 insertions(+), 55 deletions(-) diff --git a/sdk/python/kubeflow/training/api/py_torch_job_client.py b/sdk/python/kubeflow/training/api/py_torch_job_client.py index 0a033bb8e4..231a903412 100644 --- a/sdk/python/kubeflow/training/api/py_torch_job_client.py +++ b/sdk/python/kubeflow/training/api/py_torch_job_client.py @@ -324,7 +324,7 @@ def get_pod_names(self, name, namespace=None, master=False, # pylint: disable=i if namespace is None: namespace = utils.get_default_target_namespace() - labels = utils.get_pytorchjob_labels(name, master=master, + labels = utils.get_job_labels(name, master=master, replica_type=replica_type, replica_index=replica_index) diff --git a/sdk/python/kubeflow/training/api/tf_job_client.py b/sdk/python/kubeflow/training/api/tf_job_client.py index 542891930c..731d16cbe4 100644 
--- a/sdk/python/kubeflow/training/api/tf_job_client.py +++ b/sdk/python/kubeflow/training/api/tf_job_client.py @@ -348,9 +348,9 @@ def get_pod_names(self, name, namespace=None, master=False, # pylint: disable=i if namespace is None: namespace = utils.get_default_target_namespace() - labels = utils.get_tfjob_labels(name, master=master, - replica_type=replica_type, - replica_index=replica_index) + labels = utils.get_job_labels(name, master=master, + replica_type=replica_type, + replica_index=replica_index) try: resp = self.core_api.list_namespaced_pod( diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 1afdf6ffbf..9b1226abd2 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -26,12 +26,12 @@ TFJOB_LOGLEVEL = os.environ.get('TFJOB_LOGLEVEL', 'INFO').upper() -# TFJob Label Names -TFJOB_GROUP_LABEL = 'group-name' -TFJOB_NAME_LABEL = 'job-name' -TFJOB_TYPE_LABEL = 'replica-type' -TFJOB_INDEX_LABEL = 'replica-index' -TFJOB_ROLE_LABEL = 'job-role' +# Job Label Names +JOB_GROUP_LABEL = 'group-name' +JOB_NAME_LABEL = 'job-name' +JOB_TYPE_LABEL = 'replica-type' +JOB_INDEX_LABEL = 'replica-index' +JOB_ROLE_LABEL = 'job-role' # PyTorchJob K8S constants PYTORCHJOB_GROUP = 'kubeflow.org' @@ -40,11 +40,3 @@ PYTORCHJOB_VERSION = os.environ.get('PYTORCHJOB_VERSION', 'v1') PYTORCH_LOGLEVEL = os.environ.get('PYTORCHJOB_LOGLEVEL', 'INFO').upper() - -# PyTorchJob Label Names -PYTORCHJOB_CONTROLLER_LABEL = 'controller-name' -PYTORCHJOB_GROUP_LABEL = 'group-name' -PYTORCHJOB_NAME_LABEL = 'pytorch-job-name' -PYTORCHJOB_TYPE_LABEL = 'pytorch-replica-type' -PYTORCHJOB_INDEX_LABEL = 'pytorch-replica-index' -PYTORCHJOB_ROLE_LABEL = 'job-role' diff --git a/sdk/python/kubeflow/training/utils/utils.py b/sdk/python/kubeflow/training/utils/utils.py index e04f46bbbb..24ace9f15b 100644 --- a/sdk/python/kubeflow/training/utils/utils.py +++ 
b/sdk/python/kubeflow/training/utils/utils.py @@ -44,55 +44,28 @@ def set_pytorchjob_namespace(pytorchjob): return namespace -def get_tfjob_labels(name, master=False, replica_type=None, replica_index=None): +def get_job_labels(name, master=False, replica_type=None, replica_index=None): """ - Get labels according to speficed flags. - :param name: tfjob name + Get labels according to specified flags. + :param name: job name :param master: if need include label 'job-role: master'. - :param replica_type: User can specify one of 'worker, ps, chief to only' get one type pods. - :param replica_index: Can specfy replica index to get one pod of TFJob. + :param replica_type: Replica type according to the job type (master, worker, chief, ps etc). + :param replica_index: Can specify replica index to get one pod of the job. :return: Dict: Labels """ labels = { - constants.TFJOB_GROUP_LABEL: 'kubeflow.org', - constants.TFJOB_NAME_LABEL: name, + constants.JOB_GROUP_LABEL: 'kubeflow.org', + constants.JOB_NAME_LABEL: name, } if master: - labels[constants.TFJOB_ROLE_LABEL] = 'master' + labels[constants.JOB_ROLE_LABEL] = 'master' if replica_type: - labels[constants.TFJOB_TYPE_LABEL] = str.lower(replica_type) + labels[constants.JOB_TYPE_LABEL] = str.lower(replica_type) if replica_index: - labels[constants.TFJOB_INDEX_LABEL] = replica_index - - return labels - - -def get_pytorchjob_labels(name, master=False, replica_type=None, replica_index=None): - """ - Get labels according to speficed flags. - :param name: PyTorchJob name - :param master: if need include label 'job-role: master'. - :param replica_type: User can specify one of 'worker, ps, chief to only' get one type pods. - :param replica_index: Can specfy replica index to get one pod of PyTorchJob. 
- :return: Dict: Labels - """ - labels = { - constants.PYTORCHJOB_GROUP_LABEL: 'kubeflow.org', - constants.PYTORCHJOB_CONTROLLER_LABEL: 'pytorch-operator', - constants.PYTORCHJOB_NAME_LABEL: name, - } - - if master: - labels[constants.PYTORCHJOB_ROLE_LABEL] = 'master' - - if replica_type: - labels[constants.PYTORCHJOB_TYPE_LABEL] = str.lower(replica_type) - - if replica_index: - labels[constants.PYTORCHJOB_INDEX_LABEL] = replica_index + labels[constants.JOB_INDEX_LABEL] = replica_index return labels From 876a3e053d3e21002ff69a465362712b3c635b80 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Thu, 30 Sep 2021 08:55:23 +0200 Subject: [PATCH 7/8] Check if attribute exists --- py/kubeflow/tf_operator/k8s_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py/kubeflow/tf_operator/k8s_util.py b/py/kubeflow/tf_operator/k8s_util.py index 3b46d9cba8..ae46e35934 100644 --- a/py/kubeflow/tf_operator/k8s_util.py +++ b/py/kubeflow/tf_operator/k8s_util.py @@ -137,9 +137,9 @@ def list_pods(client, namespace, label_selector): return pods except rest.ApiException as e: message = "" - if e.message: + if hasattr(e, "message"): message = e.message - if e.body: + if hasattr(e, "body"): try: body = json.loads(e.body) except ValueError: From cc4a716b8b91dd68a7f5322b11a5691de83bba55 Mon Sep 17 00:00:00 2001 From: Alex Lembiyeuski Date: Sat, 2 Oct 2021 22:21:00 +0200 Subject: [PATCH 8/8] Address code review comments --- docs/development/developer_guide.md | 2 +- hack/python-sdk/gen-sdk.sh | 2 +- hack/python-sdk/post_gen.py | 22 +- hack/python-sdk/swagger.json | 12 +- sdk/python/docs/PyTorchJobClient.md | 382 ++++++++++++++++++ sdk/python/docs/TFJobClient.md | 378 +++++++++++++++++ .../examples/kubeflow-pytorchjob-sdk.ipynb | 269 ++++++------ sdk/python/examples/kubeflow-tfjob-sdk.ipynb | 294 ++++++++------ sdk/python/kubeflow/training/__init__.py | 2 + .../training/api/py_torch_job_client.py | 6 +- .../training/api/py_torch_job_watch.py | 4 +- 
.../kubeflow/training/api/tf_job_client.py | 2 +- .../kubeflow/training/api/tf_job_watch.py | 2 +- .../kubeflow/training/constants/constants.py | 4 + sdk/python/kubeflow/training/utils/utils.py | 2 +- sdk/python/setup.py | 2 - sdk/python/test/e2e/test_e2e_pytorchjob.py | 10 +- sdk/python/test/e2e/test_e2e_tfjob.py | 10 +- 18 files changed, 1104 insertions(+), 301 deletions(-) create mode 100644 sdk/python/docs/PyTorchJobClient.md create mode 100644 sdk/python/docs/TFJobClient.md diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md index db79ee89d4..6e9c5045ac 100644 --- a/docs/development/developer_guide.md +++ b/docs/development/developer_guide.md @@ -84,7 +84,7 @@ On ubuntu the default go package appears to be gccgo-go which has problems see [ To generate Python SDK for the operator, run: ``` -.hack/python-sdk/gen-sdk.sh +./hack/python-sdk/gen-sdk.sh ``` This command will re-generate the api and model files together with the documentation and model tests. The following files/folders in `sdk/python` are auto-generated and should not be modified directly: diff --git a/hack/python-sdk/gen-sdk.sh b/hack/python-sdk/gen-sdk.sh index 5288104478..1043b3aa85 100755 --- a/hack/python-sdk/gen-sdk.sh +++ b/hack/python-sdk/gen-sdk.sh @@ -59,7 +59,7 @@ chmod +x /tmp/swagger --output "${repo_root}"/hack/python-sdk/swagger.json --quiet || true echo "Removing previously generated files ..." -rm -rf "${SDK_OUTPUT_PATH}"/docs "${SDK_OUTPUT_PATH}"/kubeflow/training/models "${SDK_OUTPUT_PATH}"/kubeflow/training/*.py "${SDK_OUTPUT_PATH}"/test/*.py +rm -rf "${SDK_OUTPUT_PATH}"/docs/V1*.md "${SDK_OUTPUT_PATH}"/kubeflow/training/models "${SDK_OUTPUT_PATH}"/kubeflow/training/*.py "${SDK_OUTPUT_PATH}"/test/*.py echo "Generating Python SDK for Training Operator ..." 
java -jar "${SWAGGER_CODEGEN_JAR}" generate -i "${repo_root}"/hack/python-sdk/swagger.json -g python -o "${SDK_OUTPUT_PATH}" -c "${SWAGGER_CODEGEN_CONF}" diff --git a/hack/python-sdk/post_gen.py b/hack/python-sdk/post_gen.py index a0e516807a..cabf87bcdf 100755 --- a/hack/python-sdk/post_gen.py +++ b/hack/python-sdk/post_gen.py @@ -1,5 +1,19 @@ #!/usr/bin/env python +# Copyright 2021 The Kubeflow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ This script is used for updating generated SDK files. 
""" @@ -18,13 +32,13 @@ def main(): fix_test_files() + add_imports() def fix_test_files() -> None: """ Fix invalid model imports in generated model tests """ - os.path.realpath(__file__) test_folder_dir = os.path.join(sdk_dir, "test") test_files = os.listdir(test_folder_dir) for test_file in test_files: @@ -35,6 +49,12 @@ def fix_test_files() -> None: print(_apply_regex(line), end='') +def add_imports() -> None: + with open(os.path.join(sdk_dir, "kubeflow/training/__init__.py"), "a") as init_file: + init_file.write("from kubeflow.training.api.tf_job_client import TFJobClient\n") + init_file.write("from kubeflow.training.api.py_torch_job_client import PyTorchJobClient\n") + + def _apply_regex(input_str: str) -> str: for pattern, replacement in __replacements: input_str = re.sub(pattern, replacement, input_str) diff --git a/hack/python-sdk/swagger.json b/hack/python-sdk/swagger.json index a6d6cdb056..034164ca75 100644 --- a/hack/python-sdk/swagger.json +++ b/hack/python-sdk/swagger.json @@ -17,11 +17,11 @@ "properties": { "lastTransitionTime": { "description": "Last time the condition transitioned from one status to another.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "lastUpdateTime": { "description": "The last time this condition was updated.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "message": { "description": "A human readable message indicating details about the transition.", @@ -51,7 +51,7 @@ "properties": { "completionTime": { "description": "Represents time when the job was completed. It is not guaranteed to be set in happens-before order across separate operations. 
It is represented in RFC3339 form and is in UTC.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.Time" + "$ref": "#/definitions/v1.Time" }, "conditions": { "description": "Conditions is an array of current observed job conditions.", @@ -370,7 +370,7 @@ }, "metadata": { "description": "Standard list metadata.", - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, @@ -416,7 +416,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ObjectMeta" + "$ref": "#/definitions/v1.ObjectMeta" }, "spec": { "$ref": "#/definitions/v1.XGBoostJobSpec" @@ -448,7 +448,7 @@ "type": "string" }, "metadata": { - "$ref": "#/definitions/k8s.io.apimachinery.pkg.apis.meta.v1.ListMeta" + "$ref": "#/definitions/v1.ListMeta" } } }, diff --git a/sdk/python/docs/PyTorchJobClient.md b/sdk/python/docs/PyTorchJobClient.md new file mode 100644 index 0000000000..f02fd64f40 --- /dev/null +++ b/sdk/python/docs/PyTorchJobClient.md @@ -0,0 +1,382 @@ +# PyTorchJobClient + +> PyTorchJobClient(config_file=None, context=None, client_configuration=None, persist_config=True) + +User can loads authentication and cluster information from kube-config file and stores them in kubernetes.client.configuration. Parameters are as following: + +parameter | Description +------------ | ------------- +config_file | Name of the kube-config file. Defaults to `~/.kube/config`. Note that for the case that the SDK is running in cluster and you want to operate PyTorchJob in another remote cluster, user must set `config_file` to load kube-config file explicitly, e.g. `PyTorchJobClient(config_file="~/.kube/config")`. | +context |Set the active context. 
If set to None, current_context from config file will be used.| +client_configuration | The kubernetes.client.Configuration to set configs to.| +persist_config | If True, config file will be updated when changed (e.g. GCP token refresh).| + + +The APIs for PyTorchJobClient are as follows: + +Class | Method | Description +------------ | ------------- | ------------- +PyTorchJobClient| [create](#create) | Create PyTorchJob| +PyTorchJobClient | [get](#get) | Get the specified PyTorchJob or all PyTorchJob in the namespace | +PyTorchJobClient | [patch](#patch) | Patch the specified PyTorchJob| +PyTorchJobClient | [delete](#delete) | Delete the specified PyTorchJob | +PyTorchJobClient | [wait_for_job](#wait_for_job) | Wait for the specified job to finish | +PyTorchJobClient | [wait_for_condition](#wait_for_condition) | Waits until any of the specified conditions occur | +PyTorchJobClient | [get_job_status](#get_job_status) | Get the PyTorchJob status| +PyTorchJobClient | [is_job_running](#is_job_running) | Check if the PyTorchJob is running | +PyTorchJobClient | [is_job_succeeded](#is_job_succeeded) | Check if the PyTorchJob has succeeded | +PyTorchJobClient | [get_pod_names](#get_pod_names) | Get pod names of PyTorchJob | +PyTorchJobClient | [get_logs](#get_logs) | Get training logs of the PyTorchJob | + +## create +> create(pytorchjob, namespace=None) + +Create the provided pytorchjob in the specified namespace + +### Example + +```python +from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1PodSpec +from kubernetes.client import V1Container +from kubernetes.client import V1ResourceRequirements + +from kubeflow.training import constants +from kubeflow.training import utils +from kubeflow.training import V1ReplicaSpec +from kubeflow.training import V1PyTorchJob +from kubeflow.training import V1PyTorchJobSpec +from kubeflow.training import PyTorchJobClient + + container = V1Container( + name="pytorch",
+ image="gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0", + args=["--backend","gloo"], + ) + + master = V1ReplicaSpec( + replicas=1, + restart_policy="OnFailure", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) + ) + + worker = V1ReplicaSpec( + replicas=1, + restart_policy="OnFailure", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) + ) + + pytorchjob = V1PyTorchJob( + api_version="kubeflow.org/v1", + kind="PyTorchJob", + metadata=V1ObjectMeta(name="mnist", namespace='default'), + spec=V1PyTorchJobSpec( + clean_pod_policy="None", + pytorch_replica_specs={"Master": master, + "Worker": worker} + ) + ) + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.create(pytorchjob) + +``` + + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +pytorchjob | [V1PyTorchJob](V1PyTorchJob.md) | pytorchjob defination| Required | +namespace | str | Namespace for pytorchjob deploying to. If the `namespace` is not defined, will align with pytorchjob definition, or use current or default namespace if namespace is not specified in pytorchjob definition. | Optional | + +### Return type +object + +## get +> get(name=None, namespace=None, watch=False, timeout_seconds=600) + +Get the created pytorchjob in the specified namespace + +### Example + +```python +from kubeflow.training import pytorchjobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.get('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | pytorchjob name. If the `name` is not specified, it will get all pytorchjobs in the namespace.| Optional. | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | +watch | bool | Watch the created pytorchjob if `True`, otherwise will return the created pytorchjob object. 
Stop watching if pytorchjob reaches the optional specified `timeout_seconds` or once the PyTorchJob status `Succeeded` or `Failed`. | Optional | +timeout_seconds | int | Timeout seconds for watching. Defaults to 600. | Optional | + + +### Return type +object + + +## patch +> patch(name, pytorchjob, namespace=None) + +Patch the created pytorchjob in the specified namespace. + +Note that if you want to set the field from existing value to `None`, `patch` API may not work, you need to use [replace](#replace) API to remove the field value. + +### Example + +```python + +pytorchjob = V1PyTorchJob( + api_version="kubeflow.org/v1", + ... #update something in PyTorchJob spec +) + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.patch('mnist', pytorchjob) + +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +pytorchjob | [V1PyTorchJob](V1PyTorchJob.md) | pytorchjob definition| Required | +namespace | str | The pytorchjob's namespace for patching. If the `namespace` is not defined, will align with pytorchjob definition, or use current or default namespace if namespace is not specified in pytorchjob definition. | Optional| + +### Return type +object + + +## delete +> delete(name, namespace=None) + +Delete the created pytorchjob in the specified namespace + +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.delete('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | pytorchjob name| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace. | Optional| + +### Return type +object + +## wait_for_job +> wait_for_job(name, +> namespace=None, +> watch=False, +> timeout_seconds=600, +> polling_interval=30, +> status_callback=None): + +Wait for the specified job to finish.
+ +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.wait_for_job('mnist', namespace='kubeflow') + +# The API also supports watching the PyTorchJob status till it's Succeeded or Failed. +pytorchjob_client.wait_for_job('mnist', namespace='kubeflow', watch=True) +NAME STATE TIME +pytorch-dist-mnist-gloo Created 2020-01-02T09:21:22Z +pytorch-dist-mnist-gloo Running 2020-01-02T09:21:36Z +pytorch-dist-mnist-gloo Running 2020-01-02T09:21:36Z +pytorch-dist-mnist-gloo Running 2020-01-02T09:21:36Z +pytorch-dist-mnist-gloo Running 2020-01-02T09:21:36Z +pytorch-dist-mnist-gloo Succeeded 2020-01-02T09:26:38Z +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace. | Optional| +watch | bool | Watch the PyTorchJob if `True`. Stop watching if PyTorchJob reaches the optional specified `timeout_seconds` or once the PyTorchJob status `Succeeded` or `Failed`. | Optional | +timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. | Optional| +polling_interval | int | How often to poll for the status of the job.| Optional| +status_callback | str | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the pytorchjob.| Optional| + +### Return type +object + + +## wait_for_condition +> wait_for_condition(name, +> expected_condition, +> namespace=None, +> timeout_seconds=600, +> polling_interval=30, +> status_callback=None): + + +Waits until any of the specified conditions occur. 
+ +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.wait_for_condition('mnist', expected_condition=["Succeeded", "Failed"], namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +expected_condition |List |A list of conditions. Function waits until any of the supplied conditions is reached.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace. | Optional| +timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. | Optional| +polling_interval | int | How often to poll for the status of the job.| Optional| +status_callback | str | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the pytorchjob.| Optional| + +### Return type +object + +## get_job_status +> get_job_status(name, namespace=None) + +Returns PyTorchJob status, such as Running, Failed or Succeeded. + +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.get_job_status('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name. | | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | + +### Return type +Str + +## is_job_running +> is_job_running(name, namespace=None) + +Returns True if the PyTorchJob running; false otherwise. 
+ +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.is_job_running('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | + +### Return type +Bool + +## is_job_succeeded +> is_job_succeeded(name, namespace=None) + +Returns True if the PyTorchJob succeeded; false otherwise. + +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.is_job_succeeded('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | + +### Return type +Bool + +## get_pod_names +> get_pod_names(name, namespace=None, master=False, replica_type=None, replica_index=None) + +Get pod names of the PyTorchJob. + +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.get_pod_names('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | +master | bool | Only get pod with label 'job-role: master' pod if True. | | +replica_type | str | User can specify one of 'master, worker' to only get one type pods. By default get all type pods.| | +replica_index | str | User can specfy replica index to get one pod of the PyTorchJob. 
| | + +### Return type +Set + + +## get_logs +> get_logs(name, namespace=None, master=True, replica_type=None, replica_index=None, follow=False) + +Get training logs of the PyTorchJob. By default only get the logs of Pod that has labels 'job-role: master'; to get the logs of all pods, specify `master=False`. + +### Example + +```python +from kubeflow.training import PyTorchJobClient + +pytorchjob_client = PyTorchJobClient() +pytorchjob_client.get_logs('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The PyTorchJob name.| | +namespace | str | The pytorchjob's namespace. Defaults to current or default namespace.| Optional | +master | bool | Only get pod with label 'job-role: master' pod if True. | | +replica_type | str | User can specify one of 'master, worker' to only get one type pods. By default get all type pods.| | +replica_index | str | User can specify replica index to get one pod of the PyTorchJob. | | +follow | bool | Follow the log stream of the pod. Defaults to false. | | + +### Return type +Str diff --git a/sdk/python/docs/TFJobClient.md b/sdk/python/docs/TFJobClient.md new file mode 100644 index 0000000000..423e5b16a5 --- /dev/null +++ b/sdk/python/docs/TFJobClient.md @@ -0,0 +1,378 @@ +# TFJobClient + +> TFJobClient(config_file=None, context=None, client_configuration=None, persist_config=True) + +User can load authentication and cluster information from kube-config file and store them in kubernetes.client.configuration. Parameters are as follows: + +parameter | Description +------------ | ------------- +config_file | Name of the kube-config file. Defaults to `~/.kube/config`. Note that for the case that the SDK is running in cluster and you want to operate tfjob in another remote cluster, user must set `config_file` to load kube-config file explicitly, e.g. `TFJobClient(config_file="~/.kube/config")`. | +context |Set the active context.
If set to None, current_context from config file will be used.| +client_configuration | The kubernetes.client.Configuration to set configs to.| +persist_config | If True, config file will be updated when changed (e.g. GCP token refresh).| + + +The APIs for TFJobClient are as follows: + +Class | Method | Description +------------ | ------------- | ------------- +TFJobClient| [create](#create) | Create TFJob| +TFJobClient | [get](#get) | Get the specified TFJob or all TFJob in the namespace | +TFJobClient | [patch](#patch) | Patch the specified TFJob| +TFJobClient | [delete](#delete) | Delete the specified TFJob | +TFJobClient | [wait_for_job](#wait_for_job) | Wait for the specified job to finish | +TFJobClient | [wait_for_condition](#wait_for_condition) | Waits until any of the specified conditions occur | +TFJobClient | [get_job_status](#get_job_status) | Get the TFJob status| +TFJobClient | [is_job_running](#is_job_running) | Check if the TFJob status is running | +TFJobClient | [is_job_succeeded](#is_job_succeeded) | Check if the TFJob status is Succeeded | +TFJobClient | [get_pod_names](#get_pod_names) | Get pod names of TFJob | +TFJobClient | [get_logs](#get_logs) | Get training logs of the TFJob | + + +## create +> create(tfjob, namespace=None) + +Create the provided tfjob in the specified namespace + +### Example + +```python +from kubernetes.client import V1PodTemplateSpec +from kubernetes.client import V1ObjectMeta +from kubernetes.client import V1PodSpec +from kubernetes.client import V1Container + +from kubeflow.training import constants +from kubeflow.training import utils +from kubeflow.training import V1ReplicaSpec +from kubeflow.training import V1TFJob +from kubeflow.training import V1TFJobList +from kubeflow.training import V1TFJobSpec +from kubeflow.training import TFJobClient + + +container = V1Container( + name="tensorflow", + image="gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", + command=[ + "python",
"/var/tf_mnist/mnist_with_summaries.py", + "--log_dir=/train/logs", "--learning_rate=0.01", + "--batch_size=150" + ] +) + +worker = V1ReplicaSpec( + replicas=1, + restart_policy="Never", + template=V1PodTemplateSpec( + spec=V1PodSpec( + containers=[container] + ) + ) +) + +tfjob = V1TFJob( + api_version="kubeflow.org/v1", + kind="TFJob", + metadata=V1ObjectMeta(name="mnist",namespace=namespace), + spec=V1TFJobSpec( + clean_pod_policy="None", + tf_replica_specs={"Worker": worker} + ) +) + + +tfjob_client = TFJobClient() +tfjob_client.create(tfjob) + +``` + + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +tfjob | [V1TFJob](V1TFJob.md) | tfjob defination| Required | +namespace | str | Namespace for tfjob deploying to. If the `namespace` is not defined, will align with tfjob definition, or use current or default namespace if namespace is not specified in tfjob definition. | Optional | + +### Return type +object + +## get +> get(name=None, namespace=None, watch=False, timeout_seconds=600) + +Get the created tfjob in the specified namespace + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.get('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name. If the `name` is not specified, it will get all tfjobs in the namespace.| Optional. | +namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | +watch | bool | Watch the created TFJob if `True`, otherwise will return the created TFJob object. Stop watching if TFJob reaches the optional specified `timeout_seconds` or once the TFJob status `Succeeded` or `Failed`. | Optional | +timeout_seconds | int | Timeout seconds for watching. Defaults to 600. 
| Optional | + +### Return type +object + + +## patch +> patch(name, tfjob, namespace=None) + +Patch the created tfjob in the specified namespace. + +Note that if you want to set the field from existing value to `None`, `patch` API may not work, you need to use [replace](#replace) API to remove the field value. + +### Example + +```python + +tfjob = V1TFJob( + api_version="kubeflow.org/v1", + ... #update something in TFJob spec +) + +tfjob_client = TFJobClient() +tfjob_client.patch('mnist', tfjob) + +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +tfjob | [V1TFJob](V1TFJob.md) | tfjob definition| Required | +namespace | str | The tfjob's namespace for patching. If the `namespace` is not defined, will align with tfjob definition, or use current or default namespace if namespace is not specified in tfjob definition. | Optional| + +### Return type +object + + +## delete +> delete(name, namespace=None) + +Delete the created tfjob in the specified namespace + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.delete('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| + +### Return type +object + + +## wait_for_job +> wait_for_job(name, +> namespace=None, +> timeout_seconds=600, +> polling_interval=30, +> watch=False, +> status_callback=None): + +Wait for the specified job to finish. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.wait_for_job('mnist', namespace='kubeflow') + +# The API also supports watching the TFJob status till it's Succeeded or Failed.
+tfjob_client.wait_for_job('mnist', namespace='kubeflow', watch=True) +NAME STATE TIME +mnist Created 2019-12-31T09:20:07Z +mnist Running 2019-12-31T09:20:19Z +mnist Running 2019-12-31T09:20:19Z +mnist Succeeded 2019-12-31T09:22:04Z +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| +timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. | Optional| +polling_interval | int | How often to poll for the status of the job.| Optional| +status_callback | callable | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the tfjob.| Optional| +watch | bool | Watch the TFJob if `True`. Stop watching if TFJob reaches the optional specified `timeout_seconds` or once the TFJob status is `Succeeded` or `Failed`. | Optional | + +### Return type +object + + +## wait_for_condition +> wait_for_condition(name, +> expected_condition, +> namespace=None, +> timeout_seconds=600, +> polling_interval=30, +> status_callback=None): + + +Waits until any of the specified conditions occurs. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.wait_for_condition('mnist', expected_condition=["Succeeded", "Failed"], namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +expected_condition |List |A list of conditions. Function waits until any of the supplied conditions is reached.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace. | Optional| +timeout_seconds | int | How long to wait for the job, default wait for 600 seconds. 
| Optional| +polling_interval | int | How often to poll for the status of the job.| Optional| +status_callback | callable | Callable. If supplied this callable is invoked after we poll the job. Callable takes a single argument which is the tfjob.| Optional| + +### Return type +object + +## get_job_status +> get_job_status(name, namespace=None) + +Returns TFJob status, such as Running, Failed or Succeeded. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.get_job_status('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name. | | +namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | + +### Return type +Str + +## is_job_running +> is_job_running(name, namespace=None) + +Returns True if the TFJob is running; false otherwise. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.is_job_running('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | + +### Return type +Bool + +## is_job_succeeded +> is_job_succeeded(name, namespace=None) + +Returns True if the TFJob succeeded; false otherwise. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.is_job_succeeded('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. 
Defaults to current or default namespace.| Optional | + +### Return type +Bool + + +## get_pod_names +> get_pod_names(name, namespace=None, master=False, replica_type=None, replica_index=None) + +Get pod names of the TFJob. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.get_pod_names('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | +master | bool | Only get pods with label 'job-role: master' if True. | | +replica_type | str | User can specify one of 'worker, ps, chief' to get pods of only that type. By default, pods of all types are returned.| | +replica_index | str | User can specify replica index to get one pod of the TFJob. | | + +### Return type +Set + + +## get_logs +> get_logs(name, namespace=None, master=True, replica_type=None, replica_index=None, follow=False) + +Get training logs of the TFJob. By default, only the logs of the Pod labeled 'job-role: master' are returned; to get the logs of all pods, specify `master=False`. + +### Example + +```python +from kubeflow.training import TFJobClient + +tfjob_client = TFJobClient() +tfjob_client.get_logs('mnist', namespace='kubeflow') +``` + +### Parameters +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +name | str | The TFJob name.| | +namespace | str | The tfjob's namespace. Defaults to current or default namespace.| Optional | +master | bool | Only get pods with label 'job-role: master' if True. | | +replica_type | str | User can specify one of 'worker, ps, chief' to get pods of only that type. By default, pods of all types are returned.| | +replica_index | str | User can specify replica index to get one pod of the TFJob. | | +follow | bool | Follow the log stream of the pod. Defaults to false. 
| | + +### Return type +Str diff --git a/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb b/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb index bd7b073e03..e196e07f42 100644 --- a/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb +++ b/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -34,7 +34,7 @@ "from kubeflow.training import V1PyTorchJob\n", "from kubeflow.training import V1PyTorchJobSpec\n", "from kubeflow.training import V1RunPolicy\n", - "from kubeflow.training.api.py_torch_job_client import PyTorchJobClient" + "from kubeflow.training import PyTorchJobClient" ] }, { @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -128,7 +128,7 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'PyTorchJob',\n", - " 'metadata': {'creationTimestamp': '2021-09-22T21:39:22Z',\n", + " 'metadata': {'creationTimestamp': '2021-10-02T18:55:16Z',\n", " 'generation': 1,\n", " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", " 'fieldsType': 'FieldsV1',\n", @@ -145,11 +145,11 @@ " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}}}},\n", " 'manager': 'OpenAPI-Generator',\n", " 'operation': 'Update',\n", - " 'time': '2021-09-22T21:39:22Z'}],\n", + " 'time': '2021-10-02T18:55:16Z'}],\n", " 'name': 'pytorch-dist-mnist-gloo',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '605918',\n", - " 'uid': '00588811-222f-4c06-a07d-e29d9d01bb77'},\n", + " 'resourceVersion': '5169',\n", + " 'uid': '583b9831-8b6d-44e1-86c1-9a171c472fe3'},\n", " 'spec': {'pytorchReplicaSpecs': 
{'Master': {'replicas': 1,\n", " 'restartPolicy': 'OnFailure',\n", " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", @@ -163,7 +163,7 @@ " 'runPolicy': {'cleanPodPolicy': 'None'}}}" ] }, - "execution_count": 12, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -190,7 +190,7 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'PyTorchJob',\n", - " 'metadata': {'creationTimestamp': '2021-09-22T21:39:22Z',\n", + " 'metadata': {'creationTimestamp': '2021-10-02T18:55:16Z',\n", " 'generation': 1,\n", " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", " 'fieldsType': 'FieldsV1',\n", @@ -199,34 +199,29 @@ " 'f:Master': {'.': {},\n", " 'f:replicas': {},\n", " 'f:restartPolicy': {},\n", - " 'f:template': {'.': {}, 'f:spec': {}}},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", " 'f:Worker': {'.': {},\n", " 'f:replicas': {},\n", " 'f:restartPolicy': {},\n", - " 'f:template': {'.': {}, 'f:spec': {}}}}}},\n", + " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}},\n", + " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}}}},\n", " 'manager': 'OpenAPI-Generator',\n", " 'operation': 'Update',\n", - " 'time': '2021-09-22T21:39:22Z'},\n", + " 'time': '2021-10-02T18:55:16Z'},\n", " {'apiVersion': 'kubeflow.org/v1',\n", " 'fieldsType': 'FieldsV1',\n", - " 'fieldsV1': {'f:spec': {'f:cleanPodPolicy': {},\n", - " 'f:pytorchReplicaSpecs': {'f:Master': {'f:template': {'f:metadata': {'.': {},\n", - " 'f:creationTimestamp': {}},\n", - " 'f:spec': {'f:containers': {}}}},\n", - " 'f:Worker': {'f:template': {'f:metadata': {'.': {},\n", - " 'f:creationTimestamp': {}},\n", - " 'f:spec': {'f:containers': {}}}}}},\n", - " 'f:status': {'.': {},\n", + " 'fieldsV1': {'f:status': {'.': {},\n", " 'f:conditions': {},\n", - " 'f:replicaStatuses': {'.': {}, 
'f:Master': {}, 'f:Worker': {}},\n", - " 'f:startTime': {}}},\n", - " 'manager': 'pytorch-operator.v1',\n", + " 'f:replicaStatuses': {'.': {},\n", + " 'f:Master': {'.': {}, 'f:active': {}},\n", + " 'f:Worker': {'.': {}, 'f:active': {}}}}},\n", + " 'manager': 'manager',\n", " 'operation': 'Update',\n", - " 'time': '2021-09-22T21:39:22Z'}],\n", + " 'time': '2021-10-02T18:55:17Z'}],\n", " 'name': 'pytorch-dist-mnist-gloo',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '605956',\n", - " 'uid': '00588811-222f-4c06-a07d-e29d9d01bb77'},\n", + " 'resourceVersion': '5204',\n", + " 'uid': '583b9831-8b6d-44e1-86c1-9a171c472fe3'},\n", " 'spec': {'pytorchReplicaSpecs': {'Master': {'replicas': 1,\n", " 'restartPolicy': 'OnFailure',\n", " 'template': {'spec': {'containers': [{'args': ['--backend', 'gloo'],\n", @@ -238,17 +233,22 @@ " 'image': 'gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0',\n", " 'name': 'pytorch'}]}}}},\n", " 'runPolicy': {'cleanPodPolicy': 'None'}},\n", - " 'status': {'conditions': [{'lastTransitionTime': '2021-09-22T21:39:22Z',\n", - " 'lastUpdateTime': '2021-09-22T21:39:22Z',\n", + " 'status': {'conditions': [{'lastTransitionTime': '2021-10-02T18:55:16Z',\n", + " 'lastUpdateTime': '2021-10-02T18:55:16Z',\n", " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is created.',\n", " 'reason': 'PyTorchJobCreated',\n", " 'status': 'True',\n", - " 'type': 'Created'}],\n", - " 'replicaStatuses': {'Master': {}, 'Worker': {}},\n", - " 'startTime': '2021-09-22T21:39:22Z'}}" + " 'type': 'Created'},\n", + " {'lastTransitionTime': '2021-10-02T18:55:16Z',\n", + " 'lastUpdateTime': '2021-10-02T18:55:16Z',\n", + " 'message': 'PyTorchJob pytorch-dist-mnist-gloo is running.',\n", + " 'reason': 'JobRunning',\n", + " 'status': 'True',\n", + " 'type': 'Running'}],\n", + " 'replicaStatuses': {'Master': {'active': 1}, 'Worker': {'active': 1}}}}" ] }, - "execution_count": 13, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -266,16 +266,16 @@ }, 
{ "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Created'" + "'Running'" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -293,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -301,9 +301,9 @@ "output_type": "stream", "text": [ "NAME STATE TIME \n", - "pytorch-dist-mnist-gloo Created 2021-09-22T21:39:22Z \n", - "pytorch-dist-mnist-gloo Running 2021-09-22T21:40:29Z \n", - "pytorch-dist-mnist-gloo Running 2021-09-22T21:40:29Z \n" + "pytorch-dist-mnist-gloo Running 2021-10-02T18:55:16Z \n", + "pytorch-dist-mnist-gloo Running 2021-10-02T18:55:16Z \n", + "pytorch-dist-mnist-gloo Succeeded 2021-10-02T18:57:38Z \n" ] } ], @@ -347,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -369,95 +369,95 @@ "Train Epoch: 1 [2560/60000 (4%)]\tloss=1.8679\n", "Train Epoch: 1 [3200/60000 (5%)]\tloss=1.4135\n", "Train Epoch: 1 [3840/60000 (6%)]\tloss=1.0003\n", - "Train Epoch: 1 [4480/60000 (7%)]\tloss=0.7762\n", + "Train Epoch: 1 [4480/60000 (7%)]\tloss=0.7763\n", "Train Epoch: 1 [5120/60000 (9%)]\tloss=0.4598\n", - "Train Epoch: 1 [5760/60000 (10%)]\tloss=0.4860\n", - "Train Epoch: 1 [6400/60000 (11%)]\tloss=0.4389\n", - "Train Epoch: 1 [7040/60000 (12%)]\tloss=0.4084\n", - "Train Epoch: 1 [7680/60000 (13%)]\tloss=0.4602\n", - "Train Epoch: 1 [8320/60000 (14%)]\tloss=0.4289\n", - "Train Epoch: 1 [8960/60000 (15%)]\tloss=0.3990\n", - "Train Epoch: 1 [9600/60000 (16%)]\tloss=0.3850\n", - "Train Epoch: 1 [10240/60000 (17%)]\tloss=0.2985\n", - "Train Epoch: 1 [10880/60000 (18%)]\tloss=0.5031\n", - "Train Epoch: 1 [11520/60000 (19%)]\tloss=0.5235\n", - "Train Epoch: 1 [12160/60000 (20%)]\tloss=0.3379\n", - "Train Epoch: 1 [12800/60000 (21%)]\tloss=0.3667\n", - "Train Epoch: 1 [13440/60000 
(22%)]\tloss=0.4503\n", + "Train Epoch: 1 [5760/60000 (10%)]\tloss=0.4870\n", + "Train Epoch: 1 [6400/60000 (11%)]\tloss=0.4381\n", + "Train Epoch: 1 [7040/60000 (12%)]\tloss=0.4089\n", + "Train Epoch: 1 [7680/60000 (13%)]\tloss=0.4618\n", + "Train Epoch: 1 [8320/60000 (14%)]\tloss=0.4284\n", + "Train Epoch: 1 [8960/60000 (15%)]\tloss=0.3992\n", + "Train Epoch: 1 [9600/60000 (16%)]\tloss=0.3840\n", + "Train Epoch: 1 [10240/60000 (17%)]\tloss=0.2981\n", + "Train Epoch: 1 [10880/60000 (18%)]\tloss=0.5013\n", + "Train Epoch: 1 [11520/60000 (19%)]\tloss=0.5246\n", + "Train Epoch: 1 [12160/60000 (20%)]\tloss=0.3376\n", + "Train Epoch: 1 [12800/60000 (21%)]\tloss=0.3678\n", + "Train Epoch: 1 [13440/60000 (22%)]\tloss=0.4515\n", "Train Epoch: 1 [14080/60000 (23%)]\tloss=0.3043\n", - "Train Epoch: 1 [14720/60000 (25%)]\tloss=0.3589\n", - "Train Epoch: 1 [15360/60000 (26%)]\tloss=0.3320\n", - "Train Epoch: 1 [16000/60000 (27%)]\tloss=0.4406\n", - "Train Epoch: 1 [16640/60000 (28%)]\tloss=0.3641\n", - "Train Epoch: 1 [17280/60000 (29%)]\tloss=0.3170\n", - "Train Epoch: 1 [17920/60000 (30%)]\tloss=0.2014\n", - "Train Epoch: 1 [18560/60000 (31%)]\tloss=0.4985\n", - "Train Epoch: 1 [19200/60000 (32%)]\tloss=0.3264\n", - "Train Epoch: 1 [19840/60000 (33%)]\tloss=0.1198\n", - "Train Epoch: 1 [20480/60000 (34%)]\tloss=0.1904\n", - "Train Epoch: 1 [21120/60000 (35%)]\tloss=0.1424\n", - "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3143\n", - "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1494\n", - "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2901\n", - "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4670\n", - "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2151\n", - "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1521\n", - "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2240\n", - "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2629\n", - "Train Epoch: 1 [26880/60000 (45%)]\tloss=0.2330\n", - "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2630\n", + "Train Epoch: 1 [14720/60000 (25%)]\tloss=0.3581\n", + 
"Train Epoch: 1 [15360/60000 (26%)]\tloss=0.3301\n", + "Train Epoch: 1 [16000/60000 (27%)]\tloss=0.4392\n", + "Train Epoch: 1 [16640/60000 (28%)]\tloss=0.3626\n", + "Train Epoch: 1 [17280/60000 (29%)]\tloss=0.3179\n", + "Train Epoch: 1 [17920/60000 (30%)]\tloss=0.2013\n", + "Train Epoch: 1 [18560/60000 (31%)]\tloss=0.5004\n", + "Train Epoch: 1 [19200/60000 (32%)]\tloss=0.3266\n", + "Train Epoch: 1 [19840/60000 (33%)]\tloss=0.1194\n", + "Train Epoch: 1 [20480/60000 (34%)]\tloss=0.1898\n", + "Train Epoch: 1 [21120/60000 (35%)]\tloss=0.1402\n", + "Train Epoch: 1 [21760/60000 (36%)]\tloss=0.3161\n", + "Train Epoch: 1 [22400/60000 (37%)]\tloss=0.1499\n", + "Train Epoch: 1 [23040/60000 (38%)]\tloss=0.2888\n", + "Train Epoch: 1 [23680/60000 (39%)]\tloss=0.4680\n", + "Train Epoch: 1 [24320/60000 (41%)]\tloss=0.2159\n", + "Train Epoch: 1 [24960/60000 (42%)]\tloss=0.1518\n", + "Train Epoch: 1 [25600/60000 (43%)]\tloss=0.2247\n", + "Train Epoch: 1 [26240/60000 (44%)]\tloss=0.2634\n", + "Train Epoch: 1 [26880/60000 (45%)]\tloss=0.2333\n", + "Train Epoch: 1 [27520/60000 (46%)]\tloss=0.2626\n", "Train Epoch: 1 [28160/60000 (47%)]\tloss=0.2126\n", - "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1327\n", - "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2789\n", - "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0947\n", - "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1280\n", - "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2458\n", - "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3394\n", - "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1527\n", - "Train Epoch: 1 [33280/60000 (55%)]\tloss=0.0901\n", - "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1451\n", - "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1994\n", - "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2171\n", - "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0633\n", - "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1369\n", - "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1160\n", - "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2355\n", - "Train Epoch: 1 
[38400/60000 (64%)]\tloss=0.0634\n", - "Train Epoch: 1 [39040/60000 (65%)]\tloss=0.1062\n", - "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1608\n", - "Train Epoch: 1 [40320/60000 (67%)]\tloss=0.1101\n", - "Train Epoch: 1 [40960/60000 (68%)]\tloss=0.1775\n", - "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2285\n", - "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0737\n", - "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1562\n", - "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2775\n", - "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1418\n", - "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1163\n", - "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1221\n", - "Train Epoch: 1 [46080/60000 (77%)]\tloss=0.0768\n", - "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1950\n", - "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0706\n", - "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2091\n", - "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1380\n", - "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0950\n", - "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1070\n", - "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1194\n", - "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1447\n", - "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0662\n", - "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0239\n", - "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2622\n", - "Train Epoch: 1 [53760/60000 (90%)]\tloss=0.0928\n", - "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1297\n", - "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1907\n", - "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0347\n", - "Train Epoch: 1 [56320/60000 (94%)]\tloss=0.0354\n", + "Train Epoch: 1 [28800/60000 (48%)]\tloss=0.1335\n", + "Train Epoch: 1 [29440/60000 (49%)]\tloss=0.2777\n", + "Train Epoch: 1 [30080/60000 (50%)]\tloss=0.0940\n", + "Train Epoch: 1 [30720/60000 (51%)]\tloss=0.1276\n", + "Train Epoch: 1 [31360/60000 (52%)]\tloss=0.2465\n", + "Train Epoch: 1 [32000/60000 (53%)]\tloss=0.3388\n", + "Train Epoch: 1 [32640/60000 (54%)]\tloss=0.1522\n", + "Train Epoch: 1 [33280/60000 
(55%)]\tloss=0.0904\n", + "Train Epoch: 1 [33920/60000 (57%)]\tloss=0.1449\n", + "Train Epoch: 1 [34560/60000 (58%)]\tloss=0.1985\n", + "Train Epoch: 1 [35200/60000 (59%)]\tloss=0.2195\n", + "Train Epoch: 1 [35840/60000 (60%)]\tloss=0.0631\n", + "Train Epoch: 1 [36480/60000 (61%)]\tloss=0.1359\n", + "Train Epoch: 1 [37120/60000 (62%)]\tloss=0.1165\n", + "Train Epoch: 1 [37760/60000 (63%)]\tloss=0.2356\n", + "Train Epoch: 1 [38400/60000 (64%)]\tloss=0.0635\n", + "Train Epoch: 1 [39040/60000 (65%)]\tloss=0.1068\n", + "Train Epoch: 1 [39680/60000 (66%)]\tloss=0.1600\n", + "Train Epoch: 1 [40320/60000 (67%)]\tloss=0.1089\n", + "Train Epoch: 1 [40960/60000 (68%)]\tloss=0.1781\n", + "Train Epoch: 1 [41600/60000 (69%)]\tloss=0.2301\n", + "Train Epoch: 1 [42240/60000 (70%)]\tloss=0.0741\n", + "Train Epoch: 1 [42880/60000 (71%)]\tloss=0.1549\n", + "Train Epoch: 1 [43520/60000 (72%)]\tloss=0.2785\n", + "Train Epoch: 1 [44160/60000 (74%)]\tloss=0.1427\n", + "Train Epoch: 1 [44800/60000 (75%)]\tloss=0.1164\n", + "Train Epoch: 1 [45440/60000 (76%)]\tloss=0.1217\n", + "Train Epoch: 1 [46080/60000 (77%)]\tloss=0.0779\n", + "Train Epoch: 1 [46720/60000 (78%)]\tloss=0.1949\n", + "Train Epoch: 1 [47360/60000 (79%)]\tloss=0.0687\n", + "Train Epoch: 1 [48000/60000 (80%)]\tloss=0.2096\n", + "Train Epoch: 1 [48640/60000 (81%)]\tloss=0.1387\n", + "Train Epoch: 1 [49280/60000 (82%)]\tloss=0.0942\n", + "Train Epoch: 1 [49920/60000 (83%)]\tloss=0.1073\n", + "Train Epoch: 1 [50560/60000 (84%)]\tloss=0.1198\n", + "Train Epoch: 1 [51200/60000 (85%)]\tloss=0.1442\n", + "Train Epoch: 1 [51840/60000 (86%)]\tloss=0.0656\n", + "Train Epoch: 1 [52480/60000 (87%)]\tloss=0.0242\n", + "Train Epoch: 1 [53120/60000 (88%)]\tloss=0.2644\n", + "Train Epoch: 1 [53760/60000 (90%)]\tloss=0.0932\n", + "Train Epoch: 1 [54400/60000 (91%)]\tloss=0.1294\n", + "Train Epoch: 1 [55040/60000 (92%)]\tloss=0.1901\n", + "Train Epoch: 1 [55680/60000 (93%)]\tloss=0.0341\n", + "Train Epoch: 1 [56320/60000 
(94%)]\tloss=0.0358\n", "Train Epoch: 1 [56960/60000 (95%)]\tloss=0.0770\n", - "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1175\n", - "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1919\n", - "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2053\n", - "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0639\n", + "Train Epoch: 1 [57600/60000 (96%)]\tloss=0.1181\n", + "Train Epoch: 1 [58240/60000 (97%)]\tloss=0.1945\n", + "Train Epoch: 1 [58880/60000 (98%)]\tloss=0.2064\n", + "Train Epoch: 1 [59520/60000 (99%)]\tloss=0.0642\n", "\n", - "accuracy=0.9664\n", + "accuracy=0.9667\n", "\n", "\n" ] @@ -476,7 +476,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -489,10 +489,10 @@ " 'details': {'name': 'pytorch-dist-mnist-gloo',\n", " 'group': 'kubeflow.org',\n", " 'kind': 'pytorchjobs',\n", - " 'uid': '47f9dc9a-36af-11ea-beb5-00163e01f7d2'}}" + " 'uid': '583b9831-8b6d-44e1-86c1-9a171c472fe3'}}" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -500,13 +500,6 @@ "source": [ "pytorchjob_client.delete('pytorch-dist-mnist-gloo')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -525,7 +518,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/sdk/python/examples/kubeflow-tfjob-sdk.ipynb b/sdk/python/examples/kubeflow-tfjob-sdk.ipynb index 6d4e8a857b..0333447be0 100644 --- a/sdk/python/examples/kubeflow-tfjob-sdk.ipynb +++ b/sdk/python/examples/kubeflow-tfjob-sdk.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,7 @@ "from kubeflow.training import V1TFJob\n", "from kubeflow.training import V1TFJobSpec\n", "from kubeflow.training import V1RunPolicy\n", - "from 
kubeflow.training.api.tf_job_client import TFJobClient" + "from kubeflow.training import TFJobClient" ] }, { @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -143,7 +143,7 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'TFJob',\n", - " 'metadata': {'creationTimestamp': '2021-09-22T21:27:46Z',\n", + " 'metadata': {'creationTimestamp': '2021-10-02T19:02:08Z',\n", " 'generation': 1,\n", " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", " 'fieldsType': 'FieldsV1',\n", @@ -164,11 +164,11 @@ " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", " 'manager': 'OpenAPI-Generator',\n", " 'operation': 'Update',\n", - " 'time': '2021-09-22T21:27:46Z'}],\n", + " 'time': '2021-10-02T19:02:08Z'}],\n", " 'name': 'mnist',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '594847',\n", - " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'},\n", + " 'resourceVersion': '6042',\n", + " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'},\n", " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", " 'restartPolicy': 'Never',\n", @@ -199,7 +199,7 @@ " 'name': 'tensorflow'}]}}}}}}" ] }, - "execution_count": 19, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -218,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -226,7 +226,7 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1',\n", " 'kind': 'TFJob',\n", - " 'metadata': {'creationTimestamp': '2021-09-22T21:27:46Z',\n", + " 'metadata': {'creationTimestamp': '2021-10-02T19:02:08Z',\n", " 
'generation': 1,\n", " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", " 'fieldsType': 'FieldsV1',\n", @@ -247,11 +247,23 @@ " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", " 'manager': 'OpenAPI-Generator',\n", " 'operation': 'Update',\n", - " 'time': '2021-09-22T21:27:46Z'}],\n", + " 'time': '2021-10-02T19:02:08Z'},\n", + " {'apiVersion': 'kubeflow.org/v1',\n", + " 'fieldsType': 'FieldsV1',\n", + " 'fieldsV1': {'f:status': {'.': {},\n", + " 'f:conditions': {},\n", + " 'f:replicaStatuses': {'.': {},\n", + " 'f:Chief': {'.': {}, 'f:active': {}},\n", + " 'f:PS': {'.': {}, 'f:active': {}},\n", + " 'f:Worker': {}},\n", + " 'f:startTime': {}}},\n", + " 'manager': 'manager',\n", + " 'operation': 'Update',\n", + " 'time': '2021-10-02T19:02:10Z'}],\n", " 'name': 'mnist',\n", " 'namespace': 'default',\n", - " 'resourceVersion': '594847',\n", - " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'},\n", + " 'resourceVersion': '6105',\n", + " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'},\n", " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", " 'restartPolicy': 'Never',\n", @@ -279,10 +291,26 @@ " '--learning_rate=0.01',\n", " '--batch_size=150'],\n", " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", - " 'name': 'tensorflow'}]}}}}}}" + " 'name': 'tensorflow'}]}}}}},\n", + " 'status': {'conditions': [{'lastTransitionTime': '2021-10-02T19:02:08Z',\n", + " 'lastUpdateTime': '2021-10-02T19:02:08Z',\n", + " 'message': 'TFJob mnist is created.',\n", + " 'reason': 'TFJobCreated',\n", + " 'status': 'True',\n", + " 'type': 'Created'},\n", + " {'lastTransitionTime': '2021-10-02T19:02:10Z',\n", + " 'lastUpdateTime': '2021-10-02T19:02:10Z',\n", + " 'message': 'TFJob default/mnist is running.',\n", + " 'reason': 'TFJobRunning',\n", + " 'status': 'True',\n", + " 'type': 'Running'}],\n", + " 'replicaStatuses': {'Chief': {'active': 1},\n", + " 'PS': {'active': 1},\n", + " 'Worker': {}},\n", + 
" 'startTime': '2021-10-02T19:02:09Z'}}" ] }, - "execution_count": 20, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -300,16 +328,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "''" + "'Running'" ] }, - "execution_count": 21, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -327,20 +355,18 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "mnist \n", - "mnist Created 2021-09-22T21:27:46Z \n", - "mnist Created 2021-09-22T21:27:46Z \n", - "mnist Created 2021-09-22T21:27:46Z \n", - "mnist Running 2021-09-22T21:27:51Z \n", - "mnist Running 2021-09-22T21:27:51Z \n", - "mnist Succeeded 2021-09-22T21:29:38Z \n" + "NAME STATE TIME \n", + "mnist Running 2021-10-02T19:02:10Z \n", + "mnist Running 2021-10-02T19:02:10Z \n", + "mnist Running 2021-10-02T19:02:10Z \n", + "mnist Succeeded 2021-10-02T19:04:10Z \n" ] } ], @@ -357,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": { "scrolled": true }, @@ -368,7 +394,7 @@ "True" ] }, - "execution_count": 23, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -386,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -412,7 +438,7 @@ "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: __init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", - "2020-01-10 06:05:42.333504: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow 
binary was not compiled to use: AVX2 FMA\n", + "2021-10-02 19:02:25.434889: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", "Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n", "Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz\n", "Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n", @@ -421,115 +447,115 @@ "Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz\n", "Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n", "Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz\n", - "Accuracy at step 0: 0.147\n", - "Accuracy at step 10: 0.7369\n", - "Accuracy at step 20: 0.8666\n", - "Accuracy at step 30: 0.9027\n", - "Accuracy at step 40: 0.9117\n", - "Accuracy at step 50: 0.9221\n", - "Accuracy at step 60: 0.9214\n", - "Accuracy at step 70: 0.9266\n", - "Accuracy at step 80: 0.934\n", - "Accuracy at step 90: 0.9322\n", + "Accuracy at step 0: 0.1348\n", + "Accuracy at step 10: 0.787\n", + "Accuracy at step 20: 0.8648\n", + "Accuracy at step 30: 0.9056\n", + "Accuracy at step 40: 0.9162\n", + "Accuracy at step 50: 0.9237\n", + "Accuracy at step 60: 0.926\n", + "Accuracy at step 70: 0.9365\n", + "Accuracy at step 80: 0.9371\n", + "Accuracy at step 90: 0.9352\n", "Adding run metadata for 99\n", - "Accuracy at step 100: 0.9389\n", - "Accuracy at step 110: 0.9356\n", - "Accuracy at step 120: 0.9429\n", - "Accuracy at step 130: 0.9481\n", - "Accuracy at step 140: 0.9526\n", - "Accuracy at step 150: 0.9476\n", - "Accuracy at step 160: 0.9509\n", - "Accuracy at step 170: 0.9483\n", - "Accuracy at step 180: 0.9491\n", - "Accuracy at step 190: 0.9533\n", + "Accuracy at step 100: 0.9439\n", + "Accuracy at step 110: 0.9434\n", + "Accuracy at step 120: 0.9382\n", + "Accuracy at step 130: 0.9444\n", + "Accuracy at step 140: 0.9487\n", + "Accuracy at step 150: 0.9462\n", + "Accuracy at 
step 160: 0.9454\n", + "Accuracy at step 170: 0.9426\n", + "Accuracy at step 180: 0.9473\n", + "Accuracy at step 190: 0.9536\n", "Adding run metadata for 199\n", - "Accuracy at step 200: 0.9536\n", - "Accuracy at step 210: 0.9456\n", - "Accuracy at step 220: 0.9542\n", - "Accuracy at step 230: 0.957\n", - "Accuracy at step 240: 0.9548\n", - "Accuracy at step 250: 0.951\n", - "Accuracy at step 260: 0.9529\n", - "Accuracy at step 270: 0.9567\n", - "Accuracy at step 280: 0.9558\n", - "Accuracy at step 290: 0.9573\n", + "Accuracy at step 200: 0.9559\n", + "Accuracy at step 210: 0.9519\n", + "Accuracy at step 220: 0.9485\n", + "Accuracy at step 230: 0.95\n", + "Accuracy at step 240: 0.9563\n", + "Accuracy at step 250: 0.9575\n", + "Accuracy at step 260: 0.9591\n", + "Accuracy at step 270: 0.9589\n", + "Accuracy at step 280: 0.957\n", + "Accuracy at step 290: 0.9581\n", "Adding run metadata for 299\n", - "Accuracy at step 300: 0.9496\n", - "Accuracy at step 310: 0.9596\n", - "Accuracy at step 320: 0.9551\n", - "Accuracy at step 330: 0.9539\n", - "Accuracy at step 340: 0.9639\n", - "Accuracy at step 350: 0.9616\n", - "Accuracy at step 360: 0.9574\n", - "Accuracy at step 370: 0.9579\n", - "Accuracy at step 380: 0.9644\n", - "Accuracy at step 390: 0.965\n", + "Accuracy at step 300: 0.9606\n", + "Accuracy at step 310: 0.9585\n", + "Accuracy at step 320: 0.9593\n", + "Accuracy at step 330: 0.958\n", + "Accuracy at step 340: 0.9537\n", + "Accuracy at step 350: 0.961\n", + "Accuracy at step 360: 0.9615\n", + "Accuracy at step 370: 0.962\n", + "Accuracy at step 380: 0.956\n", + "Accuracy at step 390: 0.9591\n", "Adding run metadata for 399\n", - "Accuracy at step 400: 0.9637\n", - "Accuracy at step 410: 0.9655\n", - "Accuracy at step 420: 0.9654\n", - "Accuracy at step 430: 0.9668\n", - "Accuracy at step 440: 0.9698\n", - "Accuracy at step 450: 0.9649\n", - "Accuracy at step 460: 0.965\n", - "Accuracy at step 470: 0.9617\n", - "Accuracy at step 480: 0.9674\n", - "Accuracy at 
step 490: 0.9686\n", + "Accuracy at step 400: 0.9554\n", + "Accuracy at step 410: 0.9604\n", + "Accuracy at step 420: 0.9638\n", + "Accuracy at step 430: 0.9614\n", + "Accuracy at step 440: 0.9645\n", + "Accuracy at step 450: 0.9683\n", + "Accuracy at step 460: 0.9591\n", + "Accuracy at step 470: 0.9645\n", + "Accuracy at step 480: 0.9557\n", + "Accuracy at step 490: 0.9647\n", "Adding run metadata for 499\n", - "Accuracy at step 500: 0.9684\n", - "Accuracy at step 510: 0.965\n", - "Accuracy at step 520: 0.9665\n", - "Accuracy at step 530: 0.9682\n", - "Accuracy at step 540: 0.9607\n", - "Accuracy at step 550: 0.967\n", - "Accuracy at step 560: 0.9641\n", - "Accuracy at step 570: 0.9706\n", - "Accuracy at step 580: 0.9675\n", - "Accuracy at step 590: 0.9691\n", + "Accuracy at step 500: 0.9611\n", + "Accuracy at step 510: 0.9623\n", + "Accuracy at step 520: 0.9606\n", + "Accuracy at step 530: 0.9661\n", + "Accuracy at step 540: 0.9684\n", + "Accuracy at step 550: 0.9629\n", + "Accuracy at step 560: 0.9605\n", + "Accuracy at step 570: 0.9672\n", + "Accuracy at step 580: 0.9712\n", + "Accuracy at step 590: 0.9649\n", "Adding run metadata for 599\n", - "Accuracy at step 600: 0.9668\n", - "Accuracy at step 610: 0.964\n", - "Accuracy at step 620: 0.9665\n", - "Accuracy at step 630: 0.9713\n", - "Accuracy at step 640: 0.9673\n", - "Accuracy at step 650: 0.9635\n", - "Accuracy at step 660: 0.9643\n", - "Accuracy at step 670: 0.9632\n", - "Accuracy at step 680: 0.9602\n", - "Accuracy at step 690: 0.9621\n", + "Accuracy at step 600: 0.9679\n", + "Accuracy at step 610: 0.9689\n", + "Accuracy at step 620: 0.9664\n", + "Accuracy at step 630: 0.9667\n", + "Accuracy at step 640: 0.9644\n", + "Accuracy at step 650: 0.9721\n", + "Accuracy at step 660: 0.965\n", + "Accuracy at step 670: 0.9646\n", + "Accuracy at step 680: 0.9661\n", + "Accuracy at step 690: 0.9623\n", "Adding run metadata for 699\n", - "Accuracy at step 700: 0.9592\n", - "Accuracy at step 710: 0.9618\n", - "Accuracy 
at step 720: 0.965\n", - "Accuracy at step 730: 0.9658\n", - "Accuracy at step 740: 0.9611\n", - "Accuracy at step 750: 0.961\n", - "Accuracy at step 760: 0.9677\n", - "Accuracy at step 770: 0.9651\n", + "Accuracy at step 700: 0.9581\n", + "Accuracy at step 710: 0.9649\n", + "Accuracy at step 720: 0.9633\n", + "Accuracy at step 730: 0.9659\n", + "Accuracy at step 740: 0.9607\n", + "Accuracy at step 750: 0.9676\n", + "Accuracy at step 760: 0.9697\n", + "Accuracy at step 770: 0.9662\n", "Accuracy at step 780: 0.9659\n", - "Accuracy at step 790: 0.9655\n", + "Accuracy at step 790: 0.9633\n", "Adding run metadata for 799\n", - "Accuracy at step 800: 0.9637\n", - "Accuracy at step 810: 0.9662\n", - "Accuracy at step 820: 0.9687\n", - "Accuracy at step 830: 0.9705\n", - "Accuracy at step 840: 0.9694\n", - "Accuracy at step 850: 0.9712\n", - "Accuracy at step 860: 0.9684\n", - "Accuracy at step 870: 0.9698\n", - "Accuracy at step 880: 0.9723\n", - "Accuracy at step 890: 0.9699\n", + "Accuracy at step 800: 0.9638\n", + "Accuracy at step 810: 0.9592\n", + "Accuracy at step 820: 0.9642\n", + "Accuracy at step 830: 0.9682\n", + "Accuracy at step 840: 0.9695\n", + "Accuracy at step 850: 0.9657\n", + "Accuracy at step 860: 0.9696\n", + "Accuracy at step 870: 0.9695\n", + "Accuracy at step 880: 0.9711\n", + "Accuracy at step 890: 0.9687\n", "Adding run metadata for 899\n", - "Accuracy at step 900: 0.9699\n", - "Accuracy at step 910: 0.9681\n", - "Accuracy at step 920: 0.97\n", - "Accuracy at step 930: 0.9719\n", - "Accuracy at step 940: 0.9724\n", - "Accuracy at step 950: 0.9673\n", - "Accuracy at step 960: 0.9684\n", - "Accuracy at step 970: 0.9693\n", - "Accuracy at step 980: 0.9712\n", - "Accuracy at step 990: 0.9719\n", + "Accuracy at step 900: 0.9689\n", + "Accuracy at step 910: 0.9699\n", + "Accuracy at step 920: 0.9677\n", + "Accuracy at step 930: 0.9689\n", + "Accuracy at step 940: 0.9702\n", + "Accuracy at step 950: 0.9716\n", + "Accuracy at step 960: 0.9692\n", + 
"Accuracy at step 970: 0.967\n", + "Accuracy at step 980: 0.9687\n", + "Accuracy at step 990: 0.9665\n", "Adding run metadata for 999\n", "\n" ] @@ -548,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -561,10 +587,10 @@ " 'details': {'name': 'mnist',\n", " 'group': 'kubeflow.org',\n", " 'kind': 'tfjobs',\n", - " 'uid': '13e06ee2-3bb0-42f5-832b-f0b4fb2f5874'}}" + " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'}}" ] }, - "execution_count": 25, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } diff --git a/sdk/python/kubeflow/training/__init__.py b/sdk/python/kubeflow/training/__init__.py index 027069720d..b0044fe2d2 100644 --- a/sdk/python/kubeflow/training/__init__.py +++ b/sdk/python/kubeflow/training/__init__.py @@ -46,3 +46,5 @@ from kubeflow.training.models.v1_xg_boost_job_list import V1XGBoostJobList from kubeflow.training.models.v1_xg_boost_job_spec import V1XGBoostJobSpec +from kubeflow.training.api.tf_job_client import TFJobClient +from kubeflow.training.api.py_torch_job_client import PyTorchJobClient diff --git a/sdk/python/kubeflow/training/api/py_torch_job_client.py b/sdk/python/kubeflow/training/api/py_torch_job_client.py index 231a903412..7d922b5f71 100644 --- a/sdk/python/kubeflow/training/api/py_torch_job_client.py +++ b/sdk/python/kubeflow/training/api/py_torch_job_client.py @@ -1,4 +1,4 @@ -# Copyright 2019 The Kubeflow Authors. +# Copyright 2021 The Kubeflow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -296,7 +296,7 @@ def is_job_running(self, name, namespace=None): :return: True or False """ pytorchjob_status = self.get_job_status(name, namespace=namespace) - return pytorchjob_status.lower() == "running" + return pytorchjob_status == constants.JOB_STATUS_RUNNING def is_job_succeeded(self, name, namespace=None): """Returns true if the PyTorchJob succeeded; false otherwise. @@ -306,7 +306,7 @@ def is_job_succeeded(self, name, namespace=None): :return: True or False """ pytorchjob_status = self.get_job_status(name, namespace=namespace) - return pytorchjob_status.lower() == "succeeded" + return pytorchjob_status == constants.JOB_STATUS_SUCCEEDED def get_pod_names(self, name, namespace=None, master=False, # pylint: disable=inconsistent-return-statements replica_type=None, replica_index=None): diff --git a/sdk/python/kubeflow/training/api/py_torch_job_watch.py b/sdk/python/kubeflow/training/api/py_torch_job_watch.py index dcbf1469d3..d19cd08413 100644 --- a/sdk/python/kubeflow/training/api/py_torch_job_watch.py +++ b/sdk/python/kubeflow/training/api/py_torch_job_watch.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Kubeflow Authors. +# Copyright 2021 The Kubeflow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -56,5 +56,5 @@ def watch(name=None, namespace=None, timeout_seconds=600): tbl(pytorchjob_name, status, update_time) if name == pytorchjob_name: - if status == 'Succeeded' or status == 'Failed': + if status in [constants.JOB_STATUS_SUCCEEDED, constants.JOB_STATUS_FAILED]: break diff --git a/sdk/python/kubeflow/training/api/tf_job_client.py b/sdk/python/kubeflow/training/api/tf_job_client.py index 731d16cbe4..6e984795df 100644 --- a/sdk/python/kubeflow/training/api/tf_job_client.py +++ b/sdk/python/kubeflow/training/api/tf_job_client.py @@ -1,4 +1,4 @@ -# Copyright 2019 The Kubeflow Authors. +# Copyright 2021 The Kubeflow Authors. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/sdk/python/kubeflow/training/api/tf_job_watch.py b/sdk/python/kubeflow/training/api/tf_job_watch.py index 36c1fc56f4..60971ab03a 100644 --- a/sdk/python/kubeflow/training/api/tf_job_watch.py +++ b/sdk/python/kubeflow/training/api/tf_job_watch.py @@ -56,5 +56,5 @@ def watch(name=None, namespace=None, timeout_seconds=600): tbl(tfjob_name, status, update_time) if name == tfjob_name: - if status == 'Succeeded' or status == 'Failed': + if status in [constants.JOB_STATUS_SUCCEEDED, constants.JOB_STATUS_FAILED]: break diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 9b1226abd2..5968d121e6 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -33,6 +33,10 @@ JOB_INDEX_LABEL = 'replica-index' JOB_ROLE_LABEL = 'job-role' +JOB_STATUS_SUCCEEDED = 'Succeeded' +JOB_STATUS_FAILED = 'Failed' +JOB_STATUS_RUNNING = 'Running' + # PyTorchJob K8S constants PYTORCHJOB_GROUP = 'kubeflow.org' PYTORCHJOB_KIND = 'PyTorchJob' diff --git a/sdk/python/kubeflow/training/utils/utils.py b/sdk/python/kubeflow/training/utils/utils.py index 24ace9f15b..e11f547e6d 100644 --- a/sdk/python/kubeflow/training/utils/utils.py +++ b/sdk/python/kubeflow/training/utils/utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 kubeflow.org. +# Copyright 2021 kubeflow.org. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/sdk/python/setup.py b/sdk/python/setup.py index d2420133a8..415b5eb9e5 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -41,8 +41,6 @@ 'Intended Audience :: Developers', 'Intended Audience :: Education', 'Intended Audience :: Science/Research', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/sdk/python/test/e2e/test_e2e_pytorchjob.py b/sdk/python/test/e2e/test_e2e_pytorchjob.py index e5f6170c1c..1fbc53c9ad 100644 --- a/sdk/python/test/e2e/test_e2e_pytorchjob.py +++ b/sdk/python/test/e2e/test_e2e_pytorchjob.py @@ -19,11 +19,11 @@ from kubernetes.client import V1PodSpec from kubernetes.client import V1Container -from kubeflow.training.api.py_torch_job_client import PyTorchJobClient -from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec -from kubeflow.training.models.v1_py_torch_job import V1PyTorchJob -from kubeflow.training.models.v1_py_torch_job_spec import V1PyTorchJobSpec -from kubeflow.training.models.v1_run_policy import V1RunPolicy +from kubeflow.training import PyTorchJobClient +from kubeflow.training import V1ReplicaSpec +from kubeflow.training import V1PyTorchJob +from kubeflow.training import V1PyTorchJobSpec +from kubeflow.training import V1RunPolicy PYTORCH_CLIENT = PyTorchJobClient(config_file=os.getenv('KUBECONFIG', '~/.kube/config')) SDK_TEST_NAMESPACE = 'default' diff --git a/sdk/python/test/e2e/test_e2e_tfjob.py b/sdk/python/test/e2e/test_e2e_tfjob.py index efade31bab..33fc057a1f 100644 --- a/sdk/python/test/e2e/test_e2e_tfjob.py +++ b/sdk/python/test/e2e/test_e2e_tfjob.py @@ -19,11 +19,11 @@ from kubernetes.client import V1PodSpec from kubernetes.client import V1Container -from kubeflow.training.api.tf_job_client import TFJobClient -from kubeflow.training.models.v1_replica_spec import V1ReplicaSpec -from kubeflow.training.models.v1_run_policy 
import V1RunPolicy -from kubeflow.training.models.v1_tf_job import V1TFJob -from kubeflow.training.models.v1_tf_job_spec import V1TFJobSpec +from kubeflow.training import TFJobClient +from kubeflow.training import V1ReplicaSpec +from kubeflow.training import V1RunPolicy +from kubeflow.training import V1TFJob +from kubeflow.training import V1TFJobSpec TFJOB_CLIENT = TFJobClient(config_file=os.getenv('KUBECONFIG')) SDK_TEST_NAMESPACE = 'kubeflow'