{BatchAI} Migrate BatchAI module to track 2 SDK, azure-mgmt-batchai==7.0.0b1 (#18681)
BigCat20196 authored Jul 13, 2021
1 parent a8abfbb commit 5b09302
Showing 14 changed files with 362 additions and 617 deletions.
@@ -5,7 +5,7 @@


def batchai_client_factory(cli_ctx, _=None):
from azure.mgmt.batchai import BatchAIManagementClient
from azure.mgmt.batchai import BatchAI as BatchAIManagementClient
from azure.cli.core.commands.client_factory import get_mgmt_service_client
return get_mgmt_service_client(cli_ctx, BatchAIManagementClient)

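
For context, the track 2 package renames the client class to BatchAI, so the factory above keeps the old BatchAIManagementClient alias for the rest of the module. A minimal standalone sketch, assuming azure-identity is available and that the workspaces operation group keeps a list() method (neither is shown in this diff):

# Sketch only: constructing the track 2 client outside the CLI factory above.
from azure.identity import DefaultAzureCredential   # assumption: azure-identity is installed
from azure.mgmt.batchai import BatchAI

# Track 2 management clients take a credential object plus the subscription id.
client = BatchAI(credential=DefaultAzureCredential(), subscription_id="<subscription-id>")

# Operation groups hang off the client as attributes (workspaces, clusters, jobs, file_servers, ...).
for workspace in client.workspaces.list():           # assumed to mirror the track 1 list() signature
    print(workspace.name)
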
42 changes: 21 additions & 21 deletions src/azure-cli/azure/cli/command_modules/batchai/_help.py
@@ -213,17 +213,17 @@
-u $USER -k ~/.ssh/id_rsa.pub
"""

helps['batchai file-server delete'] = """
type: command
short-summary: Delete a file server.
examples:
- name: Delete file server and wait for deletion to be completed.
text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS
- name: Delete file server without asking for confirmation (for non-interactive scenarios).
text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS -y
- name: Request file server deletion without waiting for deletion to be completed.
text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS --no-wait
"""
# helps['batchai file-server delete'] = """
# type: command
# short-summary: Delete a file server.
# examples:
# - name: Delete file server and wait for deletion to be completed.
# text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS
# - name: Delete file server without asking for confirmation (for non-interactive scenarios).
# text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS -y
# - name: Request file server deletion without waiting for deletion to be completed.
# text: az batchai file-server delete -g MyResourceGroup -w MyWorkspace -n MyNFS --no-wait
# """

helps['batchai file-server list'] = """
type: command
@@ -233,16 +233,16 @@
text: az batchai file-server list -g MyResourceGroup -w MyWorkspace -o table
"""

helps['batchai file-server show'] = """
type: command
short-summary: Show information about a file server.
examples:
- name: Show full information about a file server.
text: az batchai file-server show -g MyResourceGroup -w MyWorkspace -n MyNFS
- name: Show file server summary.
text: az batchai file-server show -g MyResourceGroup -w MyWorkspace -n MyNFS -o table
"""
# helps['batchai file-server show'] = """
# type: command
# short-summary: Show information about a file server.
# examples:
# - name: Show full information about a file server.
# text: az batchai file-server show -g MyResourceGroup -w MyWorkspace -n MyNFS
# - name: Show file server summary.
# text: az batchai file-server show -g MyResourceGroup -w MyWorkspace -n MyNFS -o table
#
# """

helps['batchai job'] = """
type: group
18 changes: 9 additions & 9 deletions src/azure-cli/azure/cli/command_modules/batchai/commands.py
@@ -22,7 +22,7 @@
job_show_table_format,
file_list_table_format,
file_server_list_table_format,
file_server_show_table_format,
# file_server_show_table_format,
remote_login_table_format,
node_setup_files_list_table_format,
usage_table_format,
@@ -64,11 +64,11 @@ def load_command_table(self, _):
g.custom_command('create', 'create_workspace')
g.show_command('show', 'get', table_transformer=workspace_show_table_format)
g.custom_command('list', 'list_workspaces', table_transformer=workspace_list_table_format)
g.command('delete', 'delete', supports_no_wait=True, confirmation=True)
g.command('delete', 'begin_delete', supports_no_wait=True, confirmation=True)

with self.command_group('batchai cluster', batchai_cluster_sdk, client_factory=cluster_client_factory) as g:
g.custom_command('create', 'create_cluster', client_factory=batchai_client_factory)
g.command('delete', 'delete', supports_no_wait=True, confirmation=True)
g.command('delete', 'begin_delete', supports_no_wait=True, confirmation=True)
g.show_command('show', 'get', table_transformer=cluster_show_table_format)
g.custom_command('list', 'list_clusters', table_transformer=cluster_list_table_format)
g.custom_command('resize', 'resize_cluster')
@@ -82,15 +82,15 @@ def load_command_table(self, _):
g.custom_command('list', 'list_node_setup_files', table_transformer=node_setup_files_list_table_format)

with self.command_group('batchai experiment', batchai_experiment_sdk, client_factory=experiment_client_factory) as g:
g.command('create', 'create')
g.command('create', 'begin_create')
g.show_command('show', 'get', table_transformer=experiment_show_table_format)
g.command('list', 'list_by_workspace', table_transformer=experiment_list_table_format)
g.command('delete', 'delete', supports_no_wait=True, confirmation=True)
g.command('delete', 'begin_delete', supports_no_wait=True, confirmation=True)

with self.command_group('batchai job', batchai_job_sdk, client_factory=job_client_factory) as g:
g.custom_command('create', 'create_job', client_factory=batchai_client_factory)
g.command('delete', 'delete', supports_no_wait=True, confirmation=True)
g.command('terminate', 'terminate', supports_no_wait=True, confirmation=True)
g.command('delete', 'begin_delete', supports_no_wait=True, confirmation=True)
g.command('terminate', 'begin_terminate', supports_no_wait=True, confirmation=True)
g.show_command('show', 'get', table_transformer=job_show_table_format)
g.command('list', 'list_by_experiment', table_transformer=job_list_table_format)
g.custom_command('wait', 'wait_for_job_completion', client_factory=batchai_client_factory)
@@ -105,8 +105,8 @@ def load_command_table(self, _):

with self.command_group('batchai file-server', batchai_server_sdk, client_factory=file_server_client_factory) as g:
g.custom_command('create', 'create_file_server', no_wait_param='raw', client_factory=batchai_client_factory)
g.command('delete', 'delete', supports_no_wait=True, confirmation=True)
g.show_command('show', 'get', table_transformer=file_server_show_table_format)
# g.command('delete', 'delete', supports_no_wait=True, confirmation=True)  # In track 2 the Delete command is missing
# g.show_command('show', 'get', table_transformer=file_server_show_table_format)  # ditto, the Get command is missing
g.command('list', 'list_by_workspace', table_transformer=file_server_list_table_format)

with self.command_group('batchai', batchai_usage_sdk, client_factory=usage_client_factory, deprecate_info=self.deprecate(hide=True)) as g:
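
The command table above now maps 'create', 'delete' and 'terminate' to the track 2 begin_* methods, which start long-running operations and hand back a poller instead of blocking. A minimal sketch of that pattern, with a placeholder client and resource names that are not taken from this diff:

# Sketch of the LRO pattern behind the begin_* operations wired up above.
# `client` is a BatchAI management client; the resource names are placeholders.
poller = client.workspaces.begin_delete("MyResourceGroup", "MyWorkspace")
poller.wait()              # block until the service finishes the deletion
print(poller.status())     # e.g. "Succeeded"
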
27 changes: 15 additions & 12 deletions src/azure-cli/azure/cli/command_modules/batchai/custom.py
@@ -31,6 +31,7 @@
from azure.cli.core.util import get_default_admin_username
from azure.cli.core.commands.client_factory import get_mgmt_service_client
from azure.cli.core.profiles import ResourceType, get_sdk
from azure.core.exceptions import ResourceNotFoundError
import azure.mgmt.batchai.models as models

# Environment variables for specifying azure storage account and key. We want the user to make explicit
@@ -106,7 +107,7 @@ def _ensure_resource_not_exist(client, resource_group, workspace, name):
client.get(resource_group, workspace, name)
raise CLIError('"{0}" already exists in "{1}" resource group under {2} resource group.'.format(
name, resource_group, workspace))
except CloudError as e:
except ResourceNotFoundError as e:
if e.status_code != 404:
raise

@@ -115,7 +116,7 @@ def _ensure_job_not_exist(client, resource_group, workspace, experiment, name):
try:
client.get(resource_group, workspace, experiment, name)
raise CLIError('A job with given name, experiment, workspace and resource group already exists.')
except CloudError as e:
except ResourceNotFoundError as e:
if e.status_code != 404:
raise
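
Track 2 raises typed exceptions from azure.core instead of msrestazure's CloudError, so a missing resource surfaces as ResourceNotFoundError. A minimal sketch of the probe used by the two helpers above; exists() is a hypothetical illustration, not part of the module:

# Sketch: the track 2 "not found" check, inverted into a hypothetical exists() helper.
from azure.core.exceptions import ResourceNotFoundError

def exists(client, resource_group, workspace, name):
    try:
        client.get(resource_group, workspace, name)   # any operations group exposing get()
        return True
    except ResourceNotFoundError:                     # raised for HTTP 404 in track 2
        return False
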

@@ -615,11 +616,12 @@ def list_workspaces(client, resource_group=None):

def create_workspace(cmd, client, resource_group, workspace_name, location=None):
location = location or _get_resource_group_location(cmd.cli_ctx, resource_group)
return client.create(resource_group, workspace_name, location).result()
parameters = models.WorkspaceCreateParameters(location=location)
return client.begin_create(resource_group, workspace_name, parameters)


def create_experiment(client, resource_group, workspace_name, experiment_name):
return client.create(resource_group, workspace_name, experiment_name).result()
return client.begin_create(resource_group, workspace_name, experiment_name)


def _get_effective_resource_parameters(name_or_id, resource_group, workspace):
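
Both create helpers above now return the LROPoller from begin_create directly; the track 1 code resolved it with .result(). A minimal sketch of resolving the poller when the created model is needed, with placeholder names, and blocking behaviour assumed rather than taken from the CLI:

# Sketch only: resolving the poller returned by the migrated create_workspace helper.
# Resource names are placeholders; calling .result() here is an assumption, not CLI behaviour.
parameters = models.WorkspaceCreateParameters(location="eastus")
poller = client.begin_create("MyResourceGroup", "MyWorkspace", parameters)
workspace = poller.result()        # waits for provisioning and returns the Workspace model
print(workspace.provisioning_state)
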
@@ -688,16 +690,17 @@ def create_cluster(cmd, client, # pylint: disable=too-many-locals
params.subnet = models.ResourceId(id=subnet)
if setup_task:
params = _add_setup_task(setup_task, setup_task_output, params)
return client.clusters.create(resource_group, workspace_name, cluster_name, params)
return client.clusters.begin_create(resource_group, workspace_name, cluster_name, params)


def list_clusters(client, resource_group, workspace_name):
return list(client.list_by_workspace(resource_group, workspace_name))


def resize_cluster(client, resource_group, workspace_name, cluster_name, target):
return client.update(resource_group, workspace_name, cluster_name, scale_settings=models.ScaleSettings(
parameters = models.ClusterUpdateParameters(scale_settings=models.ScaleSettings(
manual=models.ManualScaleSettings(target_node_count=target)))
return client.update(resource_group, workspace_name, cluster_name, parameters=parameters)


def set_cluster_auto_scale_parameters(client, resource_group, workspace_name, cluster_name, min_nodes, max_nodes):
@@ -864,7 +867,7 @@ def create_job(cmd, # pylint: disable=too-many-locals
mount_volumes = _add_azure_container_to_mount_volumes(cmd.cli_ctx, mount_volumes, container_name,
container_mount_path, account_name, account_key)
params.mount_volumes = mount_volumes
return client.jobs.create(resource_group, workspace_name, experiment_name, job_name, params)
return client.jobs.begin_create(resource_group, workspace_name, experiment_name, job_name, params)


def list_files(client, resource_group, workspace_name, experiment_name, job_name,
@@ -899,7 +902,7 @@ def tail_file(client, resource_group, workspace_name, experiment_name, job_name,
break
if url is None:
job = client.get(resource_group, workspace_name, experiment_name, job_name)
if job.execution_state in [models.ExecutionState.succeeded, models.ExecutionState.failed]:
if job.execution_state in [models.ExecutionState.SUCCEEDED, models.ExecutionState.FAILED]:
break
if not reported_absence_of_file:
logger.warning('The file "%s" not found. Waiting for the job to generate it.', file_name)
@@ -916,7 +919,7 @@ def tail_file(client, resource_group, workspace_name, experiment_name, job_name,
downloaded += len(r.content)
print(r.content.decode(), end='')
job = client.get(resource_group, workspace_name, experiment_name, job_name)
if job.execution_state in [models.ExecutionState.succeeded, models.ExecutionState.failed]:
if job.execution_state in [models.ExecutionState.SUCCEEDED, models.ExecutionState.FAILED]:
break
time.sleep(1)

@@ -934,11 +937,11 @@ def wait_for_job_completion(client, resource_group, workspace_name, experiment_n
if job.execution_state != last_state:
logger.warning('Job state: %s', job.execution_state)
last_state = job.execution_state
if job.execution_state == models.ExecutionState.succeeded:
if job.execution_state == models.ExecutionState.SUCCEEDED:
logger.warning('Job completed at %s; execution took %s', str(info.end_time),
str(info.end_time - info.start_time))
return
if job.execution_state == models.ExecutionState.failed:
if job.execution_state == models.ExecutionState.FAILED:
_log_failed_job(resource_group, job)
sys.exit(-1)
time.sleep(check_interval_sec)
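
In track 2 the ExecutionState members are generated in upper case (SUCCEEDED, FAILED), which is the only change in these polling loops; only the Python member names change, not the service-side values. A minimal sketch of the terminal-state test, with is_finished() as a hypothetical helper:

# Sketch: terminal-state check matching the migrated polling loops above.
# is_finished() is illustrative only; `job` is a Job model exposing execution_state.
import azure.mgmt.batchai.models as models

TERMINAL_STATES = (models.ExecutionState.SUCCEEDED, models.ExecutionState.FAILED)

def is_finished(job):
    return job.execution_state in TERMINAL_STATES
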
@@ -1010,7 +1013,7 @@ def create_file_server(client, resource_group, workspace, file_server_name, json
raise CLIError('Ill-formed subnet resource id')
params.subnet = models.ResourceId(id=subnet)

return client.file_servers.create(resource_group, workspace, file_server_name, params, raw=raw)
return client.file_servers.begin_create(resource_group, workspace, file_server_name, params, raw=raw)


def list_file_servers(client, resource_group, workspace_name):