Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Add pai pipeline #2092

Merged
merged 5 commits into from
Feb 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def run(args):
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default=None)
parser.add_argument("--exclude", type=str, default=None)
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local')
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'paiYarn', 'kubeflow', 'frameworkcontroller'], default='local')
parser.add_argument("--local_gpu", action='store_true')
parser.add_argument("--preinstall", action='store_true')
args = parser.parse_args()
Expand Down
23 changes: 22 additions & 1 deletion test/generate_ts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def update_training_service_config(args):
config = get_yml_content(TRAINING_SERVICE_FILE)
if args.nni_manager_ip is not None:
config[args.ts]['nniManagerIp'] = args.nni_manager_ip
if args.ts == 'pai':
if args.ts == 'paiYarn':
if args.pai_user is not None:
config[args.ts]['paiYarnConfig']['userName'] = args.pai_user
if args.pai_pwd is not None:
Expand All @@ -27,6 +27,23 @@ def update_training_service_config(args):
config[args.ts]['trial']['outputDir'] = args.output_dir
if args.vc is not None:
config[args.ts]['trial']['virtualCluster'] = args.vc
if args.ts == 'pai':
    # Populate the 'pai' section of the training-service config. Only options
    # that were supplied on the command line (value is not None) overwrite
    # what is already present in the loaded YAML template.
    if args.pai_user is not None:
        config[args.ts]['paiConfig']['userName'] = args.pai_user
    if args.pai_host is not None:
        config[args.ts]['paiConfig']['host'] = args.pai_host
    if args.pai_token is not None:
        config[args.ts]['paiConfig']['token'] = args.pai_token
    if args.nni_docker_image is not None:
        config[args.ts]['trial']['image'] = args.nni_docker_image
    # The camelCase names below are YAML keys only. argparse stores
    # "--nni_manager_nfs_mount_path" etc. under the snake_case attribute of
    # the same name, so the guards must test those attributes; testing
    # args.nniManagerNFSMountPath would raise AttributeError.
    if args.nni_manager_nfs_mount_path is not None:
        config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nni_manager_nfs_mount_path
    if args.container_nfs_mount_path is not None:
        config[args.ts]['trial']['containerNFSMountPath'] = args.container_nfs_mount_path
    if args.pai_storage_plugin is not None:
        config[args.ts]['trial']['paiStoragePlugin'] = args.pai_storage_plugin
    if args.vc is not None:
        config[args.ts]['trial']['virtualCluster'] = args.vc
elif args.ts == 'kubeflow':
if args.nfs_server is not None:
config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server
Expand Down Expand Up @@ -94,6 +111,10 @@ def convert_command():
parser.add_argument("--data_dir", type=str)
parser.add_argument("--output_dir", type=str)
parser.add_argument("--vc", type=str)
parser.add_argument("--pai_token", type=str)
parser.add_argument("--pai_storage_plugin", type=str)
parser.add_argument("--nni_manager_nfs_mount_path", type=str)
parser.add_argument("--container_nfs_mount_path", type=str)
# args for kubeflow and frameworkController
parser.add_argument("--nfs_server", type=str)
parser.add_argument("--nfs_path", type=str)
Expand Down
6 changes: 3 additions & 3 deletions test/pipelines-it-pai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ jobs:

echo "TEST_IMG:$TEST_IMG"
cd test
python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \
--nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip)
python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
--pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)

PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai --exclude multi_phase
Copy link
Contributor

@ultmaster ultmaster Feb 27, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why exclude multi phase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

paiK8STrainingService does not support multi-phase yet; support may be added in the next release.

PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
displayName: 'integration test'
59 changes: 59 additions & 0 deletions test/pipelines-it-paiYarn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Integration-test pipeline for the 'paiYarn' training service.
jobs:
- job: 'integration_test_paiYarn'
  # NOTE(review): per the Azure Pipelines docs, 0 requests the maximum
  # timeout the agent pool allows rather than "no time at all" - confirm.
  timeoutInMinutes: 0

  steps:
  - script: python3 -m pip install --upgrade pip setuptools --user
    displayName: 'Install python tools'

  # Build the wheel only when a fresh docker image is going to be built from it.
  - script: |
      cd deployment/pypi
      echo 'building prerelease package...'
      make build
      ls $(Build.SourcesDirectory)/deployment/pypi/dist/
    condition: eq( variables['build_docker_img'], 'true' )
    displayName: 'build nni bdist_wheel'

  - script: |
      source install.sh
    displayName: 'Install nni toolkit via source code'

  - script: |
      sudo apt-get install swig -y
      PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
      PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
    displayName: 'Install dependencies for integration tests in PAI mode'

  # Choose the trial image (freshly built or pre-existing), generate the
  # training-service config for paiYarn, then run the config and metrics tests.
  - script: |
      set -e
      # The macro is quoted so that an empty value still yields a valid test expression.
      if [ "$(build_docker_img)" = 'true' ]
      then
        cd deployment/pypi
        docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
        echo 'updating docker file for installing nni from local...'
        # update Dockerfile to install NNI in docker image from whl file built in last step
        # ("-i -e", not "-ie": GNU sed reads "-ie" as in-place with backup suffix "e")
        sed -i -e 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
        cat ../docker/Dockerfile
        export IMG_TAG=`date -u +%y%m%d%H%M`

        echo 'build and upload docker image'
        docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
        docker push $(test_docker_img_name):$IMG_TAG

        export TEST_IMG=$(test_docker_img_name):$IMG_TAG
        cd ../../
      else
        export TEST_IMG=$(existing_docker_img)
      fi

      echo "TEST_IMG:$TEST_IMG"
      cd test
      python3 generate_ts_config.py --ts paiYarn --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \
          --nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip)

      PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts paiYarn
      PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
    displayName: 'integration test'
17 changes: 16 additions & 1 deletion test/training_service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ frameworkcontroller:

local:
trainingServicePlatform: local
pai:
paiYarn:
nniManagerIp:
maxExecDuration: 15m
paiYarnConfig:
Expand All @@ -68,6 +68,21 @@ pai:
memoryMB: 8192
outputDir:
virtualCluster:
pai:
nniManagerIp:
maxExecDuration: 15m
paiConfig:
host:
userName:
trainingServicePlatform: pai
trial:
gpuNum: 1
cpuNum: 1
image:
memoryMB: 8192
nniManagerNFSMountPath:
containerNFSMountPath:
paiStoragePlugin:
remote:
machineList:
- ip:
Expand Down