Skip to content

kubernetes-compute-simple-examples #989

kubernetes-compute-simple-examples

kubernetes-compute-simple-examples #989

name: kubernetes-compute-simple-examples
on:
schedule:
- cron: "0 23 * * *"
workflow_dispatch:
inputs:
LOCATION:
description: 'Resource Region'
required: false
default: 'eastus'
AMLARC_RELEASE_TRAIN:
description: 'Release version: experimental, staging or stable'
required: false
default: 'stable'
REINSTALL_EXTENSION:
description: 'Whether to reinstall extension: true or false'
required: false
default: 'true'
CLEANUP_CLUSTER:
description: 'Whether to delete cluster: true or false'
required: false
default: 'false'
CLEANUP_WORKSPACE:
description: 'Whether to delete workspace: true or false'
required: false
default: 'false'
FILE_TICKET:
description: 'Whether to file icm ticket: true or false'
required: false
default: 'false'
jobs:
test:
runs-on: ubuntu-20.04
env:
SUBSCRIPTION: ${{ secrets.SUBSCRIPTION }}
RESOURCE_GROUP: ${{ secrets.RESOURCE_GROUP }}
KEY_VAULT_NAME: ${{ secrets.KEY_VAULT_NAME }}
LOCATION: eastus
RELEASE_TRAIN: stable
REINSTALL_EXTENSION: true
CLUSTER_TYPE: managedClusters
FILE_TICKET: true
REPOSITORY: https://github.com/Azure/AML-Kubernetes
WORKFLOW: https://github.com/Azure/AML-Kubernetes/actions/workflows/kubernetes-compute-simple-examples.yml
steps:
- name: replace env from workflow_dispatch
if: ${{ github.event_name == 'workflow_dispatch' }}
run: |
echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV
echo "RELEASE_TRAIN=${{ github.event.inputs.AMLARC_RELEASE_TRAIN }}" | tee -a $GITHUB_ENV
echo "REINSTALL_EXTENSION=${{ github.event.inputs.REINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV
echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV
- name: check out repo
uses: actions/checkout@v2
- name: setup python
uses: actions/setup-python@v2
with:
python-version: "3.8"
- name: install tools
run: bash .github/amlarc-tool.sh install_tools
timeout-minutes: 30
- name: azure login
uses: azure/login@v1
with:
creds: ${{secrets.AMLARC_SP_CREDS}}
timeout-minutes: 30
# provision resources
- name: setup_aks
run: |
bash .github/amlarc-tool.sh setup_identity
ID=$(bash .github/amlarc-tool.sh get_identity_id)
bash .github/amlarc-tool.sh setup_aks --assign-identity "${ID}"
timeout-minutes: 30
# - name: connect_arc
# run: bash .github/amlarc-tool.sh connect_arc
# timeout-minutes: 30
- name: install_extension
run: |
export EXTENSION_SETTINGS="enableTraining=True enableInference=True inferenceRouterServiceType=loadBalancer allowInsecureConnections=True"
bash .github/amlarc-tool.sh install_extension
timeout-minutes: 30
- name: setup_workspace
run: bash .github/amlarc-tool.sh setup_workspace
timeout-minutes: 30
- name: setup_compute
run: bash .github/amlarc-tool.sh setup_compute
timeout-minutes: 30
- name: setup_instance_type
run: bash .github/amlarc-tool.sh setup_instance_type
timeout-minutes: 30
# run training test cases
- name: run examples/training/simple-train-cli/job.yml
run: |
bash .github/amlarc-tool.sh run_cli_job examples/training/simple-train-cli/job.yml -c githubtest -it defaultinstancetype
cat examples/training/simple-train-cli/job.yml
continue-on-error: true
timeout-minutes: 30
- name: prepare jupyter dependency
run: |
bash .github/amlarc-tool.sh generate_workspace_config
bash .github/amlarc-tool.sh install_jupyter_dependency
continue-on-error: true
timeout-minutes: 30
- name: run examples/training/simple-train-sdk/img-classification-training.ipynb
run: |
export AMLARC_COMPUTE_NAME=githubtest
bash .github/amlarc-tool.sh run_jupyter_test examples/training/simple-train-sdk/img-classification-training.ipynb
continue-on-error: true
timeout-minutes: 30
# run inference test cases
- name: run examples/inference/simple-flow/endpoint.yml
run: |
set +e
source .github/amlarc-tool.sh
WS_DIR=examples/inference/simple-flow/
SUB_RG_WS=" --subscription ${SUBSCRIPTION} --resource-group ${RESOURCE_GROUP} --workspace-name ${WORKSPACE} "
echo "[JobSubmission] $WS_DIR/endpoint.yml" | tee -a $RESULT_FILE
# create endpoint and deployment
az ml online-endpoint create $SUB_RG_WS --name sklearn-mnist -f $WS_DIR/endpoint.yml --set compute=azureml:$COMPUTE --debug
sleep 60
az ml online-deployment create $SUB_RG_WS --name blue --endpoint sklearn-mnist -f $WS_DIR/blue-deployment.yml --all-traffic --debug
# scoring
sleep 60
primaryKey=$(az ml online-endpoint get-credentials $SUB_RG_WS --name sklearn-mnist -o tsv --query "primaryKey" )
scoring_uri=$(az ml online-endpoint show $SUB_RG_WS --name sklearn-mnist -o tsv --query "scoring_uri" )
sample_data=$(cat $WS_DIR/sample-request.json)
res=$(kubectl exec -n azureml deploy/azureml-fe-v2 -- curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $primaryKey" -s --connect-timeout 5 "$scoring_uri" -d "$sample_data")
echo "$res"
if [ "$res" == "[8]" ] ; then
echo "[JobStatus] $WS_DIR/endpoint.yml Completed" | tee -a $RESULT_FILE
else
echo "[JobStatus] $WS_DIR/endpoint.yml Failed" | tee -a $RESULT_FILE
fi
continue-on-error: true
timeout-minutes: 30
# check result
- name: count_result
if: ${{ always() }}
run: |
bash .github/amlarc-tool.sh count_result
timeout-minutes: 30
# report metrics
- name: download_metrics_dependency
if: ${{ always() && github.event_name != 'pull_request' }}
run: |
if [ "$FILE_TICKET" == "true" ]; then
bash .github/amlarc-tool.sh install_mdm_dependency
fi
timeout-minutes: 30
- name: start_mdm
if: ${{ always() && github.event_name != 'pull_request' }}
run: |
if [ "$FILE_TICKET" == "true" ]; then
# download certificates
export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD
export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD
export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD
export KEY_PEM_NAME=AMLARC-KEY-PEM
export CERT_PEM_NAME=AMLARC-CERT-PEM
bash .github/amlarc-tool.sh download_metrics_info
bash .github/amlarc-tool.sh start_mdm_container
fi
timeout-minutes: 30
- name: report_metrics
if: ${{ always() && github.event_name != 'pull_request' }}
run: |
if [ "$FILE_TICKET" == "true" ]; then
bash .github/amlarc-tool.sh report_test_result_metrics
fi
timeout-minutes: 30
- name: stop_mdm
if: ${{ always() && github.event_name != 'pull_request' }}
run: |
if [ "$FILE_TICKET" == "true" ]; then
bash .github/amlarc-tool.sh stop_mdm_container
fi
timeout-minutes: 30
- name: clean up resources
if: ${{ always() }}
run: |
set +e
bash .github/amlarc-tool.sh delete_endpoints
bash .github/amlarc-tool.sh delete_compute
if [ "${{ github.event.inputs.REINSTALL_EXTENSION }}" == "true" ] ; then
bash .github/amlarc-tool.sh delete_extension
fi
if [ "${{ github.event.inputs.CLEANUP_CLUSTER }}" == "true" ] ; then
bash .github/amlarc-tool.sh delete_arc
bash .github/amlarc-tool.sh delete_aks
fi
if [ "${{ github.event.inputs.CLEANUP_WORKSPACE }}" == "true" ] ; then
bash .github/amlarc-tool.sh delete_workspace
fi
timeout-minutes: 30