From eceed49d28bed7ba3a5d96374147d5794aa4fa10 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sat, 23 Jul 2022 15:37:44 -0700 Subject: [PATCH 01/50] Added AWS EC2 dynamic runner support (#2) --- .../workflows/sycl_linux_build_and_test.yml | 66 ++++- .github/workflows/sycl_nightly.yml | 2 + .github/workflows/sycl_post_commit.yml | 2 + .github/workflows/sycl_precommit.yml | 1 + devops/actions/aws-ec2/action.yml | 80 ++++++ devops/actions/aws-ec2/aws-ec2.js | 230 ++++++++++++++++++ devops/actions/aws-ec2/package.json | 9 + 7 files changed, 389 insertions(+), 1 deletion(-) create mode 100644 devops/actions/aws-ec2/action.yml create mode 100644 devops/actions/aws-ec2/aws-ec2.js create mode 100644 devops/actions/aws-ec2/package.json diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 6e933ff87308c..8cedcef3fad8b 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -64,6 +64,11 @@ on: type: number required: false default: 4 + uniq: + description: Unique string to name dynamic runers in AWS + type: string + required: false + default: ${{ github.run_id }}-${{ github.run_attempt }} jobs: build: @@ -191,8 +196,40 @@ jobs: const script = require('./generate_test_matrix.js'); script({core, process}); +aws-start-matrix: + name: Start AWS Matrix + needs: [ build, resolve_matrix ] + if: ${{ inputs.lts_config != '' }} + strategy: + fail-fast: false + max-parallel: ${{ inputs.max_parallel }} + matrix: + include: ${{ fromJSON(needs.resolve_matrix.outputs.lts) }} + runs-on: ubuntu-latest + environment: aws + steps: + - uses: actions/checkout@v3 + if: ${{ matrix.aws-type }} + with: + path: llvm + - run: npm install ./llvm/devops/actions/aws-ec2 + if: ${{ matrix.aws-type }} + - name: Start AWS EC2 runner + if: ${{ matrix.aws-type }} + uses: ./llvm/devops/actions/aws-ec2 + with: + label: ${{ matrix.runs-on }} + GH_PERSONAL_ACCESS_TOKEN: ${{ 
secrets.GH_PERSONAL_ACCESS_TOKEN }} + AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} + AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} + aws-ami: ${{ matrix.aws-ami }} + aws-spot: ${{ matrix.aws-spot }} + aws-type: ${{ matrix.aws-type }} + aws-disk: ${{ matrix.aws-disk }} + one-job: ${{ matrix.one-job }} + llvm_test_suite: - needs: [build, resolve_matrix] + needs: [build, resolve_matrix, aws-start-matrix] if: ${{ inputs.lts_config != '' }} strategy: fail-fast: false @@ -240,3 +277,30 @@ jobs: results_name_suffix: ${{ matrix.config }}_${{ inputs.build_artifact_suffix }} cmake_args: '${{ matrix.cmake_args }} ${{ inputs.lts_cmake_extra_args }}' + aws-stop-matrix: + name: Stop AWS Matrix + needs: [ aws-start-matrix, resolve_matrix, llvm_test_suite ] + if: ${{ always() && inputs.lts_config != '' }} + strategy: + fail-fast: false + max-parallel: ${{ inputs.max_parallel }} + matrix: + include: ${{ fromJSON(needs.resolve_matrix.outputs.lts) }} + runs-on: ubuntu-latest + environment: aws + steps: + - uses: actions/checkout@v3 + if: ${{ matrix.aws-type }} + with: + path: llvm + - run: npm install ./llvm/devops/actions/aws-ec2 + if: ${{ matrix.aws-type }} + - name: Stop AWS EC2 runner + if: ${{ matrix.aws-type }} + uses: ./llvm/devops/actions/aws-ec2 + with: + label: ${{ matrix.runs-on }} + mode: stop + GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} + AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} diff --git a/.github/workflows/sycl_nightly.yml b/.github/workflows/sycl_nightly.yml index 7e7cae192456b..38f53c5b4e605 100644 --- a/.github/workflows/sycl_nightly.yml +++ b/.github/workflows/sycl_nightly.yml @@ -13,6 +13,7 @@ jobs: ubuntu2004_build_test: if: github.repository == 'intel/llvm' uses: ./.github/workflows/sycl_linux_build_and_test.yml + secrets: inherit with: build_cache_root: "/__w/" build_artifact_suffix: default @@ -22,6 +23,7 @@ jobs: ubuntu2004_opaque_pointers_build_test: if: github.repository == 
'intel/llvm' uses: ./.github/workflows/sycl_linux_build_and_test.yml + secrets: inherit with: build_cache_root: "/__w/" build_cache_suffix: opaque_pointers diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index d54cfc67e69c9..5aae06728e175 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -14,12 +14,14 @@ jobs: linux_default: name: Linux Default uses: ./.github/workflows/sycl_linux_build_and_test.yml + secrets: inherit with: build_cache_root: "/__w/llvm" build_artifact_suffix: default linux_no_assert: name: Linux (no assert) uses: ./.github/workflows/sycl_linux_build_and_test.yml + secrets: inherit with: build_cache_root: "/__w/llvm" build_cache_suffix: gcc_no_assertions diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index 374f930d00cea..e0704c083b579 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -36,6 +36,7 @@ jobs: needs: lint if: always() && (success() || contains(github.event.pull_request.labels.*.name, 'ignore-lint')) uses: ./.github/workflows/sycl_linux_build_and_test.yml + secrets: inherit with: build_cache_root: "/__w/" build_cache_size: "8G" diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml new file mode 100644 index 0000000000000..47defc4752464 --- /dev/null +++ b/devops/actions/aws-ec2/action.yml @@ -0,0 +1,80 @@ +name: aws-ec2 +description: Starts AWS EC2 instance with github actions runner agent in it to process one job + +inputs: + label: + description: "Name of the unique label assigned to the runner used as 'runs-on' property for the following jobs" + required: true + GH_PERSONAL_ACCESS_TOKEN: + description: "Github personal access token with repo permission" + required: true + AWS_ACCESS_KEY: + description: "AWS access id" + required: true + AWS_SECRET_KEY: + description: "WS access secret key" + required: true + aws-region: + 
description: "AWS EC2 region" + required: false + default: "us-east-2" # Ohio + aws-ami: + description: "AWS AMI id. Makes sense only for start mode" + required: false + default: "ami-0966bccbb521ccb24" # Ubuntu 22.04 (ami-02f3416038bdb17fb with /dev/sda1 disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # apt-get update + # curl -fsSL https://get.docker.com -o /tmp/get-docker.sh + # sh /tmp/get-docker.sh # or "yum install -y docker" for Amazon Linux or RHEL/CentOS + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + # "ami-02ec0f344128253f9" # Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver (ami-06bf0a3f89fe08f0a with /dev/xvda disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # yum update -y + # amazon-linux-extras install docker + # sudo systemctl --now enable docker + # distribution=$(. /etc/os-release;echo $ID$VERSION_ID) && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo + # yum-config-manager --disable amzn2-graphics; yum clean expire-cache; yum install -y nvidia-docker2; systemctl restart docker + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + # "ami-0ccda708841dde988" # Amazon Linux 2 AMI with AMD Radeon Pro Driver (ami-0bb1072e787242eb6 with /dev/xvda disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # sh ./get-docker.sh # or "yum install -y docker" for Amazon Linux or RHEL/CentOS + # amazon-linux-extras install docker + # sudo systemctl --now enable docker + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + aws-type: + 
description: "AWS EC2 instance type. Makes sense only for start mode" + required: false + default: "[ \"t2.micro\" ]" + # "[ \"g4dn.2xlarge\" ]": 1 NVIDIA T4 GPU, 8 CPU, 32 GB RAM + # "[ \"g4ad.4xlarge\" ]": 1 AMD Radeon Pro V520 GPU, 16 CPU, 64 GB RAM (overloaded pool so do not use spot here now) + aws-spot: + description: "Enable usage of spot instances to save money (less reliable). Makes sense only for start mode" + requred: false + default: true + aws-disk: + description: "AWS EC2 instance AMI specific disk device path and size in GB (8 by default). Makes sense only for start mode" + required: false + default: "/dev/sda1:16" # Github actions container engine will fail with lack of disk space for 8GB + aws-timebomb: + description: "AWS EC2 instance maximum live time. Makes sense only for start mode" + required: false + default: 1h + + mode: + description: "Mode of operation: start or stop" + required: false + default: start + one-job: + description: "Will terminate AWS EC2 instance after one job (not waiting for stop job) saving money" + required: false + default: true + +runs: + using: node12 + main: ./aws-ec2.js diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js new file mode 100644 index 0000000000000..50fe21f0abc46 --- /dev/null +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -0,0 +1,230 @@ +const core = require('@actions/core'); +const github = require('@actions/github'); +const AWS = require('aws-sdk'); + +// shortcut to reference current repo +const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; + +// get github registration token that allows to register new runner based on +// GH_PERSONAL_ACCESS_TOKEN github user api key +async function getGithubRegToken() { + const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); + + try { + const response = await octokit.request( + `POST /repos/${repo}/actions/runners/registration-token`); + core.info("Got Github Actions Runner registration token"); 
+ return response.data.token; + } catch (error) { + core.error("Error getting Github Actions Runner registration token"); + throw error; + } +} + +// add delay before retrying promise one more time +function rejectDelay(reason) { + return new Promise(function( + resolve, reject) { setTimeout(reject.bind(null, reason), 10 * 1000); }); +} + +// starts AWS EC2 instance that will spawn Github runner for a given label +async function start(label) { + const ec2 = new AWS.EC2(); + + // we better keep GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 + // userscript so it will keep secret + const reg_token = await getGithubRegToken(); + const timebomb = core.getInput("aws-timebomb"); + const raw_ec2types = JSON.parse(core.getInput("aws-type")); + const ec2types = + typeof raw_ec2types == "string" ? [ raw_ec2types ] : raw_ec2types; + const ec2disk = core.getInput("aws-disk"); + const ec2spot = core.getInput("aws-spot") != "false"; + const onejob = core.getInput("one-job") != "false"; + + let ec2id; // AWS EC2 instance id + let last_error; // last error that ill be thrown in case all our attemps in + // instance creation will fails + // loop for spot/ondemand instances + for (let spot of (ec2spot ? [ 1, 0 ] : [ 0 ])) { + const spot_str = spot ? 
"spot" : "on-demand"; + for (let ec2type of ec2types) { // iterate for provided instance types + const setup_github_actions_runner = [ + `#!/bin/bash -x`, `mkdir actions-runner`, `cd actions-runner`, + // we can not place runner into AMI image since it is updated often and + // latest version in required to connect to github + `export RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | sed -n \'s,.*"tag_name": "v\\(.*\\)".*,\\1,p\')`, + `curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, + `tar xf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, + `su gh_runner -c "./config.sh --unattended --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, + // timebomb to avoid paying for stale AWS instances + `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, + // ephemeral runner will exit after one job so we will terminate + // instance sooner + onejob ? 
`su gh_runner -c "./run.sh --ephemeral"` + : `su gh_runner -c "./run.sh"`, + `su gh_runner -c "./config.sh remove --token ${reg_token}"`, + `shutdown -h now` // in case we launch insance with + // InstanceInitiatedShutdownBehavior = "terminate" it + // will terminate instance here as well + ]; + try { + let params = { + ImageId : core.getInput("aws-ami"), + InstanceType : ec2type, + InstanceInitiatedShutdownBehavior : "terminate", + UserData : Buffer.from(setup_github_actions_runner.join('\n')) + .toString('base64'), + MinCount : 1, + MaxCount : 1, + TagSpecifications : [ { + ResourceType : "instance", + Tags : [ {Key : "Label", Value : label} ] + } ] + }; + if (spot) + params.InstanceMarketOptions = {MarketType : "spot"}; + if (ec2disk) { + const items = ec2disk.split(':'); + params.BlockDeviceMappings = + [ {DeviceName : items[0], Ebs : {VolumeSize : items[1]}} ]; + } + const result = await ec2.runInstances(params).promise(); + ec2id = result.Instances[0].InstanceId; + core.info(`Created AWS EC2 ${spot_str} instance ${ec2id} of ${ec2type} type with ${label} label`); + break; + } catch (error) { + core.warning(`Error creating AWS EC2 ${spot_str} instance of ${ec2type} type with ${label} label`); + last_error = error; + } + } + // we already created instance and do not need to iterate these loops + if (ec2id) + break; + } + if (last_error) { + core.error(`Error creating AWS EC2 instance with ${label} label`); + throw last_error; + } + + // wait untill instance will be found running before continuing (spot instance + // can be created but never run and will be in pending state untill + // termination) + let p = ec2.waitFor("instanceRunning", + {Filters : [ {Name : "tag:Label", Values : [ label ]} ]}) + .promise(); + for (let i = 0; i < 2; i++) { + p = p.catch(function() { + core.warning(`Error searching for running AWS EC2 spot instance ${ec2id} with ${label} label. 
Will retry.`); + }).catch(rejectDelay); + } + p = p.then(function() { + core.info(`Found running AWS EC2 spot instance ${ec2id} with ${label} label`); + }).catch(function(error) { + core.error(`Error searching for running AWS EC2 spot instance ${ec2id} with ${label} label`); + throw error; + }); +} + +// terminate (completely remove) AWS EC instances (normally one instance) with +// given tag label and also remove all Github actions runners (normally one +// runner) with that label +async function stop(label) { + // last error that will be thrown in case something will break here + let last_error; + const ec2 = new AWS.EC2(); + + // find AWS EC2 instances with tag label + let instances; + try { + instances = + await ec2 + .describeInstances( + {Filters : [ {Name : "tag:Label", Values : [ label ]} ]}) + .promise(); + core.info(`Searched for AWS EC2 instance with label ${label}`); + } catch (error) { + core.error(`Error searching for AWS EC2 instance with label ${label}`); + last_error = error; + } + + // remove all found AWS EC2 instances + if (instances) + for (const reservation of instances.Reservations) { + for (const instance of reservation.Instances) { + try { + await ec2.terminateInstances({InstanceIds : [ instance.InstanceId ]}) + .promise(); + core.info(`Terminated AWS EC2 instance ${instance.InstanceId} with label ${label}`); + } catch (error) { + core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}`); + last_error = error; + } + } + } + + // find all Github action runners + const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); + let runners; + try { + runners = await octokit.paginate(`GET /repos/${repo}/actions/runners`); + core.info(`Searched for Github action runners with label ${label}`); + } catch (error) { + core.info(`Error searching for Github action runners with label ${label}`); + last_error = error; + } + + // remove Github action runners with specified label + if (runners) + for 
(runner of runners) { + let label_found = false; + for (label_obj of runner.labels) + if (label_obj.name == label) { + label_found = true; + break; + } + if (!label_found) + continue; + let p = + octokit.request(`DELETE /repos/${repo}/actions/runners/${runner.id}`); + // retry deletion up to 5 times (with 10 seconds delay) sincec Github can + // not remove runners still marked as active (with running job) + for (let i = 0; i < 5; i++) { + p = p.catch(function() { + core.warning(`Error removing Github self-hosted runner ${runner.id} with ${label}. Will retry.`); + }).catch(rejectDelay); + } + p = p.then(function() { + core.info(`Removed Github self-hosted runner ${runner.id} with ${label}`); + }).catch(function(error) { + core.error(`Error removing Github self-hosted runner ${runner.id} with ${label}`); + last_error = error; + }); + } + + if (last_error) + throw last_error; +} + +(async function() { + try { + // provide AWS credentials + AWS.config.update({ + accessKeyId : core.getInput("AWS_ACCESS_KEY"), + secretAccessKey : core.getInput("AWS_SECRET_KEY"), + region : core.getInput("aws-region") + }); + // mode is start or stop + const mode = core.getInput("mode"); + // label used to indentify AWS EC2 instances and Github runners + const label = core.getInput("label"); + if (mode == "start") { + await start(label); + } else if (mode == "stop") { + await stop(label); + } + } catch (error) { + core.error(error); + core.setFailed(error.message); + } +})(); diff --git a/devops/actions/aws-ec2/package.json b/devops/actions/aws-ec2/package.json new file mode 100644 index 0000000000000..0aa9f488bc77c --- /dev/null +++ b/devops/actions/aws-ec2/package.json @@ -0,0 +1,9 @@ +{ + "name": "aws-ec2", + "description": "Starts AWS EC2 instance with github actions runner agent in it to process one job", + "dependencies": { + "@actions/core": "^1.9.0", + "@actions/github": "^5.0.3", + "aws-sdk": "^2.1179.0" + } +} From 9efc4e208f54aff9142060a94cb03a504a042262 Mon Sep 17 00:00:00 
2001 From: apstasen Date: Sat, 23 Jul 2022 16:08:30 -0700 Subject: [PATCH 02/50] Fixed indentation --- .github/workflows/sycl_linux_build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 8cedcef3fad8b..d2b54727c622e 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -227,8 +227,8 @@ aws-start-matrix: aws-type: ${{ matrix.aws-type }} aws-disk: ${{ matrix.aws-disk }} one-job: ${{ matrix.one-job }} - - llvm_test_suite: + +llvm_test_suite: needs: [build, resolve_matrix, aws-start-matrix] if: ${{ inputs.lts_config != '' }} strategy: From 7a4e3ec960bdd34ba0877483bda6e996587ff284 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sat, 23 Jul 2022 16:11:03 -0700 Subject: [PATCH 03/50] Fixed intendation --- .github/workflows/sycl_linux_build_and_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index d2b54727c622e..bb1c52d476c36 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -196,7 +196,7 @@ jobs: const script = require('./generate_test_matrix.js'); script({core, process}); -aws-start-matrix: + aws-start-matrix: name: Start AWS Matrix needs: [ build, resolve_matrix ] if: ${{ inputs.lts_config != '' }} @@ -228,7 +228,7 @@ aws-start-matrix: aws-disk: ${{ matrix.aws-disk }} one-job: ${{ matrix.one-job }} -llvm_test_suite: + llvm_test_suite: needs: [build, resolve_matrix, aws-start-matrix] if: ${{ inputs.lts_config != '' }} strategy: @@ -303,4 +303,4 @@ llvm_test_suite: mode: stop GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} - AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} + AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} 
From 3cd6efb482b45639fb7351404c4804ea7cc42c9d Mon Sep 17 00:00:00 2001 From: apstasen Date: Sat, 23 Jul 2022 16:12:56 -0700 Subject: [PATCH 04/50] Remove whitespace --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index bb1c52d476c36..843c1063e3bb9 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -303,4 +303,4 @@ jobs: mode: stop GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} - AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} + AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} From 23ab5a7588bce54fb72741c1c5589a0030bfd53d Mon Sep 17 00:00:00 2001 From: apstasen Date: Sat, 23 Jul 2022 16:16:57 -0700 Subject: [PATCH 05/50] Clarified some messages text --- devops/actions/aws-ec2/aws-ec2.js | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 50fe21f0abc46..f2d29ac8edeb7 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -59,8 +59,7 @@ async function start(label) { `su gh_runner -c "./config.sh --unattended --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, // timebomb to avoid paying for stale AWS instances `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, - // ephemeral runner will exit after one job so we will terminate - // instance sooner + // ephemeral runner will exit after one job so we will terminate instance sooner onejob ? 
`su gh_runner -c "./run.sh --ephemeral"` : `su gh_runner -c "./run.sh"`, `su gh_runner -c "./config.sh remove --token ${reg_token}"`, @@ -115,13 +114,13 @@ async function start(label) { .promise(); for (let i = 0; i < 2; i++) { p = p.catch(function() { - core.warning(`Error searching for running AWS EC2 spot instance ${ec2id} with ${label} label. Will retry.`); + core.warning(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label. Will retry.`); }).catch(rejectDelay); } p = p.then(function() { - core.info(`Found running AWS EC2 spot instance ${ec2id} with ${label} label`); + core.info(`Found running AWS EC2 instance ${ec2id} with ${label} label`); }).catch(function(error) { - core.error(`Error searching for running AWS EC2 spot instance ${ec2id} with ${label} label`); + core.error(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label`); throw error; }); } From 8fefd4f4ceb71b0b1e15aff91250cc1738552bdb Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 10:04:47 -0700 Subject: [PATCH 06/50] Removed not needed setup line in comment --- devops/actions/aws-ec2/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index 47defc4752464..fb34a10e0cc69 100644 --- a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -41,7 +41,6 @@ inputs: # "ami-0ccda708841dde988" # Amazon Linux 2 AMI with AMD Radeon Pro Driver (ami-0bb1072e787242eb6 with /dev/xvda disk) with docker installed and gh_runner (1001) like this: # sudo -s - # sh ./get-docker.sh # or "yum install -y docker" for Amazon Linux or RHEL/CentOS # amazon-linux-extras install docker # sudo systemctl --now enable docker # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner From 3b828e60c76fb524ca7989f1eeb3c0cbc5dd8611 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 10:07:23 
-0700 Subject: [PATCH 07/50] Clarified description --- devops/actions/aws-ec2/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/package.json b/devops/actions/aws-ec2/package.json index 0aa9f488bc77c..8649a3ce2798a 100644 --- a/devops/actions/aws-ec2/package.json +++ b/devops/actions/aws-ec2/package.json @@ -1,6 +1,6 @@ { "name": "aws-ec2", - "description": "Starts AWS EC2 instance with github actions runner agent in it to process one job", + "description": "Starts AWS EC2 spot instance with Github actions runner agent in it", "dependencies": { "@actions/core": "^1.9.0", "@actions/github": "^5.0.3", From 3cabe7b7c5336057bfcd415050d95053e83048c0 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 10:08:00 -0700 Subject: [PATCH 08/50] Clarified description --- devops/actions/aws-ec2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index fb34a10e0cc69..dd012faad67f9 100644 --- a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -1,5 +1,5 @@ name: aws-ec2 -description: Starts AWS EC2 instance with github actions runner agent in it to process one job +description: Starts AWS EC2 instance with Github actions runner agent in it inputs: label: From 677ea19faee6c278a1fc9c7f87ffe1549097f899 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 10:08:34 -0700 Subject: [PATCH 09/50] Fixed typo in description --- devops/actions/aws-ec2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index dd012faad67f9..5f5684a5085d7 100644 --- a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -12,7 +12,7 @@ inputs: description: "AWS access id" required: true AWS_SECRET_KEY: - description: "WS access secret key" + description: "AWS access secret key" required: true aws-region: 
description: "AWS EC2 region" From 596929a53f48738b5c01ac5db2e0becbab0a1c5f Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 10:53:01 -0700 Subject: [PATCH 10/50] Fixed --ephemeral option usage (should be in config.sh) --- devops/actions/aws-ec2/aws-ec2.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index f2d29ac8edeb7..497a12a68ee65 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -41,6 +41,8 @@ async function start(label) { const ec2disk = core.getInput("aws-disk"); const ec2spot = core.getInput("aws-spot") != "false"; const onejob = core.getInput("one-job") != "false"; + // ephemeral runner will exit after one job so we will terminate instance sooner + const ephemeral_str = onejob ? "--ephemeral" : ""; let ec2id; // AWS EC2 instance id let last_error; // last error that ill be thrown in case all our attemps in @@ -56,12 +58,10 @@ async function start(label) { `export RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | sed -n \'s,.*"tag_name": "v\\(.*\\)".*,\\1,p\')`, `curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, `tar xf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, - `su gh_runner -c "./config.sh --unattended --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, + `su gh_runner -c "./config.sh --unattended ${ephemeral_str} --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, // timebomb to avoid paying for stale AWS instances `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, - // ephemeral runner will exit after one job so 
we will terminate instance sooner - onejob ? `su gh_runner -c "./run.sh --ephemeral"` - : `su gh_runner -c "./run.sh"`, + `su gh_runner -c "./run.sh"`, `su gh_runner -c "./config.sh remove --token ${reg_token}"`, `shutdown -h now` // in case we launch insance with // InstanceInitiatedShutdownBehavior = "terminate" it From 565732b0ba32c753d839a476531ee7d6187fbd6c Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 11:00:11 -0700 Subject: [PATCH 11/50] Formatted text for lint --- devops/actions/aws-ec2/aws-ec2.js | 51 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 497a12a68ee65..bb3f72298fe69 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -41,7 +41,8 @@ async function start(label) { const ec2disk = core.getInput("aws-disk"); const ec2spot = core.getInput("aws-spot") != "false"; const onejob = core.getInput("one-job") != "false"; - // ephemeral runner will exit after one job so we will terminate instance sooner + // ephemeral runner will exit after one job so we will terminate instance + // sooner const ephemeral_str = onejob ? 
"--ephemeral" : ""; let ec2id; // AWS EC2 instance id @@ -58,9 +59,13 @@ async function start(label) { `export RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | sed -n \'s,.*"tag_name": "v\\(.*\\)".*,\\1,p\')`, `curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, `tar xf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, - `su gh_runner -c "./config.sh --unattended ${ephemeral_str} --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, + `su gh_runner -c "./config.sh --unattended ${ + ephemeral_str} --url https://github.com/${repo} --token ${ + reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${ + label} --replace || shutdown -h now"`, // timebomb to avoid paying for stale AWS instances - `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, + `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${ + reg_token}"; shutdown -h now) &`, `su gh_runner -c "./run.sh"`, `su gh_runner -c "./config.sh remove --token ${reg_token}"`, `shutdown -h now` // in case we launch insance with @@ -73,7 +78,7 @@ async function start(label) { InstanceType : ec2type, InstanceInitiatedShutdownBehavior : "terminate", UserData : Buffer.from(setup_github_actions_runner.join('\n')) - .toString('base64'), + .toString('base64'), MinCount : 1, MaxCount : 1, TagSpecifications : [ { @@ -90,10 +95,12 @@ async function start(label) { } const result = await ec2.runInstances(params).promise(); ec2id = result.Instances[0].InstanceId; - core.info(`Created AWS EC2 ${spot_str} instance ${ec2id} of ${ec2type} type with ${label} label`); + core.info(`Created AWS EC2 ${spot_str} instance ${ec2id} of ${ + ec2type} type with ${label} label`); break; } catch (error) { - core.warning(`Error creating AWS EC2 
${spot_str} instance of ${ec2type} type with ${label} label`); + core.warning(`Error creating AWS EC2 ${spot_str} instance of ${ + ec2type} type with ${label} label`); last_error = error; } } @@ -114,13 +121,16 @@ async function start(label) { .promise(); for (let i = 0; i < 2; i++) { p = p.catch(function() { - core.warning(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label. Will retry.`); - }).catch(rejectDelay); + core.warning(`Error searching for running AWS EC2 instance ${ + ec2id} with ${label} label. Will retry.`); + }).catch(rejectDelay); } p = p.then(function() { - core.info(`Found running AWS EC2 instance ${ec2id} with ${label} label`); - }).catch(function(error) { - core.error(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label`); + core.info( + `Found running AWS EC2 instance ${ec2id} with ${label} label`); + }).catch(function(error) { + core.error(`Error searching for running AWS EC2 instance ${ec2id} with ${ + label} label`); throw error; }); } @@ -154,9 +164,11 @@ async function stop(label) { try { await ec2.terminateInstances({InstanceIds : [ instance.InstanceId ]}) .promise(); - core.info(`Terminated AWS EC2 instance ${instance.InstanceId} with label ${label}`); + core.info(`Terminated AWS EC2 instance ${ + instance.InstanceId} with label ${label}`); } catch (error) { - core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}`); + core.error(`Error terminating AWS EC2 instance ${ + instance.InstanceId} with label ${label}`); last_error = error; } } @@ -190,13 +202,16 @@ async function stop(label) { // not remove runners still marked as active (with running job) for (let i = 0; i < 5; i++) { p = p.catch(function() { - core.warning(`Error removing Github self-hosted runner ${runner.id} with ${label}. Will retry.`); - }).catch(rejectDelay); + core.warning(`Error removing Github self-hosted runner ${ + runner.id} with ${label}. 
Will retry.`); + }).catch(rejectDelay); } p = p.then(function() { - core.info(`Removed Github self-hosted runner ${runner.id} with ${label}`); - }).catch(function(error) { - core.error(`Error removing Github self-hosted runner ${runner.id} with ${label}`); + core.info(`Removed Github self-hosted runner ${runner.id} with ${ + label}`); + }).catch(function(error) { + core.error(`Error removing Github self-hosted runner ${ + runner.id} with ${label}`); last_error = error; }); } From c9c40f646bdad1c0ad2ff948bd3ddd7173737056 Mon Sep 17 00:00:00 2001 From: apstasen Date: Sun, 24 Jul 2022 11:06:35 -0700 Subject: [PATCH 12/50] Typo fix in description --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 843c1063e3bb9..f5fef64a154a9 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -65,7 +65,7 @@ on: required: false default: 4 uniq: - description: Unique string to name dynamic runers in AWS + description: Unique string to name dynamic runners in AWS type: string required: false default: ${{ github.run_id }}-${{ github.run_attempt }} From df24c1af0ea845765df6d6e00e76ffe063b291c8 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sun, 24 Jul 2022 12:39:57 -0700 Subject: [PATCH 13/50] Revert "Formatted text for lint" This reverts commit 565732b0ba32c753d839a476531ee7d6187fbd6c. 
--- devops/actions/aws-ec2/aws-ec2.js | 51 +++++++++++-------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index bb3f72298fe69..497a12a68ee65 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -41,8 +41,7 @@ async function start(label) { const ec2disk = core.getInput("aws-disk"); const ec2spot = core.getInput("aws-spot") != "false"; const onejob = core.getInput("one-job") != "false"; - // ephemeral runner will exit after one job so we will terminate instance - // sooner + // ephemeral runner will exit after one job so we will terminate instance sooner const ephemeral_str = onejob ? "--ephemeral" : ""; let ec2id; // AWS EC2 instance id @@ -59,13 +58,9 @@ async function start(label) { `export RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | sed -n \'s,.*"tag_name": "v\\(.*\\)".*,\\1,p\')`, `curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, `tar xf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz || shutdown -h now`, - `su gh_runner -c "./config.sh --unattended ${ - ephemeral_str} --url https://github.com/${repo} --token ${ - reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${ - label} --replace || shutdown -h now"`, + `su gh_runner -c "./config.sh --unattended ${ephemeral_str} --url https://github.com/${repo} --token ${reg_token} --name ${label}_${ec2type}_${spot_str} --labels ${label} --replace || shutdown -h now"`, // timebomb to avoid paying for stale AWS instances - `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${ - reg_token}"; shutdown -h now) &`, + `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, `su gh_runner -c "./run.sh"`, `su gh_runner -c "./config.sh remove --token ${reg_token}"`, `shutdown -h now` // 
in case we launch insance with @@ -78,7 +73,7 @@ async function start(label) { InstanceType : ec2type, InstanceInitiatedShutdownBehavior : "terminate", UserData : Buffer.from(setup_github_actions_runner.join('\n')) - .toString('base64'), + .toString('base64'), MinCount : 1, MaxCount : 1, TagSpecifications : [ { @@ -95,12 +90,10 @@ async function start(label) { } const result = await ec2.runInstances(params).promise(); ec2id = result.Instances[0].InstanceId; - core.info(`Created AWS EC2 ${spot_str} instance ${ec2id} of ${ - ec2type} type with ${label} label`); + core.info(`Created AWS EC2 ${spot_str} instance ${ec2id} of ${ec2type} type with ${label} label`); break; } catch (error) { - core.warning(`Error creating AWS EC2 ${spot_str} instance of ${ - ec2type} type with ${label} label`); + core.warning(`Error creating AWS EC2 ${spot_str} instance of ${ec2type} type with ${label} label`); last_error = error; } } @@ -121,16 +114,13 @@ async function start(label) { .promise(); for (let i = 0; i < 2; i++) { p = p.catch(function() { - core.warning(`Error searching for running AWS EC2 instance ${ - ec2id} with ${label} label. Will retry.`); - }).catch(rejectDelay); + core.warning(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label. 
Will retry.`); + }).catch(rejectDelay); } p = p.then(function() { - core.info( - `Found running AWS EC2 instance ${ec2id} with ${label} label`); - }).catch(function(error) { - core.error(`Error searching for running AWS EC2 instance ${ec2id} with ${ - label} label`); + core.info(`Found running AWS EC2 instance ${ec2id} with ${label} label`); + }).catch(function(error) { + core.error(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label`); throw error; }); } @@ -164,11 +154,9 @@ async function stop(label) { try { await ec2.terminateInstances({InstanceIds : [ instance.InstanceId ]}) .promise(); - core.info(`Terminated AWS EC2 instance ${ - instance.InstanceId} with label ${label}`); + core.info(`Terminated AWS EC2 instance ${instance.InstanceId} with label ${label}`); } catch (error) { - core.error(`Error terminating AWS EC2 instance ${ - instance.InstanceId} with label ${label}`); + core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}`); last_error = error; } } @@ -202,16 +190,13 @@ async function stop(label) { // not remove runners still marked as active (with running job) for (let i = 0; i < 5; i++) { p = p.catch(function() { - core.warning(`Error removing Github self-hosted runner ${ - runner.id} with ${label}. Will retry.`); - }).catch(rejectDelay); + core.warning(`Error removing Github self-hosted runner ${runner.id} with ${label}. 
Will retry.`); + }).catch(rejectDelay); } p = p.then(function() { - core.info(`Removed Github self-hosted runner ${runner.id} with ${ - label}`); - }).catch(function(error) { - core.error(`Error removing Github self-hosted runner ${ - runner.id} with ${label}`); + core.info(`Removed Github self-hosted runner ${runner.id} with ${label}`); + }).catch(function(error) { + core.error(`Error removing Github self-hosted runner ${runner.id} with ${label}`); last_error = error; }); } From 15deeae4fae084c2d9bcb26e34e7fb45765d56ac Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sun, 24 Jul 2022 12:56:22 -0700 Subject: [PATCH 14/50] Restored original formatting not warped by lint --- devops/actions/aws-ec2/aws-ec2.js | 95 +++++++++++++------------------ 1 file changed, 41 insertions(+), 54 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 497a12a68ee65..0d94c7de78ea1 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -1,6 +1,6 @@ -const core = require('@actions/core'); +const core = require('@actions/core'); const github = require('@actions/github'); -const AWS = require('aws-sdk'); +const AWS = require('aws-sdk'); // shortcut to reference current repo const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; @@ -11,8 +11,7 @@ async function getGithubRegToken() { const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); try { - const response = await octokit.request( - `POST /repos/${repo}/actions/runners/registration-token`); + const response = await octokit.request(`POST /repos/${repo}/actions/runners/registration-token`); core.info("Got Github Actions Runner registration token"); return response.data.token; } catch (error) { @@ -23,8 +22,9 @@ async function getGithubRegToken() { // add delay before retrying promise one more time function rejectDelay(reason) { - return new Promise(function( - resolve, reject) { setTimeout(reject.bind(null, 
reason), 10 * 1000); }); + return new Promise(function(resolve, reject) { + setTimeout(reject.bind(null, reason), 10 * 1000); + }); } // starts AWS EC2 instance that will spawn Github runner for a given label @@ -33,20 +33,19 @@ async function start(label) { // we better keep GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 // userscript so it will keep secret - const reg_token = await getGithubRegToken(); - const timebomb = core.getInput("aws-timebomb"); + const reg_token = await getGithubRegToken(); + const timebomb = core.getInput("aws-timebomb"); const raw_ec2types = JSON.parse(core.getInput("aws-type")); - const ec2types = - typeof raw_ec2types == "string" ? [ raw_ec2types ] : raw_ec2types; - const ec2disk = core.getInput("aws-disk"); - const ec2spot = core.getInput("aws-spot") != "false"; - const onejob = core.getInput("one-job") != "false"; + const ec2types = typeof raw_ec2types == "string" ? [ raw_ec2types ] : raw_ec2types; + const ec2disk = core.getInput("aws-disk"); + const ec2spot = core.getInput("aws-spot") != "false"; + const onejob = core.getInput("one-job") != "false"; // ephemeral runner will exit after one job so we will terminate instance sooner const ephemeral_str = onejob ? "--ephemeral" : ""; let ec2id; // AWS EC2 instance id - let last_error; // last error that ill be thrown in case all our attemps in - // instance creation will fails + // last error that will be thrown in case all our attemps in instance creation will fails + let last_error; // loop for spot/ondemand instances for (let spot of (ec2spot ? [ 1, 0 ] : [ 0 ])) { const spot_str = spot ? 
"spot" : "on-demand"; @@ -63,30 +62,25 @@ async function start(label) { `(sleep ${timebomb}; su gh_runner -c "./config.sh remove --token ${reg_token}"; shutdown -h now) &`, `su gh_runner -c "./run.sh"`, `su gh_runner -c "./config.sh remove --token ${reg_token}"`, - `shutdown -h now` // in case we launch insance with - // InstanceInitiatedShutdownBehavior = "terminate" it - // will terminate instance here as well + // in case we launch insance with InstanceInitiatedShutdownBehavior = "terminate" it will terminate instance here as well + `shutdown -h now` ]; try { let params = { - ImageId : core.getInput("aws-ami"), - InstanceType : ec2type, - InstanceInitiatedShutdownBehavior : "terminate", - UserData : Buffer.from(setup_github_actions_runner.join('\n')) - .toString('base64'), - MinCount : 1, - MaxCount : 1, - TagSpecifications : [ { - ResourceType : "instance", - Tags : [ {Key : "Label", Value : label} ] - } ] + ImageId: core.getInput("aws-ami"), + InstanceType: ec2type, + UserData: Buffer.from(setup_github_actions_runner.join('\n')).toString('base64'), + MinCount: 1, + MaxCount: 1, + InstanceInitiatedShutdownBehavior: "terminate", + TagSpecifications: [ + { ResourceType: "instance", Tags: [ {Key: "Label", Value: label} ] } + ] }; - if (spot) - params.InstanceMarketOptions = {MarketType : "spot"}; + if (spot) params.InstanceMarketOptions = { MarketType: "spot" }; if (ec2disk) { const items = ec2disk.split(':'); - params.BlockDeviceMappings = - [ {DeviceName : items[0], Ebs : {VolumeSize : items[1]}} ]; + params.BlockDeviceMappings = [ {DeviceName: items[0], Ebs: {VolumeSize: items[1]}} ]; } const result = await ec2.runInstances(params).promise(); ec2id = result.Instances[0].InstanceId; @@ -98,8 +92,7 @@ async function start(label) { } } // we already created instance and do not need to iterate these loops - if (ec2id) - break; + if (ec2id) break; } if (last_error) { core.error(`Error creating AWS EC2 instance with ${label} label`); @@ -109,9 +102,9 @@ async 
function start(label) { // wait untill instance will be found running before continuing (spot instance // can be created but never run and will be in pending state untill // termination) - let p = ec2.waitFor("instanceRunning", - {Filters : [ {Name : "tag:Label", Values : [ label ]} ]}) - .promise(); + let p = ec2.waitFor("instanceRunning", { + Filters: [ { Name: "tag:Label", Values: [ label ] } ] + }).promise(); for (let i = 0; i < 2; i++) { p = p.catch(function() { core.warning(`Error searching for running AWS EC2 instance ${ec2id} with ${label} label. Will retry.`); @@ -136,11 +129,9 @@ async function stop(label) { // find AWS EC2 instances with tag label let instances; try { - instances = - await ec2 - .describeInstances( - {Filters : [ {Name : "tag:Label", Values : [ label ]} ]}) - .promise(); + instances = await ec2.describeInstances({ + Filters: [ { Name: "tag:Label", Values: [ label ] } ] + }).promise(); core.info(`Searched for AWS EC2 instance with label ${label}`); } catch (error) { core.error(`Error searching for AWS EC2 instance with label ${label}`); @@ -152,8 +143,7 @@ async function stop(label) { for (const reservation of instances.Reservations) { for (const instance of reservation.Instances) { try { - await ec2.terminateInstances({InstanceIds : [ instance.InstanceId ]}) - .promise(); + await ec2.terminateInstances({ InstanceIds: [ instance.InstanceId ] }).promise(); core.info(`Terminated AWS EC2 instance ${instance.InstanceId} with label ${label}`); } catch (error) { core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}`); @@ -182,10 +172,8 @@ async function stop(label) { label_found = true; break; } - if (!label_found) - continue; - let p = - octokit.request(`DELETE /repos/${repo}/actions/runners/${runner.id}`); + if (!label_found) continue; + let p = octokit.request(`DELETE /repos/${repo}/actions/runners/${runner.id}`); // retry deletion up to 5 times (with 10 seconds delay) sincec Github can // not remove 
runners still marked as active (with running job) for (let i = 0; i < 5; i++) { @@ -201,17 +189,16 @@ async function stop(label) { }); } - if (last_error) - throw last_error; + if (last_error) throw last_error; } (async function() { try { // provide AWS credentials AWS.config.update({ - accessKeyId : core.getInput("AWS_ACCESS_KEY"), - secretAccessKey : core.getInput("AWS_SECRET_KEY"), - region : core.getInput("aws-region") + accessKeyId: core.getInput("AWS_ACCESS_KEY"), + secretAccessKey: core.getInput("AWS_SECRET_KEY"), + region: core.getInput("aws-region") }); // mode is start or stop const mode = core.getInput("mode"); From 1be24bf59c4012a88d4a8c06049cb39f56bfcef9 Mon Sep 17 00:00:00 2001 From: apstasen Date: Mon, 25 Jul 2022 14:16:33 -0700 Subject: [PATCH 15/50] Removed not needed part of comment --- devops/actions/aws-ec2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index 5f5684a5085d7..eda7a343e39cb 100644 --- a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -25,7 +25,7 @@ inputs: # sudo -s # apt-get update # curl -fsSL https://get.docker.com -o /tmp/get-docker.sh - # sh /tmp/get-docker.sh # or "yum install -y docker" for Amazon Linux or RHEL/CentOS + # sh /tmp/get-docker.sh # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner # sync; shutdown -h now From 8f0c52260dc2f9695020a9e085c8e4cb9e684bc3 Mon Sep 17 00:00:00 2001 From: apstasen Date: Wed, 3 Aug 2022 19:14:27 -0700 Subject: [PATCH 16/50] Added EOL --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index e8ad362f288b0..0125e107e6793 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ 
b/.github/workflows/sycl_linux_build_and_test.yml @@ -267,4 +267,4 @@ aws-stop-matrix: mode: stop GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} - AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} \ No newline at end of file + AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} From aa13988ce59c71fa1506c51a3bde02fc4286328d Mon Sep 17 00:00:00 2001 From: apstasen Date: Wed, 3 Aug 2022 19:17:36 -0700 Subject: [PATCH 17/50] Fixed indent --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 0125e107e6793..12bf6a2dde4d6 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -241,7 +241,7 @@ jobs: results_name_suffix: ${{ matrix.config }}_${{ inputs.build_artifact_suffix }} cmake_args: '${{ matrix.cmake_args }} ${{ inputs.lts_cmake_extra_args }}' -aws-stop-matrix: + aws-stop-matrix: name: Stop AWS Matrix needs: [ aws-start-matrix, llvm_test_suite ] if: ${{ always() && inputs.lts_matrix != '' }} From 50855fa526a54e237ff6d950fef74fe63283a2ed Mon Sep 17 00:00:00 2001 From: Pavel V Chupin Date: Thu, 4 Aug 2022 11:07:08 -0700 Subject: [PATCH 18/50] Remove trailing spaces --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 12bf6a2dde4d6..7b5d6184a3a43 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -191,7 +191,7 @@ jobs: aws-type: ${{ matrix.aws-type }} aws-disk: ${{ matrix.aws-disk }} one-job: ${{ matrix.one-job }} - + llvm_test_suite: needs: [build, aws-start-matrix] if: ${{ inputs.lts_matrix != '' }} From 284fc39346b538c3850668273290c46acfcfa5d2 Mon Sep 17 
00:00:00 2001 From: Alexander P Stasenko Date: Thu, 4 Aug 2022 15:59:22 -0700 Subject: [PATCH 19/50] Moved uniq into sycl_resolve_test_matrix.yml and removed max-parallel for aws-start-matrix/aws-stop-matrix --- .github/workflows/sycl_linux_build_and_test.yml | 7 ------- .github/workflows/sycl_resolve_test_matrix.yml | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 7b5d6184a3a43..90eb7f4a510ce 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -52,11 +52,6 @@ on: type: number required: false default: 4 - uniq: - description: Unique string to name dynamic runners in AWS - type: string - required: false - default: ${{ github.run_id }}-${{ github.run_attempt }} jobs: build: @@ -166,7 +161,6 @@ jobs: if: ${{ inputs.lts_matrix != '' }} strategy: fail-fast: false - max-parallel: ${{ inputs.max_parallel }} matrix: include: ${{ fromJSON(inputs.lts_matrix) }} runs-on: ubuntu-latest @@ -247,7 +241,6 @@ jobs: if: ${{ always() && inputs.lts_matrix != '' }} strategy: fail-fast: false - max-parallel: ${{ inputs.max_parallel }} matrix: include: ${{ fromJSON(inputs.lts_matrix) }} runs-on: ubuntu-latest diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 6d20fe9dedbf7..4b67cb8b1cd9b 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -19,6 +19,11 @@ on: type: string required: true default: "" + uniq: + description: Unique string to name dynamic runners in AWS + type: string + required: false + default: ${{ github.run_id }}-${{ github.run_attempt }} outputs: lts_matrix: description: "Generated Matrix" From f76119969381e59126b851b492e0dde8b1636378 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 15:25:50 -0700 Subject: [PATCH 20/50] Do not 
create dummy aws start/stop runners --- .../workflows/sycl_linux_build_and_test.yml | 18 ++++++++---------- .github/workflows/sycl_post_commit.yml | 1 + .github/workflows/sycl_precommit.yml | 1 + .github/workflows/sycl_resolve_test_matrix.yml | 3 +++ devops/scripts/generate_test_matrix.js | 9 +++++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 90eb7f4a510ce..df17409969ca8 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -40,6 +40,10 @@ on: type: string required: false default: "" + lts_aws_matrix: + type: string + required: false + default: "" lts_cmake_extra_args: type: string required: false @@ -158,22 +162,19 @@ jobs: aws-start-matrix: name: Start AWS Matrix needs: build - if: ${{ inputs.lts_matrix != '' }} + if: ${{ inputs.lts_aws_matrix != '' }} strategy: fail-fast: false matrix: - include: ${{ fromJSON(inputs.lts_matrix) }} + include: ${{ fromJSON(inputs.lts_aws_matrix) }} runs-on: ubuntu-latest environment: aws steps: - uses: actions/checkout@v3 - if: ${{ matrix.aws-type }} with: path: llvm - run: npm install ./llvm/devops/actions/aws-ec2 - if: ${{ matrix.aws-type }} - name: Start AWS EC2 runner - if: ${{ matrix.aws-type }} uses: ./llvm/devops/actions/aws-ec2 with: label: ${{ matrix.runs-on }} @@ -238,22 +239,19 @@ jobs: aws-stop-matrix: name: Stop AWS Matrix needs: [ aws-start-matrix, llvm_test_suite ] - if: ${{ always() && inputs.lts_matrix != '' }} + if: ${{ always() && inputs.lts_aws_matrix != '' }} strategy: fail-fast: false matrix: - include: ${{ fromJSON(inputs.lts_matrix) }} + include: ${{ fromJSON(inputs.lts_aws_matrix) }} runs-on: ubuntu-latest environment: aws steps: - uses: actions/checkout@v3 - if: ${{ matrix.aws-type }} with: path: llvm - run: npm install ./llvm/devops/actions/aws-ec2 - if: ${{ matrix.aws-type }} - name: Stop AWS EC2 runner - if: ${{ 
matrix.aws-type }} uses: ./llvm/devops/actions/aws-ec2 with: label: ${{ matrix.runs-on }} diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index 04f335fe030b9..cef878bb8331d 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -26,6 +26,7 @@ jobs: build_cache_root: "/__w/llvm" build_artifact_suffix: "post_commit" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} linux_no_assert: name: Linux (no assert) uses: ./.github/workflows/sycl_linux_build_and_test.yml diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index a8e3c84373ac2..9ff053343441f 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -50,3 +50,4 @@ jobs: build_artifact_suffix: "default" build_cache_suffix: "default" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 4b67cb8b1cd9b..087709f65954a 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -28,6 +28,9 @@ on: lts_matrix: description: "Generated Matrix" value: ${{ jobs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: + description: "Generated Matrix AWS subset" + value: ${{ jobs.resolve_matrix.outputs.lts_ats_matrix }} jobs: resolve_matrix: name: Resolve Test Matrix diff --git a/devops/scripts/generate_test_matrix.js b/devops/scripts/generate_test_matrix.js index dd6cdafcb689d..154af1eb49240 100644 --- a/devops/scripts/generate_test_matrix.js +++ b/devops/scripts/generate_test_matrix.js @@ -27,6 +27,7 @@ module.exports = ({core, process}) => { const ltsConfigs = inputs.lts_config.split(';'); const enabledLTSConfigs = []; + const enabledLTSAWSConfigs 
= []; testConfigs.lts.forEach(v => { if (ltsConfigs.includes(v.config)) { @@ -44,22 +45,30 @@ module.exports = ({core, process}) => { v["env"] = {}; } enabledLTSConfigs.push(v); + if (v["aws-type"]) enabledLTSAWSConfigs.push(v); } }); let ltsString = JSON.stringify(enabledLTSConfigs); + let ltsAWSString = JSON.stringify(enabledLTSAWSConfigs); console.log(ltsString); + console.log(ltsAWSString) for (let [key, value] of Object.entries(inputs)) { ltsString = ltsString.replaceAll("${{ inputs." + key + " }}", value); + ltsAWSString = ltsAWSString.replaceAll("${{ inputs." + key + " }}", value); } if (needsDrivers) { ltsString = ltsString.replaceAll( "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", "ghcr.io/intel/llvm/ubuntu2004_base:latest"); + ltsAWSString = ltsAWSString.replaceAll( + "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", + "ghcr.io/intel/llvm/ubuntu2004_base:latest"); } core.setOutput('lts_matrix', ltsString); + core.setOutput('lts_aws_matrix', ltsAWSString); } }); } From 2548f9d68e0447640a119ab96802ba4341c4822a Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 15:28:52 -0700 Subject: [PATCH 21/50] Fixed indent --- .github/workflows/sycl_resolve_test_matrix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 087709f65954a..7fc02c584b8e6 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -28,7 +28,7 @@ on: lts_matrix: description: "Generated Matrix" value: ${{ jobs.resolve_matrix.outputs.lts_matrix }} - lts_aws_matrix: + lts_aws_matrix: description: "Generated Matrix AWS subset" value: ${{ jobs.resolve_matrix.outputs.lts_ats_matrix }} jobs: From 9c883b55f9d9e80638cb11491603d5ee0bd9ea8a Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 16:34:51 -0700 Subject: [PATCH 22/50] Start all AWS instances on one job to avoid 
waiting for parallel jobs --- .../workflows/sycl_linux_build_and_test.yml | 24 +--- .github/workflows/sycl_post_commit.yml | 1 - .github/workflows/sycl_precommit.yml | 1 - .../workflows/sycl_resolve_test_matrix.yml | 3 - devops/actions/aws-ec2/action.yml | 103 ++++++++---------- devops/actions/aws-ec2/aws-ec2.js | 35 ++++-- devops/actions/aws-ec2/package.json | 2 +- devops/scripts/generate_test_matrix.js | 9 -- 8 files changed, 75 insertions(+), 103 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index df17409969ca8..b03c35ba3981d 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -40,10 +40,6 @@ on: type: string required: false default: "" - lts_aws_matrix: - type: string - required: false - default: "" lts_cmake_extra_args: type: string required: false @@ -162,11 +158,6 @@ jobs: aws-start-matrix: name: Start AWS Matrix needs: build - if: ${{ inputs.lts_aws_matrix != '' }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.lts_aws_matrix) }} runs-on: ubuntu-latest environment: aws steps: @@ -177,15 +168,10 @@ jobs: - name: Start AWS EC2 runner uses: ./llvm/devops/actions/aws-ec2 with: - label: ${{ matrix.runs-on }} + runs-on-list: ${{ inputs.lts_matrix }} GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} - aws-ami: ${{ matrix.aws-ami }} - aws-spot: ${{ matrix.aws-spot }} - aws-type: ${{ matrix.aws-type }} - aws-disk: ${{ matrix.aws-disk }} - one-job: ${{ matrix.one-job }} llvm_test_suite: needs: [build, aws-start-matrix] @@ -239,11 +225,7 @@ jobs: aws-stop-matrix: name: Stop AWS Matrix needs: [ aws-start-matrix, llvm_test_suite ] - if: ${{ always() && inputs.lts_aws_matrix != '' }} - strategy: - fail-fast: false - matrix: - include: ${{ fromJSON(inputs.lts_aws_matrix) }} + if: ${{ always() }} 
runs-on: ubuntu-latest environment: aws steps: @@ -254,7 +236,7 @@ jobs: - name: Stop AWS EC2 runner uses: ./llvm/devops/actions/aws-ec2 with: - label: ${{ matrix.runs-on }} + runs-on-list: ${{ inputs.lts_matrix }} mode: stop GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index cef878bb8331d..04f335fe030b9 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -26,7 +26,6 @@ jobs: build_cache_root: "/__w/llvm" build_artifact_suffix: "post_commit" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} - lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} linux_no_assert: name: Linux (no assert) uses: ./.github/workflows/sycl_linux_build_and_test.yml diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index 9ff053343441f..a8e3c84373ac2 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -50,4 +50,3 @@ jobs: build_artifact_suffix: "default" build_cache_suffix: "default" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} - lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 7fc02c584b8e6..4b67cb8b1cd9b 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -28,9 +28,6 @@ on: lts_matrix: description: "Generated Matrix" value: ${{ jobs.resolve_matrix.outputs.lts_matrix }} - lts_aws_matrix: - description: "Generated Matrix AWS subset" - value: ${{ jobs.resolve_matrix.outputs.lts_ats_matrix }} jobs: resolve_matrix: name: Resolve Test Matrix diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index eda7a343e39cb..be9f3f0465c5a 100644 --- 
a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -1,78 +1,65 @@ name: aws-ec2 -description: Starts AWS EC2 instance with Github actions runner agent in it +description: Start AWS EC2 instances with Github actions runner agent in it inputs: - label: - description: "Name of the unique label assigned to the runner used as 'runs-on' property for the following jobs" + runs-on-list: + description: "JSON string with array of objects with aws-type, runs-on, aws-ami, aws-spot, aws-disk, aws-timebomb, one-job properties" required: true + # aws-type: AWS EC2 instance type. This property must be present if you want to trigger AWS EC2 instance start/stop. + # runs-on: Name of the unique label assigned to the runner used as 'runs-on' property for the following jobs. Mandatory presence required. + # aws-ami: AWS AMI id. Makes sense only for start mode. Default "ami-0966bccbb521ccb24". + + # ami-0966bccbb521ccb24: Ubuntu 22.04 (ami-02f3416038bdb17fb with /dev/sda1 disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # apt-get update + # curl -fsSL https://get.docker.com -o /tmp/get-docker.sh + # sh /tmp/get-docker.sh + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + # ami-02ec0f344128253f9: Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver (ami-06bf0a3f89fe08f0a with /dev/xvda disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # yum update -y + # amazon-linux-extras install docker + # sudo systemctl --now enable docker + # distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo + # yum-config-manager --disable amzn2-graphics; yum clean expire-cache; yum install -y nvidia-docker2; systemctl restart docker + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + # ami-0ccda708841dde988: Amazon Linux 2 AMI with AMD Radeon Pro Driver (ami-0bb1072e787242eb6 with /dev/xvda disk) with docker installed and gh_runner (1001) like this: + # sudo -s + # amazon-linux-extras install docker + # sudo systemctl --now enable docker + # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner + # sync; shutdown -h now + + # aws-spot: Enable usage of spot instances to save money (less reliable). Makes sense only for start mode. Default true. + # aws-disk: AWS EC2 instance AMI specific disk device path and size in GB (8 by default). Makes sense only for start mode. Default "/dev/sda1:16". + # aws-timebomb: AWS EC2 instance maximum live time. Makes sense only for start mode. Default "1h". + # one-job: Will terminate AWS EC2 instance after one job (not waiting for stop job) saving money. Makes sense only for start mode. Default true. + + mode: + description: "Mode of operation: start or stop" + required: false + default: start + GH_PERSONAL_ACCESS_TOKEN: description: "Github personal access token with repo permission" required: true + AWS_ACCESS_KEY: description: "AWS access id" required: true + AWS_SECRET_KEY: description: "AWS access secret key" required: true + aws-region: description: "AWS EC2 region" required: false default: "us-east-2" # Ohio - aws-ami: - description: "AWS AMI id.
Makes sense only for start mode" - required: false - default: "ami-0966bccbb521ccb24" # Ubuntu 22.04 (ami-02f3416038bdb17fb with /dev/sda1 disk) with docker installed and gh_runner (1001) like this: - # sudo -s - # apt-get update - # curl -fsSL https://get.docker.com -o /tmp/get-docker.sh - # sh /tmp/get-docker.sh - # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner - # sync; shutdown -h now - - # "ami-02ec0f344128253f9" # Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver (ami-06bf0a3f89fe08f0a with /dev/xvda disk) with docker installed and gh_runner (1001) like this: - # sudo -s - # yum update -y - # amazon-linux-extras install docker - # sudo systemctl --now enable docker - # distribution=$(. /etc/os-release;echo $ID$VERSION_ID) && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo - # yum-config-manager --disable amzn2-graphics; yum clean expire-cache; yum install -y nvidia-docker2; systemctl restart docker - # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner - # sync; shutdown -h now - - # "ami-0ccda708841dde988" # Amazon Linux 2 AMI with AMD Radeon Pro Driver (ami-0bb1072e787242eb6 with /dev/xvda disk) with docker installed and gh_runner (1001) like this: - # sudo -s - # amazon-linux-extras install docker - # sudo systemctl --now enable docker - # groupadd -g 1001 gh_runner; useradd gh_runner -u 1001 -g 1001 -m -s /bin/bash; usermod -aG docker gh_runner; usermod -aG video gh_runner - # sync; shutdown -h now - - aws-type: - description: "AWS EC2 instance type. 
Makes sense only for start mode" - required: false - default: "[ \"t2.micro\" ]" - # "[ \"g4dn.2xlarge\" ]": 1 NVIDIA T4 GPU, 8 CPU, 32 GB RAM - # "[ \"g4ad.4xlarge\" ]": 1 AMD Radeon Pro V520 GPU, 16 CPU, 64 GB RAM (overloaded pool so do not use spot here now) - aws-spot: - description: "Enable usage of spot instances to save money (less reliable). Makes sense only for start mode" - requred: false - default: true - aws-disk: - description: "AWS EC2 instance AMI specific disk device path and size in GB (8 by default). Makes sense only for start mode" - required: false - default: "/dev/sda1:16" # Github actions container engine will fail with lack of disk space for 8GB - aws-timebomb: - description: "AWS EC2 instance maximum live time. Makes sense only for start mode" - required: false - default: 1h - - mode: - description: "Mode of operation: start or stop" - required: false - default: start - one-job: - description: "Will terminate AWS EC2 instance after one job (not waiting for stop job) saving money" - required: false - default: true runs: using: node12 diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 0d94c7de78ea1..bc05edcf4e665 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -28,18 +28,20 @@ function rejectDelay(reason) { } // starts AWS EC2 instance that will spawn Github runner for a given label -async function start(label) { +async function start(param_type, param_label, param_ami, param_spot, param_disk, param_timebomb, param_onejob) { const ec2 = new AWS.EC2(); // we better keep GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 // userscript so it will keep secret const reg_token = await getGithubRegToken(); - const timebomb = core.getInput("aws-timebomb"); - const raw_ec2types = JSON.parse(core.getInput("aws-type")); + const raw_ec2types = JSON.parse(param_type); const ec2types = typeof raw_ec2types == "string" ? 
[ raw_ec2types ] : raw_ec2types; - const ec2disk = core.getInput("aws-disk"); - const ec2spot = core.getInput("aws-spot") != "false"; - const onejob = core.getInput("one-job") != "false"; + const label = param_label; + const ec2ami = typeof param_ami !== 'undefined' ? param_ami : "ami-0966bccbb521ccb24"; + const ec2spot = typeof param_spot !== 'undefined' ? param_spot : true; + const ec2disk = typeof param_disk !== 'undefined' ? param_disk : "/dev/sda1:16"; + const timebomb = typeof param_timebomb !== 'undefined' ? param_timebomb : "1h"; + const onejob = typeof param_onejob !== 'undefined' ? param_onejob : true; // ephemeral runner will exit after one job so we will terminate instance sooner const ephemeral_str = onejob ? "--ephemeral" : ""; @@ -67,7 +69,7 @@ async function start(label) { ]; try { let params = { - ImageId: core.getInput("aws-ami"), + ImageId: ec2ami, InstanceType: ec2type, UserData: Buffer.from(setup_github_actions_runner.join('\n')).toString('base64'), MinCount: 1, @@ -202,12 +204,27 @@ async function stop(label) { }); // mode is start or stop const mode = core.getInput("mode"); + const runs_on_list = JSON.parse(core.getInput("runs-on-list")); + // label used to indentify AWS EC2 instances and Github runners const label = core.getInput("label"); if (mode == "start") { - await start(label); + for (let c of runs_on_list) { + if (c["aws-type"]) await start(c["aws-type"], c["runs-on"], c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); + } } else if (mode == "stop") { - await stop(label); + // last error that will be thrown in case something will break here + let last_error; + for (let c of runs_on_list) { + const label = c["runs-on"]; + try { + if (c["aws-type"]) await stop(label); + } catch (error) { + core.error(`Error removing runner with ${label}`); + last_error = error; + } + } + if (last_error) throw last_error; } } catch (error) { core.error(error); diff --git a/devops/actions/aws-ec2/package.json 
b/devops/actions/aws-ec2/package.json index 8649a3ce2798a..449bb9d8ab2c5 100644 --- a/devops/actions/aws-ec2/package.json +++ b/devops/actions/aws-ec2/package.json @@ -1,6 +1,6 @@ { "name": "aws-ec2", - "description": "Starts AWS EC2 spot instance with Github actions runner agent in it", + "description": "Start AWS EC2 spot instances with Github actions runner agent in it", "dependencies": { "@actions/core": "^1.9.0", "@actions/github": "^5.0.3", diff --git a/devops/scripts/generate_test_matrix.js b/devops/scripts/generate_test_matrix.js index 154af1eb49240..dd6cdafcb689d 100644 --- a/devops/scripts/generate_test_matrix.js +++ b/devops/scripts/generate_test_matrix.js @@ -27,7 +27,6 @@ module.exports = ({core, process}) => { const ltsConfigs = inputs.lts_config.split(';'); const enabledLTSConfigs = []; - const enabledLTSAWSConfigs = []; testConfigs.lts.forEach(v => { if (ltsConfigs.includes(v.config)) { @@ -45,30 +44,22 @@ module.exports = ({core, process}) => { v["env"] = {}; } enabledLTSConfigs.push(v); - if (v["aws-type"]) enabledLTSAWSConfigs.push(v); } }); let ltsString = JSON.stringify(enabledLTSConfigs); - let ltsAWSString = JSON.stringify(enabledLTSAWSConfigs); console.log(ltsString); - console.log(ltsAWSString) for (let [key, value] of Object.entries(inputs)) { ltsString = ltsString.replaceAll("${{ inputs." + key + " }}", value); - ltsAWSString = ltsAWSString.replaceAll("${{ inputs." 
+ key + " }}", value); } if (needsDrivers) { ltsString = ltsString.replaceAll( "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", "ghcr.io/intel/llvm/ubuntu2004_base:latest"); - ltsAWSString = ltsAWSString.replaceAll( - "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", - "ghcr.io/intel/llvm/ubuntu2004_base:latest"); } core.setOutput('lts_matrix', ltsString); - core.setOutput('lts_aws_matrix', ltsAWSString); } }); } From 97cef06f688a26d0b3e2220a48e69e778ae8ad67 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 16:37:37 -0700 Subject: [PATCH 23/50] Added extra debug info --- devops/actions/aws-ec2/aws-ec2.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index bc05edcf4e665..b09426ebf3d9a 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -210,7 +210,9 @@ async function stop(label) { const label = core.getInput("label"); if (mode == "start") { for (let c of runs_on_list) { - if (c["aws-type"]) await start(c["aws-type"], c["runs-on"], c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); + const label = c["runs-on"]; + if (c["aws-type"]) await start(c["aws-type"], label, c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); + else core.info(`Skipping ${label} config`); } } else if (mode == "stop") { // last error that will be thrown in case something will break here From e1a84f8e5c651fa33e40e524e361934305c6fb3f Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 17:32:19 -0700 Subject: [PATCH 24/50] Fixed handling empty/undefined runs-on-list --- devops/actions/aws-ec2/aws-ec2.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index b09426ebf3d9a..cf964d0ce21ad 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ 
-204,10 +204,8 @@ async function stop(label) { }); // mode is start or stop const mode = core.getInput("mode"); - const runs_on_list = JSON.parse(core.getInput("runs-on-list")); + const runs_on_list = core.getInput("runs-on-list") ? JSON.parse(core.getInput("runs-on-list")) : []; - // label used to indentify AWS EC2 instances and Github runners - const label = core.getInput("label"); if (mode == "start") { for (let c of runs_on_list) { const label = c["runs-on"]; From 7befe3335a040b49c0bca6f99eebd7bed5fbe089 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 18:24:56 -0700 Subject: [PATCH 25/50] Added extra debug message --- devops/actions/aws-ec2/aws-ec2.js | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index cf964d0ce21ad..6ca899a6928b5 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -219,6 +219,7 @@ async function stop(label) { const label = c["runs-on"]; try { if (c["aws-type"]) await stop(label); + else core.info(`Skipping ${label} config`); } catch (error) { core.error(`Error removing runner with ${label}`); last_error = error; From 29fc79d88330786921b81e1b5ff15c91c5474336 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 18:32:41 -0700 Subject: [PATCH 26/50] Fix nightly testing --- .github/workflows/sycl_nightly.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl_nightly.yml b/.github/workflows/sycl_nightly.yml index 38f53c5b4e605..668317627c1a3 100644 --- a/.github/workflows/sycl_nightly.yml +++ b/.github/workflows/sycl_nightly.yml @@ -18,7 +18,7 @@ jobs: build_cache_root: "/__w/" build_artifact_suffix: default build_configure_extra_args: '' - lts_config: "ocl_gen9;ocl_x64" + lts_matrix: "ocl_gen9;ocl_x64" ubuntu2004_opaque_pointers_build_test: if: github.repository == 'intel/llvm' @@ -29,7 +29,7 @@ jobs: build_cache_suffix: opaque_pointers 
build_artifact_suffix: opaque_pointers build_configure_extra_args: "--hip --cuda --enable-esimd-emulator --cmake-opt=-DDPCPP_ENABLE_OPAQUE_POINTERS=TRUE" - lts_config: "ocl_gen9;ocl_x64" + lts_matrix: "ocl_gen9;ocl_x64" windows_default: name: Windows From 5777010accd21f052d3bc50e95644325902a79a3 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 21:26:31 -0700 Subject: [PATCH 27/50] Revert "Fix nightly testing" This reverts commit 29fc79d88330786921b81e1b5ff15c91c5474336. --- .github/workflows/sycl_nightly.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl_nightly.yml b/.github/workflows/sycl_nightly.yml index 668317627c1a3..38f53c5b4e605 100644 --- a/.github/workflows/sycl_nightly.yml +++ b/.github/workflows/sycl_nightly.yml @@ -18,7 +18,7 @@ jobs: build_cache_root: "/__w/" build_artifact_suffix: default build_configure_extra_args: '' - lts_matrix: "ocl_gen9;ocl_x64" + lts_config: "ocl_gen9;ocl_x64" ubuntu2004_opaque_pointers_build_test: if: github.repository == 'intel/llvm' @@ -29,7 +29,7 @@ jobs: build_cache_suffix: opaque_pointers build_artifact_suffix: opaque_pointers build_configure_extra_args: "--hip --cuda --enable-esimd-emulator --cmake-opt=-DDPCPP_ENABLE_OPAQUE_POINTERS=TRUE" - lts_matrix: "ocl_gen9;ocl_x64" + lts_config: "ocl_gen9;ocl_x64" windows_default: name: Windows From 1f4a076e4c43bbebb34ee17082e092f61f155365 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 21:31:49 -0700 Subject: [PATCH 28/50] Do not parse aws type JSON anymore and pass it directly --- devops/actions/aws-ec2/aws-ec2.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 6ca899a6928b5..6592029632a0f 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -34,8 +34,7 @@ async function start(param_type, param_label, param_ami, param_spot, param_disk, // we better keep 
GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 // userscript so it will keep secret const reg_token = await getGithubRegToken(); - const raw_ec2types = JSON.parse(param_type); - const ec2types = typeof raw_ec2types == "string" ? [ raw_ec2types ] : raw_ec2types; + const ec2types = typeof param_type == "string" ? [ param_type ] : param_type; const label = param_label; const ec2ami = typeof param_ami !== 'undefined' ? param_ami : "ami-0966bccbb521ccb24"; const ec2spot = typeof param_spot !== 'undefined' ? param_spot : true; From d5d187e2cb6a0704676d9787c3477f4e358cfa1d Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Fri, 5 Aug 2022 21:32:12 -0700 Subject: [PATCH 29/50] Add test AWS usage --- devops/test_configs.json | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/devops/test_configs.json b/devops/test_configs.json index a66d106163b39..ca5df15f2835b 100644 --- a/devops/test_configs.json +++ b/devops/test_configs.json @@ -51,10 +51,11 @@ { "config": "hip_amdgpu", "name": "HIP AMDGPU LLVM Test Suite", - "runs-on": [ - "Linux", - "amdgpu" - ], + "runs-on": "aws-amdgpu_${{ inputs.uniq }}", + "aws-ami": "ami-0ccda708841dde988", + "aws-type": [ "g4ad.2xlarge", "g4ad.4xlarge" ], + "aws-spot": false, + "aws-disk": "/dev/xvda:64", "image": "${{ inputs.amdgpu_image }}", "container_options": "--device=/dev/dri --device=/dev/kfd", "check_sycl_all": "hip:gpu,host", @@ -63,10 +64,10 @@ { "config": "cuda", "name": "CUDA LLVM Test Suite", - "runs-on": [ - "Linux", - "cuda" - ], + "runs-on": "aws-cuda_${{ inputs.uniq }}", + "aws-ami": "ami-02ec0f344128253f9", + "aws-type": [ "g4dn.2xlarge", "g4dn.4xlarge" ], + "aws-disk": "/dev/xvda:64", "image": "${{ inputs.cuda_image }}", "container_options": "--gpus all", "check_sycl_all": "cuda:gpu,host", From 7b63970260c9add70764fabf0e5a2ba7b73fd45c Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 00:00:25 -0700 Subject: [PATCH 30/50] Adjusted AWS action names --- 
.github/workflows/sycl_linux_build_and_test.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index b03c35ba3981d..f74afdbc87b72 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -155,8 +155,8 @@ jobs: name: sycl_lit_${{ inputs.build_artifact_suffix }} path: lit.tar.xz - aws-start-matrix: - name: Start AWS Matrix + aws-start: + name: Start AWS needs: build runs-on: ubuntu-latest environment: aws @@ -165,7 +165,7 @@ jobs: with: path: llvm - run: npm install ./llvm/devops/actions/aws-ec2 - - name: Start AWS EC2 runner + - name: Start AWS EC2 runners uses: ./llvm/devops/actions/aws-ec2 with: runs-on-list: ${{ inputs.lts_matrix }} @@ -174,7 +174,7 @@ jobs: AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} llvm_test_suite: - needs: [build, aws-start-matrix] + needs: [build, aws-start] if: ${{ inputs.lts_matrix != '' }} strategy: fail-fast: false @@ -222,9 +222,9 @@ jobs: results_name_suffix: ${{ matrix.config }}_${{ inputs.build_artifact_suffix }} cmake_args: '${{ matrix.cmake_args }} ${{ inputs.lts_cmake_extra_args }}' - aws-stop-matrix: - name: Stop AWS Matrix - needs: [ aws-start-matrix, llvm_test_suite ] + aws-stop: + name: Stop AWS + needs: [ aws-start, llvm_test_suite ] if: ${{ always() }} runs-on: ubuntu-latest environment: aws @@ -233,7 +233,7 @@ jobs: with: path: llvm - run: npm install ./llvm/devops/actions/aws-ec2 - - name: Stop AWS EC2 runner + - name: Stop AWS EC2 runners uses: ./llvm/devops/actions/aws-ec2 with: runs-on-list: ${{ inputs.lts_matrix }} From 6cf578148e6ea9469d270f324581a97423d6f032 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 00:39:00 -0700 Subject: [PATCH 31/50] Do not get Github reg token more than once --- devops/actions/aws-ec2/aws-ec2.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 6592029632a0f..d5d8d2ac3782d 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -27,13 +27,15 @@ function rejectDelay(reason) { }); } +// we better keep GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 +// userscript so it will keep secret +let reg_token; + // starts AWS EC2 instance that will spawn Github runner for a given label async function start(param_type, param_label, param_ami, param_spot, param_disk, param_timebomb, param_onejob) { const ec2 = new AWS.EC2(); - // we better keep GH_PERSONAL_ACCESS_TOKEN here and do not pass it to AWS EC2 - // userscript so it will keep secret - const reg_token = await getGithubRegToken(); + reg_token = reg_token ? reg_token : await getGithubRegToken(); const ec2types = typeof param_type == "string" ? [ param_type ] : param_type; const label = param_label; const ec2ami = typeof param_ami !== 'undefined' ? param_ami : "ami-0966bccbb521ccb24"; From 3688399155906fdfeb53b902062b8dbb77bb9cb1 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 07:57:35 -0700 Subject: [PATCH 32/50] Added option to understand label from array (of one element) --- devops/actions/aws-ec2/aws-ec2.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index d5d8d2ac3782d..7614ef6440915 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -36,7 +36,7 @@ async function start(param_type, param_label, param_ami, param_spot, param_disk, const ec2 = new AWS.EC2(); reg_token = reg_token ? reg_token : await getGithubRegToken(); - const ec2types = typeof param_type == "string" ? [ param_type ] : param_type; + const ec2types = typeof param_type === 'string' ? [ param_type ] : param_type; const label = param_label; const ec2ami = typeof param_ami !== 'undefined' ? 
param_ami : "ami-0966bccbb521ccb24"; const ec2spot = typeof param_spot !== 'undefined' ? param_spot : true; @@ -209,7 +209,7 @@ async function stop(label) { if (mode == "start") { for (let c of runs_on_list) { - const label = c["runs-on"]; + const label = typeof c["runs-on"] === 'string' ? c["runs-on"] : c["runs-on"][0]; if (c["aws-type"]) await start(c["aws-type"], label, c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); else core.info(`Skipping ${label} config`); } @@ -217,7 +217,7 @@ async function stop(label) { // last error that will be thrown in case something will break here let last_error; for (let c of runs_on_list) { - const label = c["runs-on"]; + const label = typeof c["runs-on"] === 'string' ? c["runs-on"] : c["runs-on"][0]; try { if (c["aws-type"]) await stop(label); else core.info(`Skipping ${label} config`); From 513a6a3f295034f732dae3938017996a66764d24 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 08:24:40 -0700 Subject: [PATCH 33/50] Unified label handling code --- devops/actions/aws-ec2/aws-ec2.js | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 7614ef6440915..cd5c65a32ffec 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -36,8 +36,8 @@ async function start(param_type, param_label, param_ami, param_spot, param_disk, const ec2 = new AWS.EC2(); reg_token = reg_token ? reg_token : await getGithubRegToken(); - const ec2types = typeof param_type === 'string' ? [ param_type ] : param_type; - const label = param_label; + const ec2types = typeof param_type === 'string' ? [ param_type ] : param_type; + const label = typeof param_label === 'string' ? param_label : param_label[0]; const ec2ami = typeof param_ami !== 'undefined' ? param_ami : "ami-0966bccbb521ccb24"; const ec2spot = typeof param_spot !== 'undefined' ? 
param_spot : true; const ec2disk = typeof param_disk !== 'undefined' ? param_disk : "/dev/sda1:16"; @@ -124,11 +124,13 @@ async function start(param_type, param_label, param_ami, param_spot, param_disk, // terminate (completely remove) AWS EC instances (normally one instance) with // given tag label and also remove all Github actions runners (normally one // runner) with that label -async function stop(label) { +async function stop(param_label) { // last error that will be thrown in case something will break here let last_error; const ec2 = new AWS.EC2(); + const label = typeof param_label === 'string' ? param_label : param_label[0]; + // find AWS EC2 instances with tag label let instances; try { @@ -209,20 +211,22 @@ async function stop(label) { if (mode == "start") { for (let c of runs_on_list) { - const label = typeof c["runs-on"] === 'string' ? c["runs-on"] : c["runs-on"][0]; - if (c["aws-type"]) await start(c["aws-type"], label, c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); - else core.info(`Skipping ${label} config`); + const raw_label = c["runs-on"]; + if (c["aws-type"]) { + await start(c["aws-type"], raw_label, c["aws-ami"], c["aws-spot"], c["aws-disk"], c["aws-timebomb"], c["one-job"]); + } else core.info(`Skipping ${raw_label} config`); } } else if (mode == "stop") { // last error that will be thrown in case something will break here let last_error; for (let c of runs_on_list) { - const label = typeof c["runs-on"] === 'string' ? 
c["runs-on"] : c["runs-on"][0]; + const raw_label = c["runs-on"]; try { - if (c["aws-type"]) await stop(label); - else core.info(`Skipping ${label} config`); + if (c["aws-type"]) { + await stop(raw_label); + } else core.info(`Skipping ${raw_label} config`); } catch (error) { - core.error(`Error removing runner with ${label}`); + core.error(`Error removing runner with ${raw_label}`); last_error = error; } } From bbf44900c934ea155ba6179e5afb18a26bbdf247 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 08:31:05 -0700 Subject: [PATCH 34/50] Removed problem configs with too generic runner labels --- devops/test_configs.json | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/devops/test_configs.json b/devops/test_configs.json index ca5df15f2835b..95b4de2ad893b 100644 --- a/devops/test_configs.json +++ b/devops/test_configs.json @@ -24,30 +24,6 @@ "check_sycl_all": "opencl:gpu,host", "cmake_args": "" }, - { - "config": "ocl_x64", - "name": "OCL x64 LLVM Test Suite", - "runs-on": [ - "Linux", - "x64" - ], - "image": "${{ inputs.intel_drivers_image }}", - "container_options": "-u 1001", - "check_sycl_all": "opencl:cpu,host", - "cmake_args": "" - }, - { - "config": "esimd_emu", - "name": "ESIMD Emu LLVM Test Suite", - "runs-on": [ - "Linux", - "x64" - ], - "image": "${{ inputs.intel_drivers_image }}", - "container_options": "-u 1001", - "check_sycl_all": "ext_intel_esimd_emulator:gpu", - "cmake_args": "" - }, { "config": "hip_amdgpu", "name": "HIP AMDGPU LLVM Test Suite", From 65f39d5efc8437eb1e2e870ea27d112cca3b1fd3 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 08:32:58 -0700 Subject: [PATCH 35/50] Revert "Removed problem configs with too generic runner labels" This reverts commit bbf44900c934ea155ba6179e5afb18a26bbdf247. 
--- devops/test_configs.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/devops/test_configs.json b/devops/test_configs.json index 95b4de2ad893b..ca5df15f2835b 100644 --- a/devops/test_configs.json +++ b/devops/test_configs.json @@ -24,6 +24,30 @@ "check_sycl_all": "opencl:gpu,host", "cmake_args": "" }, + { + "config": "ocl_x64", + "name": "OCL x64 LLVM Test Suite", + "runs-on": [ + "Linux", + "x64" + ], + "image": "${{ inputs.intel_drivers_image }}", + "container_options": "-u 1001", + "check_sycl_all": "opencl:cpu,host", + "cmake_args": "" + }, + { + "config": "esimd_emu", + "name": "ESIMD Emu LLVM Test Suite", + "runs-on": [ + "Linux", + "x64" + ], + "image": "${{ inputs.intel_drivers_image }}", + "container_options": "-u 1001", + "check_sycl_all": "ext_intel_esimd_emulator:gpu", + "cmake_args": "" + }, { "config": "hip_amdgpu", "name": "HIP AMDGPU LLVM Test Suite", From 4be70065b7bafdd1d6f9e9318b301db937972fbf Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Sat, 6 Aug 2022 08:34:41 -0700 Subject: [PATCH 36/50] Removed problem configs with too generic runner labels --- .github/workflows/sycl_precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index a8e3c84373ac2..d2860e7f7bbc0 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -34,7 +34,7 @@ jobs: name: Resolve Test Matrix uses: ./.github/workflows/sycl_resolve_test_matrix.yml with: - lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda" + lts_config: "hip_amdgpu;ocl_gen9;l0_gen9;cuda" linux_default: name: Linux From d3cc81f87af09bbd1d5457136208f47cd585d6e5 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 17:36:39 -0700 Subject: [PATCH 37/50] Resolved conflict and fixed potential issue with temporary AWS CI testing --- .github/workflows/sycl_nightly.yml | 2 ++ 
devops/actions/aws-ec2/aws-ec2.js | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sycl_nightly.yml b/.github/workflows/sycl_nightly.yml index 38f53c5b4e605..4618f9d221ce3 100644 --- a/.github/workflows/sycl_nightly.yml +++ b/.github/workflows/sycl_nightly.yml @@ -13,6 +13,7 @@ jobs: ubuntu2004_build_test: if: github.repository == 'intel/llvm' uses: ./.github/workflows/sycl_linux_build_and_test.yml + needs: resolve_matrix secrets: inherit with: build_cache_root: "/__w/" @@ -23,6 +24,7 @@ jobs: ubuntu2004_opaque_pointers_build_test: if: github.repository == 'intel/llvm' uses: ./.github/workflows/sycl_linux_build_and_test.yml + needs: resolve_matrix secrets: inherit with: build_cache_root: "/__w/" diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index cd5c65a32ffec..080649e88ff77 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -3,7 +3,8 @@ const github = require('@actions/github'); const AWS = require('aws-sdk'); // shortcut to reference current repo -const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; +//const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; +const repo = `intel/${github.context.repo.repo}`; // get github registration token that allows to register new runner based on // GH_PERSONAL_ACCESS_TOKEN github user api key From 8404e91a948be54524a71a5a65ee792aee3889c2 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 18:25:37 -0700 Subject: [PATCH 38/50] Added more logs --- devops/actions/aws-ec2/aws-ec2.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 080649e88ff77..5c30fcbff737d 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -4,7 +4,7 @@ const AWS = require('aws-sdk'); // shortcut to reference current repo //const repo = 
`${github.context.repo.owner}/${github.context.repo.repo}`; -const repo = `intel/${github.context.repo.repo}`; +const repo = `intel/llvm`; // get github registration token that allows to register new runner based on // GH_PERSONAL_ACCESS_TOKEN github user api key @@ -12,6 +12,7 @@ async function getGithubRegToken() { const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); try { + core.info("Getting Github Actions Runner registration token"); const response = await octokit.request(`POST /repos/${repo}/actions/runners/registration-token`); core.info("Got Github Actions Runner registration token"); return response.data.token; From c14f4db3805dd18aa5e4d50dd0fa2fb500a4ddfa Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 19:20:30 -0700 Subject: [PATCH 39/50] More logging info --- devops/actions/aws-ec2/action.yml | 2 +- devops/actions/aws-ec2/aws-ec2.js | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/devops/actions/aws-ec2/action.yml b/devops/actions/aws-ec2/action.yml index be9f3f0465c5a..9e2730fdc4107 100644 --- a/devops/actions/aws-ec2/action.yml +++ b/devops/actions/aws-ec2/action.yml @@ -60,7 +60,7 @@ inputs: description: "AWS EC2 region" required: false default: "us-east-2" # Ohio - + runs: using: node12 main: ./aws-ec2.js diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 5c30fcbff737d..5e6536984ec5a 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -9,10 +9,12 @@ const repo = `intel/llvm`; // get github registration token that allows to register new runner based on // GH_PERSONAL_ACCESS_TOKEN github user api key async function getGithubRegToken() { - const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); + core.info("Preparing Github SDK API"); + const token = core.getInput("GH_PERSONAL_ACCESS_TOKEN"); + const octokit = github.getOctokit(token); try { - core.info("Getting Github 
Actions Runner registration token"); + core.info("Getting Github Actions Runner registration token for ${repo} repo (${token.length})"); const response = await octokit.request(`POST /repos/${repo}/actions/runners/registration-token`); core.info("Got Github Actions Runner registration token"); return response.data.token; @@ -141,7 +143,7 @@ async function stop(param_label) { }).promise(); core.info(`Searched for AWS EC2 instance with label ${label}`); } catch (error) { - core.error(`Error searching for AWS EC2 instance with label ${label}`); + core.error(`Error searching for AWS EC2 instance with label ${label}: ${error}`); last_error = error; } @@ -153,13 +155,14 @@ async function stop(param_label) { await ec2.terminateInstances({ InstanceIds: [ instance.InstanceId ] }).promise(); core.info(`Terminated AWS EC2 instance ${instance.InstanceId} with label ${label}`); } catch (error) { - core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}`); + core.error(`Error terminating AWS EC2 instance ${instance.InstanceId} with label ${label}: ${error}`); last_error = error; } } } // find all Github action runners + core.info("Preparing Github SDK API"); const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); let runners; try { @@ -191,7 +194,7 @@ async function stop(param_label) { p = p.then(function() { core.info(`Removed Github self-hosted runner ${runner.id} with ${label}`); }).catch(function(error) { - core.error(`Error removing Github self-hosted runner ${runner.id} with ${label}`); + core.error(`Error removing Github self-hosted runner ${runner.id} with ${label}: ${error}`); last_error = error; }); } @@ -228,7 +231,7 @@ async function stop(param_label) { await stop(raw_label); } else core.info(`Skipping ${raw_label} config`); } catch (error) { - core.error(`Error removing runner with ${raw_label}`); + core.error(`Error removing runner with ${raw_label}: ${error}`); last_error = error; } } From 
7cd213a528caad6e762928798deb11440a2e9b0d Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 19:23:52 -0700 Subject: [PATCH 40/50] More logging --- devops/actions/aws-ec2/aws-ec2.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 5e6536984ec5a..e8f2ca1926985 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -11,10 +11,11 @@ const repo = `intel/llvm`; async function getGithubRegToken() { core.info("Preparing Github SDK API"); const token = core.getInput("GH_PERSONAL_ACCESS_TOKEN"); + core.info(`${token.length}`); const octokit = github.getOctokit(token); try { - core.info("Getting Github Actions Runner registration token for ${repo} repo (${token.length})"); + core.info(`Getting Github Actions Runner registration token for ${repo} repo`); const response = await octokit.request(`POST /repos/${repo}/actions/runners/registration-token`); core.info("Got Github Actions Runner registration token"); return response.data.token; From 7b595050fc0acb202810d23d31b35d64cb5c64ad Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 19:46:48 -0700 Subject: [PATCH 41/50] Need target repo run context --- .github/workflows/sycl_precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index d2860e7f7bbc0..8c8e15ef922b9 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -1,7 +1,7 @@ name: SYCL on: - pull_request: + pull_request_target: branches: - sycl # Do not run builds if changes are only in the following locations From fd24b2ea6d7f616835d4821649149f2183b96536 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 20:49:38 -0700 Subject: [PATCH 42/50] Revert "Add test AWS usage" This reverts commit d5d187e2cb6a0704676d9787c3477f4e358cfa1d. 
--- devops/test_configs.json | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/devops/test_configs.json b/devops/test_configs.json index ca5df15f2835b..a66d106163b39 100644 --- a/devops/test_configs.json +++ b/devops/test_configs.json @@ -51,11 +51,10 @@ { "config": "hip_amdgpu", "name": "HIP AMDGPU LLVM Test Suite", - "runs-on": "aws-amdgpu_${{ inputs.uniq }}", - "aws-ami": "ami-0ccda708841dde988", - "aws-type": [ "g4ad.2xlarge", "g4ad.4xlarge" ], - "aws-spot": false, - "aws-disk": "/dev/xvda:64", + "runs-on": [ + "Linux", + "amdgpu" + ], "image": "${{ inputs.amdgpu_image }}", "container_options": "--device=/dev/dri --device=/dev/kfd", "check_sycl_all": "hip:gpu,host", @@ -64,10 +63,10 @@ { "config": "cuda", "name": "CUDA LLVM Test Suite", - "runs-on": "aws-cuda_${{ inputs.uniq }}", - "aws-ami": "ami-02ec0f344128253f9", - "aws-type": [ "g4dn.2xlarge", "g4dn.4xlarge" ], - "aws-disk": "/dev/xvda:64", + "runs-on": [ + "Linux", + "cuda" + ], "image": "${{ inputs.cuda_image }}", "container_options": "--gpus all", "check_sycl_all": "cuda:gpu,host", From 0260be269c7df8c3f90e419ff2dc7000e25c1f3d Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 20:50:20 -0700 Subject: [PATCH 43/50] Preparing for merge --- .github/workflows/sycl_precommit.yml | 2 +- devops/actions/aws-ec2/aws-ec2.js | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index 8c8e15ef922b9..d9bbaa7032263 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -34,7 +34,7 @@ jobs: name: Resolve Test Matrix uses: ./.github/workflows/sycl_resolve_test_matrix.yml with: - lts_config: "hip_amdgpu;ocl_gen9;l0_gen9;cuda" + lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda" linux_default: name: Linux diff --git a/devops/actions/aws-ec2/aws-ec2.js b/devops/actions/aws-ec2/aws-ec2.js index 
e8f2ca1926985..bf7cedea673de 100644 --- a/devops/actions/aws-ec2/aws-ec2.js +++ b/devops/actions/aws-ec2/aws-ec2.js @@ -3,16 +3,13 @@ const github = require('@actions/github'); const AWS = require('aws-sdk'); // shortcut to reference current repo -//const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; -const repo = `intel/llvm`; +const repo = `${github.context.repo.owner}/${github.context.repo.repo}`; // get github registration token that allows to register new runner based on // GH_PERSONAL_ACCESS_TOKEN github user api key async function getGithubRegToken() { core.info("Preparing Github SDK API"); - const token = core.getInput("GH_PERSONAL_ACCESS_TOKEN"); - core.info(`${token.length}`); - const octokit = github.getOctokit(token); + const octokit = github.getOctokit(core.getInput("GH_PERSONAL_ACCESS_TOKEN")); try { core.info(`Getting Github Actions Runner registration token for ${repo} repo`); From 152d0b7fcc82a371b3a83dda049b77859195d2c2 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 21:08:29 -0700 Subject: [PATCH 44/50] Skip empty AWS start job --- .github/workflows/sycl_linux_build_and_test.yml | 13 +++++++++---- .github/workflows/sycl_post_commit.yml | 1 + .github/workflows/sycl_precommit.yml | 3 ++- .github/workflows/sycl_resolve_test_matrix.yml | 3 +++ devops/scripts/generate_test_matrix.js | 9 +++++++++ 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index f74afdbc87b72..6f357f226b3ba 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -40,6 +40,10 @@ on: type: string required: false default: "" + lts_aws_matrix: + type: string + required: false + default: "" lts_cmake_extra_args: type: string required: false @@ -158,6 +162,7 @@ jobs: aws-start: name: Start AWS needs: build + if: ${{ inputs.lts_aws_matrix != '' }} runs-on: ubuntu-latest 
environment: aws steps: @@ -168,14 +173,14 @@ jobs: - name: Start AWS EC2 runners uses: ./llvm/devops/actions/aws-ec2 with: - runs-on-list: ${{ inputs.lts_matrix }} + runs-on-list: ${{ inputs.lts_aws_matrix }} GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} llvm_test_suite: needs: [build, aws-start] - if: ${{ inputs.lts_matrix != '' }} + if: ${{ !failure() && inputs.lts_matrix != '' }} strategy: fail-fast: false max-parallel: ${{ inputs.max_parallel }} @@ -225,7 +230,7 @@ jobs: aws-stop: name: Stop AWS needs: [ aws-start, llvm_test_suite ] - if: ${{ always() }} + if: ${{ always() && inputs.lts_aws_matrix != '' }} runs-on: ubuntu-latest environment: aws steps: @@ -236,7 +241,7 @@ jobs: - name: Stop AWS EC2 runners uses: ./llvm/devops/actions/aws-ec2 with: - runs-on-list: ${{ inputs.lts_matrix }} + runs-on-list: ${{ inputs.lts_aws_matrix }} mode: stop GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index 04f335fe030b9..cef878bb8331d 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -26,6 +26,7 @@ jobs: build_cache_root: "/__w/llvm" build_artifact_suffix: "post_commit" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} linux_no_assert: name: Linux (no assert) uses: ./.github/workflows/sycl_linux_build_and_test.yml diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index d9bbaa7032263..9ff053343441f 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -1,7 +1,7 @@ name: SYCL on: - pull_request_target: + pull_request: branches: - sycl # Do not run builds if changes are only in the following locations @@
-50,3 +50,4 @@ jobs: build_artifact_suffix: "default" build_cache_suffix: "default" lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: ${{ needs.resolve_matrix.outputs.lts_aws_matrix }} diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 4b67cb8b1cd9b..4bb754d1837d9 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -28,6 +28,9 @@ on: lts_matrix: description: "Generated Matrix" value: ${{ jobs.resolve_matrix.outputs.lts_matrix }} + lts_aws_matrix: + description: "Generated Matrix AWS subset" + value: ${{ jobs.resolve_matrix.outputs.lts_aws_matrix }} jobs: resolve_matrix: name: Resolve Test Matrix diff --git a/devops/scripts/generate_test_matrix.js b/devops/scripts/generate_test_matrix.js index dd6cdafcb689d..154af1eb49240 100644 --- a/devops/scripts/generate_test_matrix.js +++ b/devops/scripts/generate_test_matrix.js @@ -27,6 +27,7 @@ module.exports = ({core, process}) => { const ltsConfigs = inputs.lts_config.split(';'); const enabledLTSConfigs = []; + const enabledLTSAWSConfigs = []; testConfigs.lts.forEach(v => { if (ltsConfigs.includes(v.config)) { @@ -44,22 +45,30 @@ module.exports = ({core, process}) => { v["env"] = {}; } enabledLTSConfigs.push(v); + if (v["aws-type"]) enabledLTSAWSConfigs.push(v); } }); let ltsString = JSON.stringify(enabledLTSConfigs); + let ltsAWSString = JSON.stringify(enabledLTSAWSConfigs); console.log(ltsString); + console.log(ltsAWSString) for (let [key, value] of Object.entries(inputs)) { ltsString = ltsString.replaceAll("${{ inputs." + key + " }}", value); + ltsAWSString = ltsAWSString.replaceAll("${{ inputs." 
+ key + " }}", value); } if (needsDrivers) { ltsString = ltsString.replaceAll( "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", "ghcr.io/intel/llvm/ubuntu2004_base:latest"); + ltsAWSString = ltsAWSString.replaceAll( + "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest", + "ghcr.io/intel/llvm/ubuntu2004_base:latest"); } core.setOutput('lts_matrix', ltsString); + core.setOutput('lts_aws_matrix', ltsAWSString); } }); } From 025a2d96dd4fdc0493d1d2256b139c6211f0a920 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 21:09:48 -0700 Subject: [PATCH 45/50] Indent fix --- .github/workflows/sycl_resolve_test_matrix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_resolve_test_matrix.yml b/.github/workflows/sycl_resolve_test_matrix.yml index 4bb754d1837d9..8feeee04860b3 100644 --- a/.github/workflows/sycl_resolve_test_matrix.yml +++ b/.github/workflows/sycl_resolve_test_matrix.yml @@ -28,7 +28,7 @@ on: lts_matrix: description: "Generated Matrix" value: ${{ jobs.resolve_matrix.outputs.lts_matrix }} - lts_aws_matrix: + lts_aws_matrix: description: "Generated Matrix AWS subset" value: ${{ jobs.resolve_matrix.outputs.lts_aws_matrix }} jobs: From 291bc39072c416c56102185e0f62dd23264287af Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 21:11:03 -0700 Subject: [PATCH 46/50] Indent fix --- .github/workflows/sycl_linux_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 6f357f226b3ba..7ee30929b21bf 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ b/.github/workflows/sycl_linux_build_and_test.yml @@ -162,7 +162,7 @@ jobs: aws-start: name: Start AWS needs: build - if: ${{ inputs.lts_aws_matrix != '' }} + if: ${{ inputs.lts_aws_matrix != '' }} runs-on: ubuntu-latest environment: aws steps: From 20e9fa9ddb912762407254c0adf010b79039ede5 Mon 
Sep 17 00:00:00 2001 From: apstasen Date: Mon, 8 Aug 2022 21:57:22 -0700 Subject: [PATCH 47/50] Use target repo env --- .github/workflows/sycl_precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index 9ff053343441f..da4f96eea07bc 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -1,7 +1,7 @@ name: SYCL on: - pull_request: + pull_request_target: branches: - sycl # Do not run builds if changes are only in the following locations From 6cd3733aa995b2bc2fe0a3ac1fa05acf7e5f6001 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 22:19:19 -0700 Subject: [PATCH 48/50] Use exact package versions --- devops/actions/aws-ec2/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devops/actions/aws-ec2/package.json b/devops/actions/aws-ec2/package.json index 449bb9d8ab2c5..8dc4f1087946c 100644 --- a/devops/actions/aws-ec2/package.json +++ b/devops/actions/aws-ec2/package.json @@ -2,8 +2,8 @@ "name": "aws-ec2", "description": "Start AWS EC2 spot instances with Github actions runner agent in it", "dependencies": { - "@actions/core": "^1.9.0", - "@actions/github": "^5.0.3", - "aws-sdk": "^2.1179.0" + "@actions/core": "1.9.0", + "@actions/github": "5.0.3", + "aws-sdk": "2.1179.0" } } From c0187cc09366161691a1c588d49f43fef56db944 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 22:46:51 -0700 Subject: [PATCH 49/50] Improved security --- .../workflows/sycl_linux_build_and_test.yml | 26 ++++++++++++------- .github/workflows/sycl_precommit.yml | 3 ++- devops/actions/cached_checkout/action.yml | 1 + 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/.github/workflows/sycl_linux_build_and_test.yml b/.github/workflows/sycl_linux_build_and_test.yml index 7ee30929b21bf..9785a561f41e5 100644 --- a/.github/workflows/sycl_linux_build_and_test.yml +++ 
b/.github/workflows/sycl_linux_build_and_test.yml @@ -166,12 +166,15 @@ jobs: runs-on: ubuntu-latest environment: aws steps: - - uses: actions/checkout@v3 - with: - path: llvm - - run: npm install ./llvm/devops/actions/aws-ec2 + - name: Setup script + run: | + mkdir -p ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/action.yml -P ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/aws-ec2.js -P ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/package.json -P ./aws-ec2 + npm install ./aws-ec2 - name: Start AWS EC2 runners - uses: ./llvm/devops/actions/aws-ec2 + uses: ./aws-ec2 with: runs-on-list: ${{ inputs.lts_aws_matrix }} GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} @@ -234,12 +237,15 @@ jobs: runs-on: ubuntu-latest environment: aws steps: - - uses: actions/checkout@v3 - with: - path: llvm - - run: npm install ./llvm/devops/actions/aws-ec2 + - name: Setup script + run: | + mkdir -p ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/action.yml -P ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/aws-ec2.js -P ./aws-ec2 + wget raw.githubusercontent.com/intel/llvm/sycl/devops/actions/aws-ec2/package.json -P ./aws-ec2 + npm install ./aws-ec2 - name: Stop AWS EC2 runners - uses: ./llvm/devops/actions/aws-ec2 + uses: ./aws-ec2 with: runs-on-list: ${{ inputs.lts_aws_matrix }} mode: stop diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index da4f96eea07bc..cdf083d8b0821 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -1,7 +1,7 @@ name: SYCL on: - pull_request_target: + pull_request: branches: - sycl # Do not run builds if changes are only in the following locations @@ -25,6 +25,7 @@ jobs: steps: - uses: actions/checkout@v2 with: + persist-credentials: false fetch-depth: 2 - name: Run clang-format uses: 
./devops/actions/clang-format diff --git a/devops/actions/cached_checkout/action.yml b/devops/actions/cached_checkout/action.yml index c9c4633e9513b..c4ed22704ebcf 100644 --- a/devops/actions/cached_checkout/action.yml +++ b/devops/actions/cached_checkout/action.yml @@ -36,6 +36,7 @@ runs: GIT_ALTERNATE_OBJECT_DIRECTORIES: ${{ inputs.cache_path }}/${{ inputs.repository }}/.git/objects uses: actions/checkout@v2 with: + persist-credentials: false repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} path: ${{ inputs.path }} From e47afb3bc513d246a279e476a989133f42f160d8 Mon Sep 17 00:00:00 2001 From: Alexander P Stasenko Date: Mon, 8 Aug 2022 23:05:36 -0700 Subject: [PATCH 50/50] Enable target env for PR --- .github/workflows/sycl_precommit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sycl_precommit.yml b/.github/workflows/sycl_precommit.yml index cdf083d8b0821..e9539f9a0d13a 100644 --- a/.github/workflows/sycl_precommit.yml +++ b/.github/workflows/sycl_precommit.yml @@ -1,7 +1,7 @@ name: SYCL on: - pull_request: + pull_request_target: branches: - sycl # Do not run builds if changes are only in the following locations