Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ur-build-hw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ env:
UR_LOG_OPENCL: "level:error;flush:error"

jobs:
# Runs the reusable health-check workflow and forwards this workflow's
# `runner_name` input to it.
health-check:
name: Health Check
uses: ./.github/workflows/ur-health-check.yml
with:
runner_name: ${{ inputs.runner_name }}

adapter_build_hw:
name: Build & CTS
# run only on upstream; forks won't have the HW
Expand Down
124 changes: 124 additions & 0 deletions .github/workflows/ur-health-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Monitors the health of CI runners by recording their load average.
# When called from another workflow, it samples the load average of the runner
# named by `runner_name` and uploads the sample as an artifact. The daily
# scheduled run aggregates the previous day's samples into one JSON file.

name: UR Health Monitoring

# Two entry points: `workflow_call` (requires the `runner_name` string input)
# and a daily `schedule`.
on:
workflow_call:
inputs:
runner_name:
required: true
type: string
schedule:
- cron: '0 0 * * *' # Runs daily at midnight

jobs:
health-check:
  # Samples the load average of the runner selected by the caller and publishes
  # it as a timestamped artifact.
  #
  # Inside a reusable workflow the `github` context belongs to the caller, so
  # `github.event_name` is the caller's event (push, pull_request, schedule, ...)
  # and is never 'workflow_call'. The required `runner_name` input is what
  # distinguishes a call from this workflow's own scheduled run, where the
  # input is empty.
  if: inputs.runner_name != ''
  runs-on: ${{ inputs.runner_name }}
  # Artifact upload authenticates with the runner's runtime token, so the
  # GITHUB_TOKEN does not need any scope in this job.
  permissions: {}
  steps:
    # The runner name is read from the default RUNNER_NAME environment variable
    # rather than being template-expanded into the script, so a name that
    # contains shell metacharacters is treated as data and not as code.
    # The timestamp is taken in UTC so that samples from runners in different
    # time zones land on the same calendar day the aggregation job searches for.
    - name: Check load average
      id: check
      run: |
        DATE=$(date -u +"%Y-%m-%d_%H-%M-%S")
        FILE="artifacts/${DATE}_${RUNNER_NAME}.txt"
        echo "DATE=${DATE}" >> "$GITHUB_OUTPUT"
        mkdir -p artifacts
        echo "Directory created, verifying..."
        ls -la artifacts/
        uptime | awk -F'load average:' '{ print $2 }' > "$FILE"
        echo "File created, verifying contents..."
        cat "$FILE"
        echo "File path: $FILE"
        ls -lh "$FILE"

    # `with:` values are passed to the action as inputs and are not interpreted
    # by a shell, so template expansion is safe here.
    - name: Upload load average artifact
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      with:
        name: ${{ steps.check.outputs.DATE }}_${{ runner.name }}_load-average
        path: artifacts/${{ steps.check.outputs.DATE }}_${{ runner.name }}.txt
        # Fail loudly instead of silently producing a run without a sample.
        if-no-files-found: error
Comment on lines 16 to 37

Check warning

Code scanning / zizmor

overly broad permissions Warning

overly broad permissions

aggregate-load-data:
  # Collects every load-average sample uploaded on the previous day and merges
  # them into a single JSON artifact of the form {"<artifact file name>": "<load>"}.
  #
  # The `runner_name` check keeps this job from also running when a *scheduled*
  # caller invokes this file as a reusable workflow (the `github` context, and
  # therefore `github.event_name`, is the caller's in that case).
  if: github.event_name == 'schedule' && inputs.runner_name == ''
  runs-on: ubuntu-latest
  # Least privilege: listing runs and downloading artifacts only needs read access.
  permissions:
    actions: read
    contents: read
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        # Do not leave the token in .git/config where later steps could pick it up.
        persist-credentials: false

    - name: Calculate yesterday's date
      id: date
      run: |
        YESTERDAY=$(date -u -d "yesterday" +'%Y-%m-%d')
        echo "YESTERDAY=$YESTERDAY" >> "$GITHUB_OUTPUT"
        echo "Date for artifact search: $YESTERDAY"

    # Samples are uploaded by the health-check job while it runs as part of a
    # *caller's* workflow run, so the artifacts belong to those runs and not to
    # runs of ur-health-check.yml. All repository runs created yesterday are
    # therefore searched, regardless of their conclusion (a sample is still
    # valid when a later job of the same run failed).
    # Values are handed to the script through `env` instead of being
    # template-expanded into it.
    - name: Download all artifacts from yesterday's health-check runs
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        YESTERDAY: ${{ steps.date.outputs.YESTERDAY }}
      run: |
        echo "Searching for all workflow runs from $YESTERDAY"

        # --paginate follows every result page; a failed request aborts the step.
        gh api --paginate \
          -H "Accept: application/vnd.github+json" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "repos/${REPO}/actions/runs?created=${YESTERDAY}&per_page=100" \
          --jq '.workflow_runs[].id' > run_ids.txt

        mkdir -p artifacts

        # Download artifacts from each run; runs without samples are expected.
        while IFS= read -r run_id; do
          echo "Downloading artifacts from run $run_id"
          gh run download "$run_id" \
            --dir artifacts \
            --pattern "${YESTERDAY}*" \
            --repo "$REPO" || echo "No matching artifacts in run $run_id"
        done < run_ids.txt

        echo "All artifacts downloaded:"
        ls -R artifacts/

    # The JSON document is built with jq so that file names and values are
    # always correctly escaped.
    - name: Process and aggregate data
      env:
        YESTERDAY: ${{ steps.date.outputs.YESTERDAY }}
      run: |
        OUT="aggregated_${YESTERDAY}.json"
        echo "Processing artifacts from ${YESTERDAY}"
        ls -laR artifacts/

        echo '{}' > "$OUT"

        # Without nullglob an unmatched pattern would be iterated literally.
        shopt -s nullglob
        for file in artifacts/*/*.txt; do
          if [[ -f "$file" ]]; then
            filename=$(basename "$file" .txt)
            # xargs trims the surrounding whitespace left by the awk split.
            load=$(xargs < "$file")
            jq --arg key "$filename" --arg load "$load" \
              '. + {($key): $load}' "$OUT" > "$OUT.tmp"
            mv "$OUT.tmp" "$OUT"
          fi
        done

        cat "$OUT"

    - name: Check if data exists
      id: check_data
      env:
        YESTERDAY: ${{ steps.date.outputs.YESTERDAY }}
      run: |
        # jq -e exits non-zero when the expression is false, i.e. for an empty object.
        if jq -e 'length > 0' "aggregated_${YESTERDAY}.json" > /dev/null; then
          echo "has_data=true" >> "$GITHUB_OUTPUT"
          echo "Data found, will upload artifact"
        else
          echo "has_data=false" >> "$GITHUB_OUTPUT"
          echo "No data found, skipping artifact upload"
        fi

    - name: Upload aggregated data artifact
      if: steps.check_data.outputs.has_data == 'true'
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
      with:
        name: aggregated_data_${{ steps.date.outputs.YESTERDAY }}
        path: aggregated_${{ steps.date.outputs.YESTERDAY }}.json
Loading