-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[bnb] Add bnb nightly workflow (#1282)
* add bnb nightly workflow
* add matrix strategy
* temp
* oops
* temp
* oops
* nit
* fixes
* up
* up
* up
* add pytest cov
* up
* oops
* put correct dir
* fix
* fix dir in makefile + failing test
* revert
* Update .github/workflows/nightly.yml
* Update nightly-bnb.yml
* Update log_reports.py
* Update Makefile
* Update .github/workflows/nightly-bnb.yml
* Update .github/workflows/nightly-bnb.yml
* Update .github/workflows/nightly.yml
* Update nightly.yml
* Update .github/workflows/nightly-bnb.yml
* Update nightly-bnb.yml
- Loading branch information
1 parent 482a2a6, commit 029dcd5
Showing 3 changed files with 243 additions and 105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
name: BNB from source self-hosted runner with slow tests (scheduled) | ||
|
||
on: | ||
workflow_dispatch: | ||
schedule: | ||
- cron: "0 2 * * *" | ||
|
||
env: | ||
RUN_SLOW: "yes" | ||
IS_GITHUB_CI: "1" | ||
# To be able to run tests on CUDA 12.2 | ||
NVIDIA_DISABLE_REQUIRE: "1" | ||
SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }} | ||
|
||
|
||
jobs: | ||
run_all_tests_single_gpu: | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest"] | ||
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci] | ||
env: | ||
CUDA_VISIBLE_DEVICES: "0" | ||
TEST_TYPE: "single_gpu_${{ matrix.docker-image-name }}" | ||
container: | ||
image: ${{ matrix.docker-image-name }} | ||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ -e NVIDIA_DISABLE_REQUIRE=true | ||
defaults: | ||
run: | ||
shell: bash | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Pip install | ||
run: | | ||
source activate peft | ||
pip install -e . --no-deps | ||
pip install pytest-reportlog pytest-cov parameterized datasets scipy einops | ||
mkdir transformers-clone && git clone https://github.com/huggingface/transformers.git transformers-clone # clone into transformers-clone (not transformers) to avoid a module name conflict | ||
- name: Run examples on single GPU | ||
run: | | ||
source activate peft | ||
make tests_examples_single_gpu_bnb | ||
- name: Run core tests on single GPU | ||
run: | | ||
source activate peft | ||
make tests_core_single_gpu_bnb | ||
- name: Run transformers tests on single GPU | ||
run: | | ||
source activate peft | ||
make transformers_tests | ||
- name: Generate Report | ||
if: always() | ||
run: | | ||
pip install slack_sdk tabulate | ||
python scripts/log_reports.py --slack_channel_name bnb-daily-ci >> $GITHUB_STEP_SUMMARY | ||
run_all_tests_multi_gpu: | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest"] | ||
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci] | ||
env: | ||
CUDA_VISIBLE_DEVICES: "0,1" | ||
TEST_TYPE: "multi_gpu_${{ matrix.docker-image-name }}" | ||
container: | ||
image: ${{ matrix.docker-image-name }} | ||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ -e NVIDIA_DISABLE_REQUIRE=true | ||
defaults: | ||
run: | ||
shell: bash | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Pip install | ||
run: | | ||
source activate peft | ||
pip install -e . --no-deps | ||
pip install pytest-reportlog pytest-cov parameterized datasets scipy einops | ||
mkdir transformers-clone && git clone https://github.com/huggingface/transformers.git transformers-clone | ||
- name: Run core GPU tests on multi-gpu | ||
run: | | ||
source activate peft | ||
- name: Run examples on multi GPU | ||
run: | | ||
source activate peft | ||
make tests_examples_multi_gpu_bnb | ||
- name: Run core tests on multi GPU | ||
run: | | ||
source activate peft | ||
make tests_core_multi_gpu_bnb | ||
- name: Run transformers tests on multi GPU | ||
run: | | ||
source activate peft | ||
make transformers_tests | ||
- name: Generate Report | ||
if: always() | ||
run: | | ||
pip install slack_sdk tabulate | ||
python scripts/log_reports.py --slack_channel_name bnb-daily-ci >> $GITHUB_STEP_SUMMARY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,123 +1,136 @@ | ||
import json, os | ||
import argparse | ||
from pathlib import Path | ||
from datetime import date | ||
from tabulate import tabulate | ||
|
||
|
||
MAX_LEN_MESSAGE = 2900 # slack endpoint has a limit of 3001 characters | ||
|
||
failed = [] | ||
passed = [] | ||
|
||
group_info = [] | ||
|
||
total_num_failed = 0 | ||
empty_file = False or len(list(Path().glob("*.log"))) == 0 | ||
|
||
total_empty_files = [] | ||
|
||
for log in Path().glob("*.log"): | ||
section_num_failed = 0 | ||
i = 0 | ||
with open(log, "r") as f: | ||
for line in f: | ||
line = json.loads(line) | ||
i += 1 | ||
if line.get("nodeid", "") != "": | ||
test = line["nodeid"] | ||
if line.get("duration", None) is not None: | ||
duration = f'{line["duration"]:.4f}' | ||
if line.get("outcome", "") == "failed": | ||
section_num_failed += 1 | ||
failed.append([test, duration, log.name.split('_')[0]]) | ||
total_num_failed += 1 | ||
else: | ||
passed.append([test, duration, log.name.split('_')[0]]) | ||
empty_file = i == 0 | ||
group_info.append([str(log), section_num_failed, failed]) | ||
total_empty_files.append(empty_file) | ||
os.remove(log) | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument( | ||
"--slack_channel_name", | ||
default="peft-ci-daily" | ||
) | ||
|
||
|
||
def main(slack_channel_name=None): | ||
failed = [] | ||
no_error_payload = { | ||
"type": "section", | ||
"text": { | ||
"type": "plain_text", | ||
"text": "🌞 There were no failures!" if not any(total_empty_files) else "Something went wrong there is at least one empty file - please check GH action results.", | ||
"emoji": True | ||
} | ||
} | ||
passed = [] | ||
|
||
group_info = [] | ||
|
||
total_num_failed = 0 | ||
empty_file = False or len(list(Path().glob("*.log"))) == 0 | ||
|
||
message = "" | ||
payload = [ | ||
{ | ||
"type": "header", | ||
total_empty_files = [] | ||
|
||
for log in Path().glob("*.log"): | ||
section_num_failed = 0 | ||
i = 0 | ||
with open(log, "r") as f: | ||
for line in f: | ||
line = json.loads(line) | ||
i += 1 | ||
if line.get("nodeid", "") != "": | ||
test = line["nodeid"] | ||
if line.get("duration", None) is not None: | ||
duration = f'{line["duration"]:.4f}' | ||
if line.get("outcome", "") == "failed": | ||
section_num_failed += 1 | ||
failed.append([test, duration, log.name.split('_')[0]]) | ||
total_num_failed += 1 | ||
else: | ||
passed.append([test, duration, log.name.split('_')[0]]) | ||
empty_file = i == 0 | ||
group_info.append([str(log), section_num_failed, failed]) | ||
total_empty_files.append(empty_file) | ||
os.remove(log) | ||
failed = [] | ||
no_error_payload = { | ||
"type": "section", | ||
"text": { | ||
"type": "plain_text", | ||
"text": "🤗 Results of the {} PEFT scheduled tests.".format(os.environ.get("TEST_TYPE", "")), | ||
} | ||
}, | ||
] | ||
if total_num_failed > 0: | ||
for i, (name, num_failed, failed_tests) in enumerate(group_info): | ||
if num_failed > 0: | ||
if num_failed == 1: | ||
message += f"*{name}: {num_failed} failed test*\n" | ||
else: | ||
message += f"*{name}: {num_failed} failed tests*\n" | ||
failed_table = [] | ||
for test in failed_tests: | ||
failed_table.append(test[0].split("::")) | ||
failed_table = tabulate(failed_table, headers=["Test Location", "Test Case", "Test Name"], showindex="always", tablefmt="grid", maxcolwidths=[12, 12, 12]) | ||
message += '\n```\n' +failed_table + '\n```' | ||
|
||
if total_empty_files[i]: | ||
message += f"\n*{name}: Warning! Empty file - please check the GitHub action job *\n" | ||
print(f'### {message}') | ||
else: | ||
payload.append(no_error_payload) | ||
|
||
if os.environ.get("TEST_TYPE", "") != "": | ||
from slack_sdk import WebClient | ||
|
||
if len(message) > MAX_LEN_MESSAGE: | ||
print(f"Truncating long message from {len(message)} to {MAX_LEN_MESSAGE}") | ||
message = message[:MAX_LEN_MESSAGE] + "..." | ||
|
||
if len(message) != 0: | ||
md_report = { | ||
"type": "section", | ||
"text": { | ||
"type": "mrkdwn", | ||
"text": message | ||
}, | ||
"text": "🌞 There were no failures!" if not any(total_empty_files) else "Something went wrong there is at least one empty file - please check GH action results.", | ||
"emoji": True | ||
} | ||
payload.append(md_report) | ||
action_button = { | ||
"type": "section", | ||
} | ||
|
||
message = "" | ||
payload = [ | ||
{ | ||
"type": "header", | ||
"text": { | ||
"type": "mrkdwn", | ||
"text": "*For more details:*" | ||
}, | ||
"accessory": { | ||
"type": "button", | ||
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, | ||
"url": f"https://github.com/huggingface/peft/actions/runs/{os.environ['GITHUB_RUN_ID']}", | ||
}, | ||
"type": "plain_text", | ||
"text": "🤗 Results of the {} PEFT scheduled tests.".format(os.environ.get("TEST_TYPE", "")), | ||
} | ||
}, | ||
] | ||
if total_num_failed > 0: | ||
for i, (name, num_failed, failed_tests) in enumerate(group_info): | ||
if num_failed > 0: | ||
if num_failed == 1: | ||
message += f"*{name}: {num_failed} failed test*\n" | ||
else: | ||
message += f"*{name}: {num_failed} failed tests*\n" | ||
failed_table = [] | ||
for test in failed_tests: | ||
failed_table.append(test[0].split("::")) | ||
failed_table = tabulate(failed_table, headers=["Test Location", "Test Case", "Test Name"], showindex="always", tablefmt="grid", maxcolwidths=[12, 12, 12]) | ||
message += '\n```\n' +failed_table + '\n```' | ||
|
||
if total_empty_files[i]: | ||
message += f"\n*{name}: Warning! Empty file - please check the GitHub action job *\n" | ||
print(f'### {message}') | ||
else: | ||
payload.append(no_error_payload) | ||
|
||
if os.environ.get("TEST_TYPE", "") != "": | ||
from slack_sdk import WebClient | ||
|
||
if len(message) > MAX_LEN_MESSAGE: | ||
print(f"Truncating long message from {len(message)} to {MAX_LEN_MESSAGE}") | ||
message = message[:MAX_LEN_MESSAGE] + "..." | ||
|
||
if len(message) != 0: | ||
md_report = { | ||
"type": "section", | ||
"text": { | ||
"type": "mrkdwn", | ||
"text": message | ||
}, | ||
} | ||
payload.append(md_report) | ||
action_button = { | ||
"type": "section", | ||
"text": { | ||
"type": "mrkdwn", | ||
"text": "*For more details:*" | ||
}, | ||
"accessory": { | ||
"type": "button", | ||
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, | ||
"url": f"https://github.com/huggingface/peft/actions/runs/{os.environ['GITHUB_RUN_ID']}", | ||
}, | ||
} | ||
payload.append(action_button) | ||
|
||
date_report = { | ||
"type": "context", | ||
"elements": [ | ||
{ | ||
"type": "plain_text", | ||
"text": f"Nightly {os.environ.get('TEST_TYPE')} test results for {date.today()}", | ||
}, | ||
], | ||
} | ||
payload.append(action_button) | ||
payload.append(date_report) | ||
|
||
date_report = { | ||
"type": "context", | ||
"elements": [ | ||
{ | ||
"type": "plain_text", | ||
"text": f"Nightly {os.environ.get('TEST_TYPE')} test results for {date.today()}", | ||
}, | ||
], | ||
} | ||
payload.append(date_report) | ||
print(payload) | ||
|
||
print(payload) | ||
client = WebClient(token=os.environ.get("SLACK_API_TOKEN")) | ||
client.chat_postMessage(channel=f"#{slack_channel_name}", text=message, blocks=payload) | ||
|
||
client = WebClient(token=os.environ.get("SLACK_API_TOKEN")) | ||
client.chat_postMessage(channel="#peft-ci-daily", text=message, blocks=payload) | ||
if __name__ == "__main__": | ||
args = parser.parse_args() | ||
main(args.slack_channel_name) |