From 9f1d9c89da44ec433551b499e08d99d22e8d8fc9 Mon Sep 17 00:00:00 2001
From: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Date: Tue, 9 Apr 2024 17:10:29 +0200
Subject: [PATCH] Fix quantization tests (#29914)

* revert back to torch 2.1.1
* run test
* switch to torch 2.2.1
* update dockerfile
* fix awq tests
* fix test
* run quanto tests
* update tests
* split quantization tests
* fix
* fix again
* final fix
* fix report artifact
* build docker again
* Revert "build docker again"

This reverts commit 399a5f9d9308da071d79034f238c719de0f3532e.

* debug
* revert
* style
* new notification system
* testing notification
* rebuild docker
* fix_prev_ci_results
* typo
* remove warning
* fix typo
* fix artifact name
* debug
* issue fixed
* debug again
* fix
* fix time
* test notif with failing test
* typo
* issues again
* final fix ?
* run all quantization tests again
* remove name to clear space
* revert modification done on workflow
* fix
* build docker
* build only quant docker
* fix quantization ci
* fix
* fix report
* better quantization_matrix
* add print
* revert to the basic one
---
 .github/workflows/self-scheduled.yml          |  36 ++-
 .github/workflows/slack-report.yml            |  25 +-
 .../Dockerfile                                |   8 +-
 src/transformers/utils/quantization_config.py |   2 +-
 tests/quantization/autoawq/test_awq.py        |  30 +--
 utils/notification_service.py                 |   2 -
 utils/notification_service_quantization.py    | 251 ++++++++++++++++++
 7 files changed, 324 insertions(+), 30 deletions(-)
 create mode 100644 utils/notification_service_quantization.py

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 3e563e94e15ca0..81620b740ba81d 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -33,7 +33,6 @@ env:
 
 jobs:
   setup:
-    if: ${{ inputs.job == 'run_tests_gpu' }}
     name: Setup
     strategy:
       matrix:
@@ -45,6 +44,7 @@ jobs:
     outputs:
       folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
       slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
+      quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
     steps:
       - name: Update clone
         working-directory: /transformers
@@ -63,11 +63,19 @@ jobs:
         run: pip freeze
 
       - id: set-matrix
+        if: ${{ inputs.job == 'run_tests_gpu' }}
         name: Identify models to test
         working-directory: /transformers/tests
         run: |
           echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
           echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+
+      - id: set-matrix-quantization
+        if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+        name: Identify quantization method to test
+        working-directory: /transformers/tests
+        run: |
+          echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
 
       - name: NVIDIA-SMI
         run: |
@@ -303,16 +311,26 @@ jobs:
 
   run_tests_quantization_torch_gpu:
     if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
-    name: Quantization tests
+    name: " "
     strategy:
       fail-fast: false
       matrix:
+        folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
         machine_type: [single-gpu, multi-gpu]
     runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
     container:
       image: huggingface/transformers-quantization-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v
/mnt/cache/.cache/huggingface:/mnt/cache/ steps: + - name: Echo folder ${{ matrix.folders }} + shell: bash + run: | + echo "${{ matrix.folders }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'quantization/'/'quantization_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + - name: Update clone working-directory: /transformers run: git fetch && git checkout ${{ github.sha }} @@ -337,19 +355,19 @@ jobs: - name: Run quantization tests on GPU working-directory: /transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}" if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu + name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }} + path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} run_extract_warnings: # Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic. @@ -413,4 +431,6 @@ jobs: slack_report_channel: ${{ inputs.slack_report_channel }} # This would be an empty string if `setup` is skipped. folder_slices: ${{ needs.setup.outputs.folder_slices }} - secrets: inherit \ No newline at end of file + quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} + + secrets: inherit diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index 0e964e8596a0f5..9e62417c76dfe7 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -15,6 +15,9 @@ on: folder_slices: required: true type: string + quantization_matrix: + required: true + type: string jobs: @@ -32,6 +35,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 - name: Send message to Slack + if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }} env: CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} @@ -53,7 +57,26 @@ jobs: pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ inputs.folder_slices }}" - + + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + - name: Send message to Slack for quantization workflow + if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }} + env: + CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} + ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} + CI_EVENT: scheduled + CI_SHA: ${{ github.sha }} + SETUP_STATUS: ${{ inputs.setup_status }} + # We pass `needs.setup.outputs.quantization_matrix` as the argument. 
A processing in `notification_service_quantization.py` to change + # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`. + run: | + sudo apt-get install -y curl + pip install slack_sdk + pip show slack_sdk + python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" + # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. - name: Failure table artifacts # Only the model testing job is concerned for this step diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile index 8a526c72981620..e1d084c4033902 100644 --- a/docker/transformers-quantization-latest-gpu/Dockerfile +++ b/docker/transformers-quantization-latest-gpu/Dockerfile @@ -9,7 +9,7 @@ SHELL ["sh", "-lc"] # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # to be used as arguments for docker build (so far). -ARG PYTORCH='2.2.0' +ARG PYTORCH='2.2.1' # Example: `cu102`, `cu113`, etc. ARG CUDA='cu118' @@ -30,6 +30,9 @@ RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch] RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate +# needed in bnb and awq +RUN python3 -m pip install --no-cache-dir einops + # Add bitsandbytes for mixed int8 testing RUN python3 -m pip install --no-cache-dir bitsandbytes @@ -43,7 +46,8 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 # Add autoawq for quantization testing -RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.0/autoawq-0.2.0+cu118-cp38-cp38-linux_x86_64.whl +# >=v0.2.3 needed for compatibility with torch 2.2.1 +RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl # Add quanto for quantization testing RUN python3 -m pip install --no-cache-dir quanto diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index 69bb0d5272be97..d91ecef16e37e1 100644 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -789,7 +789,7 @@ def post_init(self): def get_loading_attributes(self): attibutes_dict = copy.deepcopy(self.__dict__) - loading_attibutes = ["version", "do_fuse", "modules_to_fuse", "fuse_max_seq_len"] + loading_attibutes = ["version", "do_fuse", "modules_to_fuse", "fuse_max_seq_len", "exllama_config"] loading_attibutes_dict = {i: j for i, j in attibutes_dict.items() if i in loading_attibutes} return loading_attibutes_dict diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py index 8ed8c394f424c5..8215f3f1458173 100644 --- a/tests/quantization/autoawq/test_awq.py +++ b/tests/quantization/autoawq/test_awq.py @@ -101,7 +101,7 @@ class AwqTest(unittest.TestCase): EXPECTED_OUTPUT = "Hello my name is Katie and I am a 20 year old student at the University of North Carolina at Chapel Hill. I am a junior and I am majoring in Journalism and minoring in Spanish" EXPECTED_OUTPUT_BF16 = "Hello my name is Katie and I am a 20 year old student at the University of North Carolina at Chapel Hill. 
I am a junior and I am majoring in Exercise and Sport Science with a" - + EXPECTED_OUTPUT_EXLLAMA = "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out" device_map = "cuda" # called only once for all test in this class @@ -200,11 +200,11 @@ def test_quantized_model_exllama(self): quantization_config = AwqConfig(version="exllama") quantized_model = AutoModelForCausalLM.from_pretrained( - self.model_name, quantization_config=quantization_config - ).to(torch_device) + self.model_name, quantization_config=quantization_config, device_map=torch_device + ) output = quantized_model.generate(**input_ids, max_new_tokens=40) - self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT) + self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_EXLLAMA) def test_quantized_model_no_device_map(self): """ @@ -239,7 +239,7 @@ def test_quantized_model_multi_gpu(self): quantized_model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto") - self.assertTrue(set(quantized_model.hf_device_map.values()) == {0, 1, 2, 3}) + self.assertTrue(set(quantized_model.hf_device_map.values()) == {0, 1}) output = quantized_model.generate(**input_ids, max_new_tokens=40) @@ -272,8 +272,8 @@ class AwqFusedTest(unittest.TestCase): model_name = "TheBloke/Mistral-7B-OpenOrca-AWQ" model_revision = "7048b2af77d0dd1c81b000b19d73f9cc8950b510" - custom_mapping_model_id = "TheBloke/Yi-34B-AWQ" - custom_model_revision = "f1b2cd1b7459ceecfdc1fac5bb8725f13707c589" + custom_mapping_model_id = "TheBloke/Mistral-7B-v0.1-AWQ" + custom_model_revision = "f186bcfa9edbe2a4334262ec1e67f23e53ed1ae7" mixtral_model_name = "casperhansen/mixtral-instruct-awq" mixtral_model_revision = "87dd4ec502dde74fb3a624835c776b000d190c3b" @@ -287,8 +287,8 @@ class AwqFusedTest(unittest.TestCase): "You end up exactly where you started. Where are you?" 
) - EXPECTED_GENERATION = prompt + "\n\nThis is a classic puzzle that has been around for" - EXPECTED_GENERATION_CUSTOM_MODEL = "HelloWorld.java:11)\r\n\tat org" + EXPECTED_GENERATION = prompt + "\n\nYou are at the starting point.\n\nIf" + EXPECTED_GENERATION_CUSTOM_MODEL = "Hello,\n\nI have a problem with my 20" EXPECTED_GENERATION_MIXTRAL = prompt + " You're on the North Pole.\n\nThe" def tearDown(self): @@ -423,28 +423,25 @@ def test_generation_custom_model(self): fuse_max_seq_len=512, modules_to_fuse={ "attention": ["q_proj", "k_proj", "v_proj", "o_proj"], - "layernorm": ["ln1", "ln2", "norm"], "mlp": ["gate_proj", "up_proj", "down_proj"], + "layernorm": ["input_layernorm", "post_attention_layernorm", "norm"], "use_alibi": False, - "num_attention_heads": 56, + "hidden_size": 4096, + "num_attention_heads": 32, "num_key_value_heads": 8, - "hidden_size": 7168, }, ) model = AutoModelForCausalLM.from_pretrained( self.custom_mapping_model_id, quantization_config=quantization_config, - trust_remote_code=True, device_map="balanced", revision=self.custom_model_revision, ) self._check_fused_modules(model) - tokenizer = AutoTokenizer.from_pretrained( - self.custom_mapping_model_id, revision=self.custom_model_revision, trust_remote_code=True - ) + tokenizer = AutoTokenizer.from_pretrained(self.custom_mapping_model_id, revision=self.custom_model_revision) prompt = "Hello" inputs = tokenizer(prompt, return_tensors="pt").to(torch_device) @@ -452,6 +449,7 @@ def test_generation_custom_model(self): outputs = model.generate(**inputs, max_new_tokens=12) self.assertEqual(tokenizer.decode(outputs[0], skip_special_tokens=True), self.EXPECTED_GENERATION_CUSTOM_MODEL) + @unittest.skip("Not enough GPU memory on CI runners") @require_torch_multi_gpu def test_generation_mixtral_fused(self): """ diff --git a/utils/notification_service.py b/utils/notification_service.py index 5378348ee9cc9e..158e01942b81fa 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1056,7 +1056,6 @@ def prepare_reports(title, header, reports, to_truncate=True): "TensorFlow pipelines": "run_tests_tf_pipeline_gpu", "Examples directory": "run_examples_gpu", "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports", - "Quantization tests": "run_tests_quantization_torch_gpu", } if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"): @@ -1077,7 +1076,6 @@ def prepare_reports(title, header, reports, to_truncate=True): "run_pipelines_tf_gpu": "TensorFlow pipelines", "run_examples_gpu": "Examples directory", "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests", - "run_tests_quantization_torch_gpu": "Quantization tests", } # Remove some entries in `additional_files` if they are not concerned. diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py new file mode 100644 index 00000000000000..11bc57e618a7e4 --- /dev/null +++ b/utils/notification_service_quantization.py @@ -0,0 +1,251 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import json +import os +import sys +import time +from typing import Dict + +from get_ci_error_statistics import get_jobs +from notification_service import ( + Message, + handle_stacktraces, + handle_test_results, + prepare_reports, + retrieve_artifact, + retrieve_available_artifacts, +) +from slack_sdk import WebClient + + +client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"]) + + +class QuantizationMessage(Message): + def __init__( + self, + title: str, + results: Dict, + ): + self.title = title + + # Failures and success of the modeling tests + self.n_success = sum(r["success"] for r in results.values()) + self.single_gpu_failures = sum(r["failed"]["single"] for r in results.values()) + self.multi_gpu_failures = sum(r["failed"]["multi"] for r in results.values()) + self.n_failures = self.single_gpu_failures + self.multi_gpu_failures + + self.n_tests = self.n_failures + self.n_success + self.results = results + self.thread_ts = None + + @property + def payload(self) -> str: + blocks = [self.header] + + if self.n_failures > 0: + blocks.append(self.failures_overwiew) + blocks.append(self.failures_detailed) + + if self.n_failures == 0: + blocks.append(self.no_failures) + + return json.dumps(blocks) + + @property + def time(self) -> str: + all_results = self.results.values() + time_spent = [] + for r in all_results: + if len(r["time_spent"]): + time_spent.extend([x for x in r["time_spent"].split(", ") if len(x.strip())]) + total_secs = 0 + + for time in time_spent: + time_parts = time.split(":") + + # Time can be formatted as xx:xx:xx, as .xx, or as x.xx if the time spent was less than a minute. + if len(time_parts) == 1: + time_parts = [0, 0, time_parts[0]] + + hours, minutes, seconds = int(time_parts[0]), int(time_parts[1]), float(time_parts[2]) + total_secs += hours * 3600 + minutes * 60 + seconds + + hours, minutes, seconds = total_secs // 3600, (total_secs % 3600) // 60, total_secs % 60 + return f"{int(hours)}h{int(minutes)}m{int(seconds)}s" + + @property + def failures_overwiew(self) -> Dict: + return { + "type": "section", + "text": { + "type": "plain_text", + "text": ( + f"There were {self.n_failures} failures, out of {self.n_tests} tests.\n" + f"The suite ran in {self.time}." + ), + "emoji": True, + }, + "accessory": { + "type": "button", + "text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, + "url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}", + }, + } + + @property + def failures_detailed(self) -> Dict: + failures = {k: v["failed"] for k, v in self.results.items()} + + individual_reports = [] + for key, value in failures.items(): + device_report = self.get_device_report(value) + if sum(value.values()): + report = f"{device_report}{key}" + individual_reports.append(report) + + header = "Single | Multi | Category\n" + failures_report = prepare_reports( + title="The following quantization tests had failures", header=header, reports=individual_reports + ) + + return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}} + + def post(self): + payload = self.payload + print("Sending the following payload") + print(json.dumps({"blocks": json.loads(payload)})) + + text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed." 
+ + self.thread_ts = client.chat_postMessage( + channel=SLACK_REPORT_CHANNEL_ID, + blocks=payload, + text=text, + ) + + def post_reply(self): + if self.thread_ts is None: + raise ValueError("Can only post reply if a post has been made.") + + for job, job_result in self.results.items(): + if len(job_result["failures"]): + for device, failures in job_result["failures"].items(): + blocks = self.get_reply_blocks( + job, + job_result, + failures, + device, + text=f'Number of failures: {job_result["failed"][device]}', + ) + + print("Sending the following reply") + print(json.dumps({"blocks": blocks})) + + client.chat_postMessage( + channel="#transformers-ci-daily-quantization", + text=f"Results for {job}", + blocks=blocks, + thread_ts=self.thread_ts["ts"], + ) + time.sleep(1) + + +if __name__ == "__main__": + setup_status = os.environ.get("SETUP_STATUS") + SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] + setup_failed = True if setup_status is not None and setup_status != "success" else False + + # This env. variable is set in workflow file (under the job `send_results`). + ci_event = os.environ["CI_EVENT"] + + title = f"🤗 Results of the {ci_event} tests." + + if setup_failed: + Message.error_out( + title, ci_title="", runner_not_available=False, runner_failed=False, setup_failed=setup_failed + ) + exit(0) + + arguments = sys.argv[1:][0] + try: + quantization_matrix = ast.literal_eval(arguments) + # Need to change from elements like `quantization/bnb` to `quantization_bnb` (the ones used as artifact names). + quantization_matrix = [x.replace("quantization/", "quantization_") for x in quantization_matrix] + except SyntaxError: + Message.error_out(title, ci_title="") + raise ValueError("Errored out.") + + available_artifacts = retrieve_available_artifacts() + + quantization_results = { + quant: { + "failed": {"single": 0, "multi": 0}, + "success": 0, + "time_spent": "", + "failures": {}, + "job_link": {}, + } + for quant in quantization_matrix + if f"run_tests_quantization_torch_gpu_{quant}" in available_artifacts + } + + github_actions_jobs = get_jobs( + workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"] + ) + github_actions_job_links = {job["name"]: job["html_url"] for job in github_actions_jobs} + + artifact_name_to_job_map = {} + for job in github_actions_jobs: + for step in job["steps"]: + if step["name"].startswith("Test suite reports artifacts: "): + artifact_name = step["name"][len("Test suite reports artifacts: ") :] + artifact_name_to_job_map[artifact_name] = job + break + + for quant in quantization_results.keys(): + for artifact_path in available_artifacts[f"run_tests_quantization_torch_gpu_{quant}"].paths: + artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) + if "stats" in artifact: + # Link to the GitHub Action job + job = artifact_name_to_job_map[artifact_path["path"]] + quantization_results[quant]["job_link"][artifact_path["gpu"]] = job["html_url"] + failed, success, time_spent = handle_test_results(artifact["stats"]) + quantization_results[quant]["failed"][artifact_path["gpu"]] += failed + quantization_results[quant]["success"] += success + quantization_results[quant]["time_spent"] += time_spent[1:-1] + ", " + + stacktraces = handle_stacktraces(artifact["failures_line"]) + + for line in artifact["summary_short"].split("\n"): + if line.startswith("FAILED "): + line = line[len("FAILED ") :] + line = line.split()[0].replace("\n", "") + + if artifact_path["gpu"] not in quantization_results[quant]["failures"]: + 
quantization_results[quant]["failures"][artifact_path["gpu"]] = [] + + quantization_results[quant]["failures"][artifact_path["gpu"]].append( + {"line": line, "trace": stacktraces.pop(0)} + ) + + message = QuantizationMessage( + title, + results=quantization_results, + ) + + message.post() + message.post_reply()