Merge branch 'main' into CI-CD/bach
bachvudinh authored Jul 19, 2024
2 parents 730e7ca + 546dba7 commit 8eccf65
Showing 7 changed files with 249 additions and 19 deletions.
52 changes: 52 additions & 0 deletions .github/runners/Dockerfile
@@ -0,0 +1,52 @@
FROM docker.io/pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    sudo \
    unzip \
    curl \
    wget \
    git \
    git-lfs \
    jq \
    && rm -rf /var/lib/apt/lists/*

RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    ./aws/install

ENV HOME=/home/runner

RUN mkdir -p /home/runner

ARG RUNNER_VERSION=2.317.0

ARG RUNNER_UID=1000
ARG DOCKER_GID=1001

RUN adduser --disabled-password --gecos "" --uid $RUNNER_UID runner \
    && groupadd docker --gid $DOCKER_GID \
    && usermod -aG sudo runner \
    && usermod -aG docker runner \
    && echo "%sudo ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers \
    && echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >> /etc/sudoers

# cd into the user directory, download and unzip the github actions runner
RUN cd /home/runner && mkdir actions-runner && cd actions-runner \
    && curl -O -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \
    && tar xzf ./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz

RUN chown -R runner:runner /home/runner && /home/runner/actions-runner/bin/installdependencies.sh

ADD ./start.sh /home/runner/start.sh

RUN chmod +x /home/runner/start.sh

# Add /usr/local/cuda-12.1/compat to LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.1/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

ENTRYPOINT ["/bin/bash", "/home/runner/start.sh"]

USER runner
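
To sanity-check this image locally, a build command along these lines should work; the tag and build-arg values are illustrative assumptions, not part of the commit:

```bash
# Build the self-hosted runner image. The build context must contain start.sh,
# so point it at .github/runners/. The tag name is an arbitrary example.
docker build \
  --build-arg RUNNER_VERSION=2.317.0 \
  --build-arg RUNNER_UID=1000 \
  --build-arg DOCKER_GID=1001 \
  -t gha-runner-cuda:latest \
  .github/runners
```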
21 changes: 21 additions & 0 deletions .github/runners/start.sh
@@ -0,0 +1,21 @@
#!/bin/bash

RUNNER_REPO=$RUNNER_REPO
RUNNER_PAT=$RUNNER_PAT
RUNNER_GROUP=$RUNNER_GROUP
RUNNER_LABELS=$RUNNER_LABELS
RUNNER_NAME=$(hostname)

cd /home/runner/actions-runner

./config.sh --unattended --replace --url https://github.com/${RUNNER_REPO} --pat ${RUNNER_PAT} --name ${RUNNER_NAME} --runnergroup ${RUNNER_GROUP} --labels ${RUNNER_LABELS} --work /home/runner/actions-runner/_work

cleanup() {
echo "Removing runner..."
./config.sh remove --unattended --pat ${RUNNER_PAT}
}

trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM

./run.sh & wait $!
41 changes: 41 additions & 0 deletions .github/workflows/test-branch.yml
@@ -0,0 +1,41 @@
name: Test - Models

on:
  push:
    branches:
      - 'CI-CD/bach'
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
        required: true
        default: jan-hq/Jan-Llama3-0708
        type: string
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: jan-hq/instruction-speech-conversation-test
        type: string
      extra_args:
        description: 'Extra arguments for python command, for example: --mode audio --num_rows 5'
        required: false
        default: "--mode audio --num_rows 5"
        type: string

jobs:
  run-test:
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt
      - name: Run tests
        working-directory: ./tests
        run: |
          python3 test_case.py --model_dir ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }} --data_dir ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }} ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}
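
Besides the push trigger on CI-CD/bach, the workflow_dispatch trigger lets this run manually. A sketch with the GitHub CLI, assuming `gh` is authenticated against the repository; the input values mirror the declared defaults:

```bash
# Dispatch the workflow with explicit inputs (these mirror the declared defaults)
gh workflow run test-branch.yml \
  --ref CI-CD/bach \
  -f model_id=jan-hq/Jan-Llama3-0708 \
  -f dataset_id=jan-hq/instruction-speech-conversation-test \
  -f extra_args="--mode audio --num_rows 5"
```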
37 changes: 37 additions & 0 deletions .github/workflows/test-models.yml
@@ -0,0 +1,37 @@
name: Test - Models
on:
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
        required: true
        default: jan-hq/Jan-Llama3-0708
        type: string
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: jan-hq/instruction-speech-conversation-test
        type: string
      extra_args:
        description: 'Extra arguments for python command, for example: --mode audio --num_rows 5'
        required: false
        default: "--mode audio --num_rows 5"
        type: string

jobs:
  run-test:
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt
      - name: Run tests
        working-directory: ./tests
        run: |
          python3 test_case.py --model_dir ${{ github.event.inputs.model_id }} --data_dir ${{ github.event.inputs.dataset_id }} ${{ github.event.inputs.extra_args }}
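
The same dispatch can also be issued over GitHub's REST API; a sketch using curl, where OWNER/REPO and $GITHUB_TOKEN are placeholders:

```bash
# POST a workflow_dispatch event for test-models.yml via the GitHub REST API
curl -X POST \
  -H "Accept: application/vnd.github+json" \
  -H "Authorization: Bearer $GITHUB_TOKEN" \
  https://api.github.com/repos/OWNER/REPO/actions/workflows/test-models.yml/dispatches \
  -d '{"ref":"main","inputs":{"model_id":"jan-hq/Jan-Llama3-0708","dataset_id":"jan-hq/instruction-speech-conversation-test","extra_args":"--mode audio --num_rows 5"}}'
```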
4 changes: 3 additions & 1 deletion tests/README.md
@@ -8,8 +8,10 @@
 2. Run the test suite:
 ```bash
 python test_case.py --model_dir "jan-hq/Jan-Llama3-0708" \
+    --max_length 1024 \
     --data_dir "jan-hq/instruction-speech-conversation-test" \
     --mode "audio" \
-    --num_rows 100 \
+    --num_rows 5
 ```
 ## Test Configuration
 
2 changes: 1 addition & 1 deletion tests/requirements.txt
@@ -1,5 +1,5 @@
 datasets==2.20.0
-torch=2.3.0
+torch==2.3.0
 transformers
 vllm
 huggingface_hub==0.23.4
111 changes: 94 additions & 17 deletions tests/test_case.py
@@ -9,12 +9,89 @@
 from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
 import argparse
 import os
+import sys
+from io import StringIO
+import time
+# Custom unittest result/runner classes and a test-name decorator
+class CustomTestResult(unittest.TestResult):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.successes = []
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self.successes.append(test)
+
+class CustomTestRunner(unittest.TextTestRunner):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.stream = StringIO()
+        self.results = []
+
+    def run(self, test):
+        result = CustomTestResult()
+        start_time = time.time()
+        test(result)
+        time_taken = time.time() - start_time
+        self.results.append((result, time_taken))
+        return result
+
+    def print_results(self):
+        print("\n=== Test Results ===")
+        total_tests = 0
+        total_successes = 0
+        total_failures = 0
+        total_errors = 0
+        total_time = 0
+
+        for result, time_taken in self.results:
+            total_tests += result.testsRun
+            total_successes += len(result.successes)
+            total_failures += len(result.failures)
+            total_errors += len(result.errors)
+            total_time += time_taken
+
+        print(f"Ran {total_tests} tests in {total_time:.3f} seconds")
+        print(f"Successes: {total_successes}")
+        print(f"Failures: {total_failures}")
+        print(f"Errors: {total_errors}")
+
+        print("\nDetailed Results:")
+        for result, time_taken in self.results:
+            # todo: add time taken for each test
+            for test in result.successes:
+                print(f"PASS: {test._testMethodName}")
+            for test, _ in result.failures:
+                print(f"FAIL: {test._testMethodName}")
+            for test, _ in result.errors:
+                test_name = getattr(test, '_testMethodName', str(test))
+                print(f"ERROR: {test_name}")
+
+        if total_failures > 0 or total_errors > 0:
+            print("\nFailure and Error Details:")
+            for result, _ in self.results:
+                for test, traceback in result.failures:
+                    print(f"\nFAILURE: {test._testMethodName}")
+                    print(traceback)
+                for test, traceback in result.errors:
+                    test_name = getattr(test, '_testMethodName', str(test))
+                    print(f"\nERROR: {test_name}")
+                    print(traceback)
+        else:
+            print("\nAll tests passed successfully!")
+
+def test_name(name):
+    def decorator(func):
+        func.__name__ = name
+        return func
+    return decorator
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description="Run inference on a Sound-To-Text Model.")
-    parser.add_argument("--model_dir", type=str, required=True, help="Hugging Face model link or local_dir")
-    parser.add_argument("--model_save_dir", type=str, required=True, help="Local directory that model is saved")
+    parser.add_argument("--model_dir", type=str, default="jan-hq/Jan-Llama3-0708", help="Hugging Face model link or local_dir")
+    parser.add_argument("--max_length", type=int, default=1024, help="Maximum length of the output")
     parser.add_argument("--data_dir", type=str, required=True, help="Hugging Face model repository link or Data path")
+    parser.add_argument("--cache_dir", type=str, default=".", help="Absolute path to save the model and dataset")
     parser.add_argument("--mode", type=str, default="audio", help="Mode of the model (audio or text)")
     parser.add_argument("--num_rows", type=int, default=5, help="Number of dataset rows to process")
     parser.add_argument("--output_file", type=str, default="output/", help="Output file path")
@@ -29,23 +106,20 @@ def setUpClass(cls):
         cls.save_dir_output = f'{args.output_file}/{model_name}-{args.mode}-Result.csv'
         if not os.path.exists(args.output_file):
             os.makedirs(args.output_file)
-        cls.sampling_params = SamplingParams(temperature=0.0, max_tokens=1024, skip_special_tokens=False)
-        model_dir = ""
-        if os.path.exists(args.model_save_dir):
-            model_dir = args.model_save_dir
+        cls.sampling_params = SamplingParams(temperature=0.0, max_tokens=args.max_length, skip_special_tokens=False)
+        # Download model
+        model_save_dir = os.path.join(args.cache_dir, args.model_dir)
+        if not os.path.exists(model_save_dir):
+            snapshot_download(args.model_dir, local_dir=model_save_dir, max_workers=64)
         else:
-            # Download model
-            if not os.path.exists(args.model_dir):
-                snapshot_download(args.model_dir, local_dir=args.model_dir, max_workers=64)
-            else:
-                print(f"Found {args.model_dir}. Skipping download.")
-            model_dir = args.model_dir
+            print(f"Found {model_save_dir}. Skipping download.")
         # Model loading using vllm
         cls.tokenizer = AutoTokenizer.from_pretrained(model_dir)
         cls.llm = LLM(model_dir, tokenizer=model_dir, gpu_memory_utilization=0.3)
 
         # Load dataset
-        cls.dataset = load_dataset(args.data_dir, cache_dir=".cache/")['train']
+        data_save_dir = os.path.join(args.cache_dir, args.data_dir)
+        cls.dataset = load_dataset(args.data_dir, split='train')
         cls.num_rows = min(args.num_rows, len(cls.dataset))
         cls.inference_results = []
         if args.mode == "audio":
@@ -87,6 +161,7 @@ def vllm_qna_inference(self, sample_id):
 
 
         # return input_str, output_based_on_input, expected_output_str, output_token_ids
+    @test_name("Output validation (non-empty, correct type)")
     def test_model_output(self):
         for text_input_str, output_based_on_sound, expected_output_str, output_token_ids in self.inference_results:
             # Test 1: Check if output is not empty
@@ -103,7 +178,7 @@ def test_model_output(self):
             # output_words = set(output_based_on_sound.lower().split())
             # relevance_score = corpus_bleu(output_words, reference_words)
             # self.assertGreater(relevance_score, 0.3)
-
+    @test_name("Test Special Tokens Handling")
     def test_special_tokens(self):
         # Test 5: Check if special tokens are handled correctly
         special_tokens = [self.tokenizer.bos_token, self.tokenizer.eos_token, self.tokenizer.pad_token]
@@ -118,12 +193,12 @@ def test_special_tokens(self):
         # results = [self.inference_results[0][1] for _ in range(3)]
         # self.assertEqual(results[0], results[1])
         # self.assertEqual(results[1], results[2])
-
+    @test_name("Test for NaN outputs")
     def test_no_nan_outputs(self):
         # Test 7: Check for NaN outputs
         for _, output, _, _ in self.inference_results:
             self.assertFalse(any(np.isnan(float(word)) for word in output.split() if word.replace('.', '').isdigit()))
-
+    @test_name("Test for EOS token generation")
     def test_eos_token_generation(self):
         # Test 8: Check if EOS token is generated
         for _, output_based_on_sound, _, output_token_ids in self.inference_results:
@@ -141,4 +216,6 @@ def test_eos_token_generation(self):
 
 
 if __name__ == "__main__":
-    unittest.main(argv=['first-arg-is-ignored'], exit=False)
+    runner = CustomTestRunner(stream=sys.stdout, verbosity=2)
+    unittest.main(argv=['first-arg-is-ignored'], exit=False, testRunner=runner)
+    runner.print_results()
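
For reference, the suite can also be run outside CI with the new argument set; a sketch run from the tests/ directory, where the flag values are simply the argparse defaults introduced above (only --data_dir is required besides the model):

```bash
# Local invocation mirroring the CI job; flag values are the argparse defaults
python3 test_case.py \
  --model_dir jan-hq/Jan-Llama3-0708 \
  --data_dir jan-hq/instruction-speech-conversation-test \
  --max_length 1024 \
  --cache_dir . \
  --mode audio \
  --num_rows 5
```

`CustomTestRunner.print_results()` then prints the aggregated PASS/FAIL/ERROR summary after `unittest.main` returns.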
