[WIP] Torch 2.4 in docker images #1491

Merged (19 commits) on Aug 30, 2024

.github/workflows/code-quality.yaml (2 changes: 1 addition & 1 deletion)
@@ -34,7 +34,7 @@ jobs:
uses: actions/checkout@v3
with:
repository: mosaicml/ci-testing
ref: v0.2.0
ref: v0.1.2
path: ./ci-testing
- uses: ./ci-testing/.github/actions/code-quality
with:

.github/workflows/coverage.yaml (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ jobs:
uses: actions/checkout@v3
with:
repository: mosaicml/ci-testing
ref: v0.2.0
ref: v0.1.2
path: ./ci-testing
- uses: ./ci-testing/.github/actions/coverage
with:

.github/workflows/docker.yaml (11 changes: 11 additions & 0 deletions)
@@ -20,9 +20,19 @@ jobs:
- name: "2.3.1_cu121"
base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
dep_groups: "[all]"
te_commit: b5a7c9f
- name: "2.3.1_cu121_aws"
base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04-aws
dep_groups: "[all]"
te_commit: b5a7c9f
- name: "2.4.0_cu124"
base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
dep_groups: "[all]"
te_commit: 901e5d2
- name: "2.4.0_cu124_aws"
base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
dep_groups: "[all]"
te_commit: 901e5d2
steps:

- name: Checkout
@@ -89,3 +99,4 @@ jobs:
BRANCH_NAME=${{ github.head_ref || github.ref_name }}
BASE_IMAGE=${{ matrix.base_image }}
DEP_GROUPS=${{ matrix.dep_groups }}
TE_COMMIT=${{ matrix.te_commit }}
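
For readers tracing the build wiring: each new te_commit value in the matrix above is handed to the Docker build as the TE_COMMIT build argument (consumed by the Dockerfile change further down). A rough local equivalent for the new 2.4.0_cu124 image, assuming the plain docker CLI and placeholder branch, tag, and context values, would be:

    # Sketch only: values taken from the 2.4.0_cu124 matrix entry above.
    # The branch name, image tag, and build context are placeholders; CI derives
    # BRANCH_NAME from github.head_ref || github.ref_name.
    docker build \
      --build-arg BRANCH_NAME=main \
      --build-arg BASE_IMAGE=mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04 \
      --build-arg DEP_GROUPS="[all]" \
      --build-arg TE_COMMIT=901e5d2 \
      -t llm-foundry:2.4.0_cu124 .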

.github/workflows/pr-cpu.yaml (2 changes: 1 addition & 1 deletion)
@@ -29,7 +29,7 @@ jobs:
- name: Checkout code
uses: actions/checkout@v2
- name: Run PR CPU Tests
uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.2.0
uses: mosaicml/ci-testing/.github/actions/pytest-cpu@v0.1.2
with:
name: ${{ matrix.name }}
container: ${{ matrix.container }}

.github/workflows/pr-gpu.yaml (12 changes: 6 additions & 6 deletions)
@@ -27,10 +27,10 @@ jobs:
markers: "gpu"
pip_deps: "[all]"
pytest_command: "coverage run -m pytest"
ci_repo_gpu_test_ref: v0.2.0
ci_repo_gpu_test_ref: v0.1.2
steps:
- name: Run PR GPU Tests
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.0
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2
with:
container: ${{ matrix.container }}
git_repo: mosaicml/llm-foundry
@@ -56,10 +56,10 @@
markers: "gpu"
pip_deps: "[all]"
pytest_command: "coverage run -m pytest"
ci_repo_gpu_test_ref: v0.2.0
ci_repo_gpu_test_ref: v0.1.2
steps:
- name: Run PR GPU Tests
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.0
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2
with:
container: ${{ matrix.container }}
git_repo: mosaicml/llm-foundry
@@ -85,10 +85,10 @@
markers: "gpu"
pip_deps: "[all]"
pytest_command: "coverage run -m pytest"
ci_repo_gpu_test_ref: v0.2.0
ci_repo_gpu_test_ref: v0.1.2
steps:
- name: Run PR GPU Tests
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.2.0
uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2
with:
container: ${{ matrix.container }}
git_repo: mosaicml/llm-foundry

.github/workflows/smoketest.yaml (2 changes: 1 addition & 1 deletion)
@@ -32,7 +32,7 @@ jobs:
uses: actions/checkout@v3
with:
repository: mosaicml/ci-testing
ref: v0.2.0
ref: v0.1.2
path: ./ci-testing
- uses: ./ci-testing/.github/actions/smoketest
with:

Dockerfile (3 changes: 2 additions & 1 deletion)
@@ -6,6 +6,7 @@ FROM $BASE_IMAGE

ARG BRANCH_NAME
ARG DEP_GROUPS
ARG TE_COMMIT

ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0"

@@ -15,7 +16,7 @@ ADD https://raw.githubusercontent.com/mosaicml/llm-foundry/$BRANCH_NAME/setup.py
RUN rm setup.py

# Install TransformerEngine
RUN NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 pip install git+https://github.com/NVIDIA/TransformerEngine.git@b5a7c9f
RUN NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 pip install git+https://github.com/NVIDIA/TransformerEngine.git@$TE_COMMIT

# Install and uninstall foundry to cache foundry requirements
RUN git clone -b $BRANCH_NAME https://github.com/mosaicml/llm-foundry.git
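
With TE_COMMIT now a build argument, the TransformerEngine pin follows the image matrix instead of being hardcoded: the torch 2.3.1 images keep commit b5a7c9f, while the torch 2.4.0 images move to 901e5d2. For the 2.4.0 images, the RUN line above effectively resolves to:

    # Effective install command inside the 2.4.0_cu124 image builds (TE_COMMIT=901e5d2).
    NVTE_FRAMEWORK=pytorch CMAKE_BUILD_PARALLEL_LEVEL=4 MAX_JOBS=4 \
      pip install git+https://github.com/NVIDIA/TransformerEngine.git@901e5d2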

scripts/inference/convert_hf_to_onnx.py (2 changes: 1 addition & 1 deletion)
@@ -158,7 +158,7 @@ def export_to_onnx(
ort_session = ort.InferenceSession(str(output_file))

for key, value in sample_input.items():
sample_input[key] = value.cpu().numpy()
sample_input[key] = value.cpu().numpy() # pyright: ignore

loaded_model_out = ort_session.run(None, sample_input)


setup.py (2 changes: 1 addition & 1 deletion)
@@ -57,7 +57,7 @@
'accelerate>=0.25,<0.34', # for HF inference `device_map`
'transformers>=4.43.2,<4.44',
'mosaicml-streaming>=0.8.1,<0.9',
'torch>=2.3.0,<2.4',
'torch>=2.3.0,<2.4.1',
'datasets>=2.19,<2.20',
'fsspec==2023.6.0', # newer version results in a bug in datasets that duplicates data
'sentencepiece==0.2.0',

tests/models/test_onnx.py (2 changes: 1 addition & 1 deletion)
@@ -85,7 +85,7 @@ def test_onnx_export(tie_word_embeddings: bool, tmp_path: pathlib.Path):
ort_session = ort.InferenceSession(str(tmp_path / 'mpt.onnx'))

for key, value in sample_input.items():
sample_input[key] = value.cpu().numpy()
sample_input[key] = value.cpu().numpy() # pyright: ignore

loaded_model_out = ort_session.run(None, sample_input)
