diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index 34e91cebb..ae53d3f38 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -8,6 +8,7 @@ name: Llama Benchmarking Tests on: workflow_dispatch: + pull_request: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. - cron: "0 4 * * 1-5" @@ -76,14 +77,14 @@ jobs: iree-base-runtime - name: Run llama tests - run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/index.html + run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/llm/llama/benchmark/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/llama/benchmarks - destination_dir: ./llm/llama/benchmarks + publish_dir: ./out/llm/llama/benchmark + destination_dir: ./llm/llama/benchmark keep_files: true - name: Upload llama executable files diff --git a/docs/developer_guide.md b/docs/developer_guide.md index 6cd5f83a8..73aee61f7 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -15,8 +15,8 @@ sudo apt update && sudo apt install -y clang lld Install: -``` -python-is-python3 python3-venv python3-dev +```bash +sudo apt install python-is-python3 python3-venv python3-dev ```
@@ -24,6 +24,8 @@ python-is-python3 python3-venv python3-dev Or, alternatively, use `pyenv` to manage a separate python installation for more control over its version: +The following instructions are taken from pyenv's guide here: https://github.com/pyenv/pyenv?tab=readme-ov-file#a-getting-pyenv + First, install pyenv and its dependencies. ```bash diff --git a/sharktank/tests/models/llama/benchmark_amdgpu_test.py b/sharktank/tests/models/llama/benchmark_amdgpu_test.py index 125a0cfdc..751615a85 100644 --- a/sharktank/tests/models/llama/benchmark_amdgpu_test.py +++ b/sharktank/tests/models/llama/benchmark_amdgpu_test.py @@ -197,7 +197,6 @@ def testBenchmark8B_f16_Decomposed(self): ) @skipif_run_quick_llama_test - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) def testBenchmark8B_f16_Non_Decomposed_Prefill(self): output_file_name = self.dir_path_8b / "f16_torch_prefill" output_mlir = self.llama8b_f16_torch_sdpa_artifacts.create_file( @@ -780,7 +779,9 @@ def testBenchmark405B_f16_TP8_Decomposed(self): cwd=self.repo_root, ) - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="Benchmarking Error", strict=True, raises=IreeBenchmarkException + ) def testBenchmark405B_f16_TP8_Non_Decomposed(self): output_file_name = self.dir_path_405b / "f16_torch" output_mlir = self.llama405b_f16_torch_sdpa_artifacts.create_file( @@ -828,7 +829,9 @@ def testBenchmark405B_f16_TP8_Non_Decomposed(self): cwd=self.repo_root, ) - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="KeyError in theta.py", strict=True, raises=ExportMlirException + ) def testBenchmark405B_fp8_TP8_Decomposed(self): output_file_name = self.dir_path_405b / "fp8_decomposed" output_mlir = self.llama405b_fp8_decomposed_artifacts.create_file( @@ -874,7 +877,9 @@ def testBenchmark405B_fp8_TP8_Decomposed(self): cwd=self.repo_root, ) - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="KeyError in theta.py", strict=True, raises=ExportMlirException + ) def testBenchmark405B_fp8_TP8_Non_Decomposed(self): output_file_name = self.dir_path_405b / "fp8_torch" output_mlir = self.llama405b_fp8_torch_sdpa_artifacts.create_file(