From a6cb4423bb0a2a221e82019e5db6adf8b215684f Mon Sep 17 00:00:00 2001 From: "Xida Ren (Cedar)" Date: Thu, 21 Nov 2024 19:21:14 -0500 Subject: [PATCH 1/2] Fix some linux developer_guide.md comments that didn't show up at time of merge (#588) [skip ci] Clicked merge before some of #575 's comments showed up. --- docs/developer_guide.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/developer_guide.md b/docs/developer_guide.md index 6cd5f83a8..73aee61f7 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -15,8 +15,8 @@ sudo apt update && sudo apt install -y clang lld Install: -``` -python-is-python3 python3-venv python3-dev +```bash +sudo apt install python-is-python3 python3-venv python3-dev ```
@@ -24,6 +24,8 @@ python-is-python3 python3-venv python3-dev Or, alternatively, use `pyenv` to manage a separate python installation for more control over its version: +The following instructions are taken from pyenv's guide here: https://github.com/pyenv/pyenv?tab=readme-ov-file#a-getting-pyenv + First, install pyenv and its dependencies. ```bash From fd15aa5a30f842472faf819e60285c38e8adfa7b Mon Sep 17 00:00:00 2001 From: Avinash Sharma Date: Thu, 21 Nov 2024 17:26:09 -0800 Subject: [PATCH 2/2] Fix publish dir of llama tests, update xpass 8b test and 405b fp8 test failures (#580) Fixes publish dir of llama tests to `out/llm/llama/benchmark`, updates xpass 8b test (`testBenchmark8B_f16_Non_Decomposed_Prefill`) and 405b fp8 test failures (`testBenchmark405B_fp8_TP8_Decomposed` and `testBenchmark405B_fp8_TP8_Non_Decomposed`). --------- Signed-off-by: aviator19941 Co-authored-by: saienduri <77521230+saienduri@users.noreply.github.com> --- .github/workflows/ci-llama-large-tests.yaml | 7 ++++--- .../tests/models/llama/benchmark_amdgpu_test.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index 34e91cebb..ae53d3f38 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -8,6 +8,7 @@ name: Llama Benchmarking Tests on: workflow_dispatch: + pull_request: schedule: # Weekdays at 4:00 AM UTC = 9:00 PM PST. 
- cron: "0 4 * * 1-5" @@ -76,14 +77,14 @@ jobs: iree-base-runtime - name: Run llama tests - run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/index.html + run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/llm/llama/benchmark/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }} - publish_dir: ./out/llm/llama/benchmarks - destination_dir: ./llm/llama/benchmarks + publish_dir: ./out/llm/llama/benchmark + destination_dir: ./llm/llama/benchmark keep_files: true - name: Upload llama executable files diff --git a/sharktank/tests/models/llama/benchmark_amdgpu_test.py b/sharktank/tests/models/llama/benchmark_amdgpu_test.py index 125a0cfdc..751615a85 100644 --- a/sharktank/tests/models/llama/benchmark_amdgpu_test.py +++ b/sharktank/tests/models/llama/benchmark_amdgpu_test.py @@ -197,7 +197,6 @@ def testBenchmark8B_f16_Decomposed(self): ) @skipif_run_quick_llama_test - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) def testBenchmark8B_f16_Non_Decomposed_Prefill(self): output_file_name = self.dir_path_8b / "f16_torch_prefill" output_mlir = self.llama8b_f16_torch_sdpa_artifacts.create_file( @@ -780,7 +779,9 @@ def testBenchmark405B_f16_TP8_Decomposed(self): cwd=self.repo_root, ) - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="Benchmarking Error", strict=True, raises=IreeBenchmarkException + ) def testBenchmark405B_f16_TP8_Non_Decomposed(self): output_file_name = self.dir_path_405b / "f16_torch" output_mlir = self.llama405b_f16_torch_sdpa_artifacts.create_file( @@ -828,7 +829,9 @@ def testBenchmark405B_f16_TP8_Non_Decomposed(self): cwd=self.repo_root, ) - 
@pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="KeyError in theta.py", strict=True, raises=ExportMlirException + ) def testBenchmark405B_fp8_TP8_Decomposed(self): output_file_name = self.dir_path_405b / "fp8_decomposed" output_mlir = self.llama405b_fp8_decomposed_artifacts.create_file( @@ -874,7 +877,9 @@ def testBenchmark405B_fp8_TP8_Decomposed(self): cwd=self.repo_root, ) - @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException) + @pytest.mark.xfail( + reason="KeyError in theta.py", strict=True, raises=ExportMlirException + ) def testBenchmark405B_fp8_TP8_Non_Decomposed(self): output_file_name = self.dir_path_405b / "fp8_torch" output_mlir = self.llama405b_fp8_torch_sdpa_artifacts.create_file(