From e0b62109cf7959d9f8b7229ad43897f7f9b2fdad Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 27 Feb 2025 12:24:55 -0800
Subject: [PATCH 1/3] Fail the benchmark job if the export step fails

---
 .github/workflows/android-perf.yml                         | 7 +++++--
 .github/workflows/apple-perf.yml                           | 7 +++++--
 .../benchmark/android-llm-device-farm-test-spec.yml.j2     | 2 +-
 .../default-ios-device-farm-appium-test-spec.yml.j2        | 2 +-
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 201fb3b7a8f..4b2b17d37fa 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -20,7 +20,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: stories110M
+        default: llama
       devices:
         description: Target devices to run benchmark
         required: false
@@ -36,7 +36,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: stories110M
+        default: llama
       devices:
         description: Target devices to run benchmark
         required: false
@@ -173,6 +173,9 @@ jobs:
       upload-artifact-to-s3: true
       secrets-env: EXECUTORCH_HF_TOKEN
       script: |
+        # TESTING
+        exit 1
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index ea88be441cb..c0665577e86 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -20,7 +20,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: stories110M
+        default: llama
       devices:
         description: Target devices to run benchmark
         required: false
@@ -36,7 +36,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: stories110M
+        default: llama
       devices:
         description: Target devices to run benchmark
         required: false
@@ -175,6 +175,9 @@ jobs:
       script: |
         set -eux
 
+        # TESTING
+        exit 1
+
         echo "::group::Setting up CI environment"
         .ci/scripts/setup-conda.sh
 
diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
index ae25a071e5c..da928cabc9e 100644
--- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -12,7 +12,7 @@ phases:
       - echo "The benchmark config is {{ benchmark_config_id }}"
 
       # Download the model from S3
-      - curl -s --fail '{{ model_path }}' -o model.zip
+      - curl -s --fail '{{ model_path }}' -o model.zip || false
       - unzip model.zip && ls -la
 
       # Copy the model to sdcard. This prints too much progress info when the files
diff --git a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
index 05816685638..67fc2862934 100644
--- a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
+++ b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
@@ -14,7 +14,7 @@ phases:
       - echo "The benchmark config is {{ benchmark_config_id }}"
 
       # Download the model from S3
-      - curl -s --fail '{{ model_path }}' -o model.zip
+      - curl -s --fail '{{ model_path }}' -o model.zip || false
       - unzip model.zip && ls -la
 
       # Extract the app

From 353073110c17368826a92d3ea243f8f402ce5c76 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 27 Feb 2025 14:21:23 -0800
Subject: [PATCH 2/3] Try to run it in the test phase

---
 .../benchmark/android-llm-device-farm-test-spec.yml.j2     | 6 +++++-
 .../default-ios-device-farm-appium-test-spec.yml.j2        | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
index da928cabc9e..1ed5ede738c 100644
--- a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+++ b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -12,7 +12,7 @@ phases:
       - echo "The benchmark config is {{ benchmark_config_id }}"
 
       # Download the model from S3
-      - curl -s --fail '{{ model_path }}' -o model.zip || false
+      - curl -s --fail '{{ model_path }}' -o model.zip
       - unzip model.zip && ls -la
 
       # Copy the model to sdcard. This prints too much progress info when the files
@@ -35,6 +35,10 @@ phases:
 
   test:
     commands:
+      # Fail the test if the model doesn't exist; do it here, in the test phase, so that AWS can report the status back
+      - echo "Verify model"
+      - curl -I --fail '{{ model_path }}' || false
+
       # By default, the following ADB command is used by Device Farm to run your Instrumentation test.
       # Please refer to Android's documentation for more options on running instrumentation tests with adb:
       # https://developer.android.com/studio/test/command-line#run-tests-with-adb
diff --git a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
index 67fc2862934..a24c0257100 100644
--- a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
+++ b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
@@ -14,7 +14,7 @@ phases:
       - echo "The benchmark config is {{ benchmark_config_id }}"
 
       # Download the model from S3
-      - curl -s --fail '{{ model_path }}' -o model.zip || false
+      - curl -s --fail '{{ model_path }}' -o model.zip
       - unzip model.zip && ls -la
 
       # Extract the app
@@ -34,6 +34,11 @@ phases:
   # The test phase includes commands that run your test suite execution.
   test:
     commands:
+      # Fail the test if the model doesn't exist; do it here, in the test phase, so that AWS can report the status back
+      - echo "Verify model"
+      - curl -I --fail '{{ model_path }}' || false
+
+      # Run the benchmark
       - xcodebuild test-without-building -destination id=$DEVICEFARM_DEVICE_UDID -xctestrun $DEVICEFARM_TEST_PACKAGE_PATH/*.xctestrun -derivedDataPath $DEVICEFARM_LOG_DIR
 
   # The post test phase includes are commands that are run after your tests are executed.

From 5cb328d616be30e0c40159a64f38bdc0d0c172b9 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Thu, 27 Feb 2025 18:40:42 -0800
Subject: [PATCH 3/3] Ready for review

---
 .github/workflows/android-perf.yml | 3 ---
 .github/workflows/apple-perf.yml   | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 4b2b17d37fa..d3a16428b57 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -173,9 +173,6 @@ jobs:
       upload-artifact-to-s3: true
       secrets-env: EXECUTORCH_HF_TOKEN
       script: |
-        # TESTING
-        exit 1
-
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index c0665577e86..df29e44eac1 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -175,9 +175,6 @@ jobs:
       script: |
         set -eux
 
-        # TESTING
-        exit 1
-
         echo "::group::Setting up CI environment"
         .ci/scripts/setup-conda.sh