Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BERT nightly benchmark on Inferentia1 #2167

Merged
merged 11 commits into from
Mar 20, 2023
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Benchmark torchserve cpu nightly
name: Benchmark torchserve nightly

on:
# run every day at 2:15am
Expand All @@ -7,7 +7,13 @@ on:

jobs:
nightly:
runs-on: [self-hosted, cpu]
strategy:
fail-fast: false
matrix:
hardware: [cpu, gpu, inf1]
runs-on:
- self-hosted
- ${{ matrix.hardware }}
timeout-minutes: 1320
steps:
- name: Clean up previous run
Expand All @@ -32,16 +38,26 @@ jobs:
uses: actions/checkout@v3
- name: Install dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y apache2-utils
pip install -r benchmarks/requirements-ab.txt
export omp_num_threads=1
sudo apt-get update -y
sudo apt-get install -y apache2-utils
pip install -r benchmarks/requirements-ab.txt
- name: Benchmark cpu nightly
if: ${{ matrix.hardware == 'cpu' }}
env:
OMP_NUM_THREADS: 1
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_cpu.yaml --skip false
- name: Benchmark gpu nightly
if: ${{ matrix.hardware == 'gpu' }}
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_gpu.yaml --skip false
- name: Benchmark inf1 nightly
if: ${{ matrix.hardware == 'inf1' }}
env:
NEURON_RT_NUM_CORES: 4
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_neuron.yaml --skip false
- name: Save benchmark artifacts
uses: actions/upload-artifact@v2
with:
name: nightly cpu artifact
name: nightly ${{ matrix.hardware }} artifact
path: /tmp/ts_benchmark
- name: Download benchmark artifacts for auto validation
uses: dawidd6/action-download-artifact@v2
Expand All @@ -50,19 +66,19 @@ jobs:
workflow_conclusion: success
if_no_artifact_found: ignore
path: /tmp/ts_artifacts
name: cpu_benchmark_validation
name: ${{ matrix.hardware }}_benchmark_validation
- name: Update benchmark artifacts for auto validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/cpu_benchmark_validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Upload the updated benchmark artifacts for auto validation
uses: actions/upload-artifact@v2
with:
name: cpu_benchmark_validation
name: ${{ matrix.hardware }}_benchmark_validation
path: /tmp/ts_artifacts
- name: Open issue on failure
if: ${{ failure() && github.event_name == 'schedule' }}
if: ${{ failure() && github.event_name == 'schedule' && matrix.hardware == 'cpu' }}
uses: dacbd/create-issue-action@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
title: Nightly CPU benchmark failed
title: Nightly ${{ matrix.hardware }} benchmark failed
body: Commit ${{ github.sha }} daily scheduled [CI run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) failed, please check why
assignees: ''
60 changes: 0 additions & 60 deletions .github/workflows/benchmark_nightly_gpu.yml

This file was deleted.

6 changes: 3 additions & 3 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def load_config(self):
report_cmd = v

self.bm_config["model_config_path"] = (
"{}/cpu".format(MODEL_JSON_CONFIG_PATH)
if self.bm_config["hardware"] == "cpu"
else "{}/gpu".format(MODEL_JSON_CONFIG_PATH)
"{}/{}".format(MODEL_JSON_CONFIG_PATH, self.bm_config["hardware"])
if self.bm_config["hardware"] in ["cpu", "gpu", "neuron"]
else "{}/cpu".format(MODEL_JSON_CONFIG_PATH)
)

if self.skip_ts_install:
Expand Down
45 changes: 45 additions & 0 deletions benchmarks/benchmark_config_neuron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Torchserve version is to be installed. It can be one of the options
# - branch : "master"
# - nightly: "2022.3.16"
# - release: "0.5.3"
# Nightly build will be installed if "ts_version" is not specified
#ts_version:
# branch: &ts_version "master"

# a list of model configure yaml files defined in benchmarks/models_config
# or a list of model configure yaml files with full path
models:
- "bert_neuron.yaml"

# benchmark on "cpu", "gpu" or "neuron".
# "cpu" is set if "hardware" is not specified
hardware: &hardware "neuron"

# load prometheus metrics report to remote storage or local different path if "metrics_cmd" is set.
# the command line to load prometheus metrics report to remote system.
# Here is an example of AWS cloudwatch command:
# Note:
# - keep the values in the same order as in the command definition.
# - set up the command before enabling `metrics_cmd`.
# For example, aws client and AWS credentials need to be setup before trying this example.
metrics_cmd:
- "cmd": "aws cloudwatch put-metric-data"
- "--namespace": ["torchserve_benchmark_nightly_", *hardware]
- "--region": "us-east-2"
- "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'

# load report to remote storage or local different path if "report_cmd" is set.
# the command line to load report to remote storage.
# Here is an example of AWS cloudwatch command:
# Note:
# - keep the values in the same order as in the command definition.
# - set up the command before enabling `report_cmd`.
# For example, aws client, AWS credentials and S3 bucket
# need to be setup before trying this example.
# - "today()" is a keyword to apply current date in the path
# For example, the dest path in the following example is
# s3://torchserve-model-serving/benchmark/2022-03-18/gpu
report_cmd:
- "cmd": "aws s3 cp --recursive"
- "source": '/tmp/ts_benchmark/'
- "dest": ['s3://torchserve-benchmark/nightly', "today()", *hardware]
60 changes: 53 additions & 7 deletions benchmarks/models_config/bert_neuron.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,68 @@
---
bert_inf1:
bert_neuron_batch_1:
scripted_mode:
benchmark_engine: "ab"
compile_per_batch_size: True
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_1.mar
workers:
- 4
batch_delay: 100
batch_size:
- 1
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_2:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_2.mar
workers:
- 4
batch_delay: 100
batch_size:
- 2
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_4:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_4.mar
workers:
- 4
batch_delay: 100
batch_size:
- 4
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_8:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_8.mar
workers:
- 4
batch_delay: 100
batch_size:
- 8
input: "./benchmarks/automated/tests/resources/neuron-bert/input"
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "aws_neuron_pytorch_p36"
exec_env: "local"
processors:
- "inferentia"
instance_types:
- "inf1.6xlarge"
- "neuron"
44 changes: 38 additions & 6 deletions examples/Huggingface_Transformers/Download_Transformer_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@


def transformers_model_dowloader(
mode, pretrained_model_name, num_labels, do_lower_case, max_length, torchscript
mode,
pretrained_model_name,
num_labels,
do_lower_case,
max_length,
torchscript,
hardware,
batch_size,
):
"""This function, save the checkpoint, config file along with tokenizer config and vocab files
of a transformer model of your choice.
Expand Down Expand Up @@ -98,11 +105,27 @@ def transformers_model_dowloader(
add_special_tokens=True,
return_tensors="pt",
)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)
model.to(device).eval()
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
if hardware == "neuron":
import torch_neuron

input_ids = torch.cat([inputs["input_ids"]] * batch_size, 0).to(device)
attention_mask = torch.cat([inputs["attention_mask"]] * batch_size, 0).to(
device
)
traced_model = torch_neuron.trace(model, (input_ids, attention_mask))
torch.jit.save(
traced_model,
os.path.join(
NEW_DIR,
"traced_{}_model_neuron_batch_{}.pt".format(model_name, batch_size),
),
)
else:
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
return


Expand All @@ -124,7 +147,16 @@ def transformers_model_dowloader(
torchscript = True
else:
torchscript = False
hardware = settings.get("hardware")
batch_size = int(settings.get("batch_size", "1"))

transformers_model_dowloader(
mode, model_name, num_labels, do_lower_case, max_length, torchscript
mode,
model_name,
num_labels,
do_lower_case,
max_length,
torchscript,
hardware,
batch_size,
)
Loading