Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BERT nightly benchmark on Inferentia1 #2167

Merged
merged 11 commits into from
Mar 20, 2023
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Benchmark torchserve cpu nightly
name: Benchmark torchserve nightly

on:
# run every day at 2:15am
Expand All @@ -7,7 +7,13 @@ on:

jobs:
nightly:
runs-on: [self-hosted, cpu]
strategy:
fail-fast: false
matrix:
hardware: [cpu, gpu, inf1]
runs-on:
- self-hosted
- ${{ matrix.hardware }}
timeout-minutes: 1320
steps:
- name: Clean up previous run
Expand All @@ -32,16 +38,26 @@ jobs:
uses: actions/checkout@v3
- name: Install dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y apache2-utils
pip install -r benchmarks/requirements-ab.txt
export omp_num_threads=1
sudo apt-get update -y
sudo apt-get install -y apache2-utils
pip install -r benchmarks/requirements-ab.txt
- name: Benchmark cpu nightly
if: ${{ matrix.hardware == 'cpu' }}
env:
OMP_NUM_THREADS: 1
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_cpu.yaml --skip false
- name: Benchmark gpu nightly
if: ${{ matrix.hardware == 'gpu' }}
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_gpu.yaml --skip false
- name: Benchmark inf1 nightly
if: ${{ matrix.hardware == 'inf1' }}
env:
NEURON_RT_NUM_CORES: 4
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_neuron.yaml --skip false
- name: Save benchmark artifacts
uses: actions/upload-artifact@v2
with:
name: nightly cpu artifact
name: nightly ${{ matrix.hardware }} artifact
path: /tmp/ts_benchmark
- name: Download benchmark artifacts for auto validation
uses: dawidd6/action-download-artifact@v2
Expand All @@ -50,19 +66,19 @@ jobs:
workflow_conclusion: success
if_no_artifact_found: ignore
path: /tmp/ts_artifacts
name: cpu_benchmark_validation
name: ${{ matrix.hardware }}_benchmark_validation
- name: Update benchmark artifacts for auto validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/cpu_benchmark_validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Upload the updated benchmark artifacts for auto validation
uses: actions/upload-artifact@v2
with:
name: cpu_benchmark_validation
name: ${{ matrix.hardware }}_benchmark_validation
path: /tmp/ts_artifacts
- name: Open issue on failure
if: ${{ failure() && github.event_name == 'schedule' }}
if: ${{ failure() && github.event_name == 'schedule' && matrix.hardware == 'cpu' }}
uses: dacbd/create-issue-action@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
title: Nightly CPU benchmark failed
title: Nightly ${{ matrix.hardware }} benchmark failed
body: Commit ${{ github.sha }} daily scheduled [CI run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) failed, please check why
assignees: ''
60 changes: 0 additions & 60 deletions .github/workflows/benchmark_nightly_gpu.yml

This file was deleted.

6 changes: 3 additions & 3 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def load_config(self):
report_cmd = v

self.bm_config["model_config_path"] = (
"{}/cpu".format(MODEL_JSON_CONFIG_PATH)
if self.bm_config["hardware"] == "cpu"
else "{}/gpu".format(MODEL_JSON_CONFIG_PATH)
"{}/{}".format(MODEL_JSON_CONFIG_PATH, self.bm_config["hardware"])
if self.bm_config["hardware"] in ["cpu", "gpu", "neuron"]
else "{}/cpu".format(MODEL_JSON_CONFIG_PATH)
)

if self.skip_ts_install:
Expand Down
45 changes: 45 additions & 0 deletions benchmarks/benchmark_config_neuron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Torchserve version is to be installed. It can be one of the options
# - branch : "master"
# - nightly: "2022.3.16"
# - release: "0.5.3"
# Nightly build will be installed if "ts_version" is not specified
#ts_version:
# branch: &ts_version "master"

# a list of model configure yaml files defined in benchmarks/models_config
# or a list of model configure yaml files with full path
models:
- "bert_neuron.yaml"

# benchmark on "cpu", "gpu" or "neuron".
# "cpu" is set if "hardware" is not specified
hardware: &hardware "neuron"

# load prometheus metrics report to remote storage or local different path if "metrics_cmd" is set.
# the command line to load prometheus metrics report to remote system.
# Here is an example of AWS cloudwatch command:
# Note:
# - keep the values in the same order as in the command definition.
# - set up the command before enabling `metrics_cmd`.
# For example, aws client and AWS credentials need to be setup before trying this example.
metrics_cmd:
- "cmd": "aws cloudwatch put-metric-data"
- "--namespace": ["torchserve_benchmark_nightly_", *hardware]
- "--region": "us-east-2"
- "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'

# load report to remote storage or local different path if "report_cmd" is set.
# the command line to load report to remote storage.
# Here is an example of AWS cloudwatch command:
# Note:
# - keep the values in the same order as in the command definition.
# - set up the command before enabling `report_cmd`.
# For example, aws client, AWS credentials and S3 bucket
# need to be setup before trying this example.
# - "today()" is a keyword to apply current date in the path
# For example, the dest path in the following example is
# s3://torchserve-model-serving/benchmark/2022-03-18/gpu
report_cmd:
- "cmd": "aws s3 cp --recursive"
- "source": '/tmp/ts_benchmark/'
- "dest": ['s3://torchserve-benchmark/nightly', "today()", *hardware]
60 changes: 53 additions & 7 deletions benchmarks/models_config/bert_neuron.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,68 @@
---
bert_inf1:
bert_neuron_batch_1:
scripted_mode:
benchmark_engine: "ab"
compile_per_batch_size: True
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_1.mar
workers:
- 4
batch_delay: 100
batch_size:
- 1
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_2:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_2.mar
workers:
- 4
batch_delay: 100
batch_size:
- 2
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_4:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_4.mar
workers:
- 4
batch_delay: 100
batch_size:
- 4
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "local"
processors:
- "neuron"

bert_neuron_batch_8:
scripted_mode:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/BERTSeqClassification_torchscript_neuron_batch_8.mar
workers:
- 4
batch_delay: 100
batch_size:
- 8
input: "./benchmarks/automated/tests/resources/neuron-bert/input"
input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
backend_profiling: False
exec_env: "aws_neuron_pytorch_p36"
exec_env: "local"
processors:
- "inferentia"
instance_types:
- "inf1.6xlarge"
- "neuron"
44 changes: 38 additions & 6 deletions examples/Huggingface_Transformers/Download_Transformer_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@


def transformers_model_dowloader(
mode, pretrained_model_name, num_labels, do_lower_case, max_length, torchscript
mode,
pretrained_model_name,
num_labels,
do_lower_case,
max_length,
torchscript,
hardware,
batch_size,
):
"""This function, save the checkpoint, config file along with tokenizer config and vocab files
of a transformer model of your choice.
Expand Down Expand Up @@ -98,11 +105,27 @@ def transformers_model_dowloader(
add_special_tokens=True,
return_tensors="pt",
)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)
model.to(device).eval()
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
if hardware == "neuron":
import torch_neuron

input_ids = torch.cat([inputs["input_ids"]] * batch_size, 0).to(device)
attention_mask = torch.cat([inputs["attention_mask"]] * batch_size, 0).to(
device
)
traced_model = torch_neuron.trace(model, (input_ids, attention_mask))
torch.jit.save(
traced_model,
os.path.join(
NEW_DIR,
"traced_{}_model_neuron_batch_{}.pt".format(model_name, batch_size),
),
)
else:
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)
traced_model = torch.jit.trace(model, (input_ids, attention_mask))
torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
return


Expand All @@ -124,7 +147,16 @@ def transformers_model_dowloader(
torchscript = True
else:
torchscript = False
hardware = settings.get("hardware")
batch_size = int(settings.get("batch_size", "1"))

transformers_model_dowloader(
mode, model_name, num_labels, do_lower_case, max_length, torchscript
mode,
model_name,
num_labels,
do_lower_case,
max_length,
torchscript,
hardware,
batch_size,
)
Loading