Revert changes to FastPitch and BERT on PyTorch
shakandrew committed Aug 13, 2021
1 parent a860701 commit 6a64283
Showing 16 changed files with 438 additions and 441 deletions.
8 changes: 4 additions & 4 deletions PyTorch/LanguageModeling/BERT/Dockerfile
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.12-py3
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.06-py3
FROM nvcr.io/nvidia/tritonserver:20.06-v1-py3-clientsdk as trt
FROM ${FROM_IMAGE_NAME}
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract
@@ -33,8 +33,8 @@ RUN pip install /workspace/install/python/tensorrtserver*.whl
WORKDIR /workspace/bert
RUN pip install --upgrade --no-cache-dir pip \
&& pip install --no-cache-dir \
tqdm boto3 requests six ipdb h5py html2text nltk onnxruntime tokenizers==0.7\
git+https://github.com/NVIDIA/dllogger@a20b622 wget
tqdm boto3 requests six ipdb h5py html2text nltk progressbar onnxruntime \
git+https://github.com/NVIDIA/dllogger wget

RUN apt-get install -y iputils-ping

2 changes: 1 addition & 1 deletion PyTorch/SpeechSynthesis/FastPitch/Dockerfile
@@ -22,7 +22,7 @@ RUN apt-get update && apt-get install -y libb64-dev libb64-0d

# Install Triton Client Python API and copy Perf Client
COPY --from=triton-client /workspace/install/ /workspace/install/
RUN find /workspace/install/python/ -iname triton*manylinux*.whl -exec pip install {}[all] \;
RUN pip install /workspace/install/python/triton*.whl

# Setup environment variables to access Triton Client binaries and libs
ENV PATH /workspace/install/bin:${PATH}
2 changes: 1 addition & 1 deletion PyTorch/SpeechSynthesis/FastPitch/inference.py
@@ -325,7 +325,7 @@ def main():
with torch.no_grad():
if generator is not None:
b = batches[0]
mel, *_ = generator(b['text'], b['text_lens'])
mel, *_ = generator(b['text'])
if waveglow is not None:
audios = waveglow(mel, sigma=args.sigma_infer).float()
_ = denoiser(audios, strength=args.denoising_strength)
10 changes: 1 addition & 9 deletions PyTorch/SpeechSynthesis/FastPitch/train.py
@@ -32,19 +32,11 @@
import os
import re
import time
import warnings
from collections import defaultdict, OrderedDict
from contextlib import contextmanager

import numpy as np
try:
import nvidia_dlprof_pytorch_nvtx as pyprof
except ModuleNotFoundError:
try:
import pyprof
except ModuleNotFoundError:
warnings.warn('PyProf is unavailable')

import nvidia_dlprof_pytorch_nvtx as pyprof
import torch
import torch.cuda.profiler as profiler
import torch.distributed as dist
54 changes: 45 additions & 9 deletions PyTorch/SpeechSynthesis/FastPitch/triton/calculate_metrics.py
@@ -17,7 +17,7 @@
r"""
Using `calculate_metrics.py` script, you can obtain model accuracy/error metrics using defined `MetricsCalculator` class.
Data provided to `MetricsCalculator` are obtained from dump files
Data provided to `MetricsCalculator` are obtained from npz dump files
stored in directory pointed by `--dump-dir` argument.
Above files are prepared by `run_inference_on_fw.py` and `run_inference_on_triton.py` scripts.
@@ -40,24 +40,49 @@
import string
from pathlib import Path

import numpy as np

# method from PEP-366 to support relative import in executed modules

if __package__ is None:
__package__ = Path(__file__).parent.name

from .deployment_toolkit.args import ArgParserGenerator
from .deployment_toolkit.core import BaseMetricsCalculator, load_from_file
from .deployment_toolkit.dump import JsonDumpReader
from .deployment_toolkit.dump import pad_except_batch_axis

LOGGER = logging.getLogger("calculate_metrics")
TOTAL_COLUMN_NAME = "_total_"


def get_data(dump_dir, prefix):
"""Loads and concatenates dump files for given prefix (ex. inputs, outputs, labels, ids)"""
dump_dir = Path(dump_dir)
npz_files = sorted(dump_dir.glob(f"{prefix}*.npz"))
data = None
if npz_files:
# assume that all npz files with given prefix contain same set of names
names = list(np.load(npz_files[0].as_posix()).keys())
# calculate target shape
target_shape = {
name: tuple(np.max([np.load(npz_file.as_posix())[name].shape for npz_file in npz_files], axis=0))
for name in names
}
# pad and concatenate data
data = {
name: np.concatenate(
[pad_except_batch_axis(np.load(npz_file.as_posix())[name], target_shape[name]) for npz_file in npz_files]
)
for name in names
}
return data


def main():
logging.basicConfig(level=logging.INFO)

parser = argparse.ArgumentParser(description="Run models with given dataloader", allow_abbrev=False)
parser.add_argument("--metrics", help="Path to python module containing metrics calculator", required=True)
parser.add_argument("--metrics", help=f"Path to python module containing metrics calculator", required=True)
parser.add_argument("--csv", help="Path to csv file", required=True)
parser.add_argument("--dump-dir", help="Path to directory with dumped outputs (and labels)", required=True)

@@ -68,18 +93,29 @@ def main():

args = parser.parse_args()

LOGGER.info("args:")
LOGGER.info(f"args:")
for key, value in vars(args).items():
LOGGER.info(f" {key} = {value}")

MetricsCalculator = load_from_file(args.metrics, "metrics", "MetricsCalculator")
metrics_calculator: BaseMetricsCalculator = ArgParserGenerator(MetricsCalculator).from_args(args)

reader = JsonDumpReader(args.dump_dir)
for ids, x, y_true, y_pred in reader.iterate_over(["ids", "inputs", "labels", "outputs"]):
ids = list(ids["ids"]) if ids is not None else None
metrics_calculator.update(ids=ids, x=x, y_pred=y_pred, y_real=y_true)
metrics = metrics_calculator.metrics
ids = get_data(args.dump_dir, "ids")["ids"]
x = get_data(args.dump_dir, "inputs")
y_true = get_data(args.dump_dir, "labels")
y_pred = get_data(args.dump_dir, "outputs")

common_keys = list({k for k in (y_true or [])} & {k for k in (y_pred or [])})
for key in common_keys:
if y_true[key].shape != y_pred[key].shape:
LOGGER.warning(
f"Model predictions and labels shall have equal shapes. "
f"y_pred[{key}].shape={y_pred[key].shape} != "
f"y_true[{key}].shape={y_true[key].shape}"
)

metrics = metrics_calculator.calc(ids=ids, x=x, y_pred=y_pred, y_real=y_true)
metrics = {TOTAL_COLUMN_NAME: len(ids), **metrics}

metric_names_with_space = [name for name in metrics if any([c in string.whitespace for c in name])]
if metric_names_with_space:
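For reference, a minimal invocation of `calculate_metrics.py` consistent with the arguments above might look like the sketch below; the metrics module path and directories are illustrative placeholders, and any extra arguments that `ArgParserGenerator` derives from the `MetricsCalculator` class are omitted:

```shell script
# paths below are placeholders; point them at your metrics module, dump directory, and output csv
python ./triton/calculate_metrics.py \
    --metrics ./triton/metrics.py \
    --dump-dir ./dumps \
    --csv ./metrics.csv
```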
202 changes: 202 additions & 0 deletions PyTorch/SpeechSynthesis/FastPitch/triton/config_model_on_triton.py
@@ -0,0 +1,202 @@
#!/usr/bin/env python3

# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""
To configure a model on Triton, you can use the `config_model_on_triton.py` script.
It will prepare the layout of the Model Repository, including the Model Configuration.
```shell script
python ./triton/config_model_on_triton.py \
--model-repository /model_repository \
--model-path /models/exported/model.onnx \
--model-format onnx \
--model-name ResNet50 \
--model-version 1 \
--max-batch-size 32 \
--precision fp16 \
--backend-accelerator trt \
--load-model explicit \
--timeout 120 \
--verbose
```
If the Triton server for which the model repository is prepared is running in **explicit model control mode**,
use the `--load-model` argument to send a load_model request to the Triton Inference Server.
If the server is listening on a non-default address or port, use the `--server-url` argument to point at the server control endpoint.
If the HTTP protocol has to be used to communicate with the Triton server, use the `--http` argument.
To improve inference throughput you can use
[dynamic batching](https://github.com/triton-inference-server/server/blob/master/docs/model_configuration.md#dynamic-batcher)
for your model by providing `--preferred-batch-sizes` and `--max-queue-delay-us` parameters.
For models which don't support batching, set `--max-batch-size` to 0.
By default Triton will [automatically obtain inputs and outputs definitions](https://github.com/triton-inference-server/server/blob/master/docs/model_configuration.md#auto-generated-model-configuration),
but for TorchScript and TF GraphDef models the script uses a file with I/O specs. This file is automatically generated
when the model is converted to ScriptModule (either traced or scripted).
If a path other than the default I/O spec file path has to be passed, use the `--io-spec` CLI argument.
The I/O spec file is a YAML file with the following structure:
```yaml
- inputs:
  - name: input
    dtype: float32 # np.dtype name
    shape: [None, 224, 224, 3]
- outputs:
  - name: probabilities
    dtype: float32
    shape: [None, 1001]
  - name: classes
    dtype: int32
    shape: [None, 1]
```
"""

import argparse
import logging
import time

from model_navigator import Accelerator, Format, Precision
from model_navigator.args import str2bool
from model_navigator.log import set_logger, log_dict
from model_navigator.triton import ModelConfig, TritonClient, TritonModelStore

LOGGER = logging.getLogger("config_model")


def _available_enum_values(my_enum):
return [item.value for item in my_enum]


def main():
parser = argparse.ArgumentParser(
description="Create Triton model repository and model configuration", allow_abbrev=False
)
parser.add_argument("--model-repository", required=True, help="Path to Triton model repository.")
parser.add_argument("--model-path", required=True, help="Path to model to configure")

# TODO: automation
parser.add_argument(
"--model-format",
required=True,
choices=_available_enum_values(Format),
help="Format of model to deploy",
)
parser.add_argument("--model-name", required=True, help="Model name")
parser.add_argument("--model-version", default="1", help="Version of model (default 1)")
parser.add_argument(
"--max-batch-size",
type=int,
default=32,
help="Maximum batch size allowed for inference. "
"A max_batch_size value of 0 indicates that batching is not allowed for the model",
)
# TODO: automation
parser.add_argument(
"--precision",
type=str,
default=Precision.FP16.value,
choices=_available_enum_values(Precision),
help="Model precision (parameter used only by Tensorflow backend with TensorRT optimization)",
)

# Triton Inference Server endpoint
parser.add_argument(
"--server-url",
type=str,
default="grpc://localhost:8001",
help="Inference server URL in format protocol://host[:port] (default grpc://localhost:8001)",
)
parser.add_argument(
"--load-model",
choices=["none", "poll", "explicit"],
help="Loading model while Triton Server is in given model control mode",
)
parser.add_argument(
"--timeout", default=120, help="Timeout in seconds to wait till model load (default=120)", type=int
)

# optimization related
parser.add_argument(
"--backend-accelerator",
type=str,
choices=_available_enum_values(Accelerator),
default=Accelerator.TRT.value,
help="Select Backend Accelerator used to serve model",
)
parser.add_argument("--number-of-model-instances", type=int, default=1, help="Number of model instances per GPU")
parser.add_argument(
"--preferred-batch-sizes",
type=int,
nargs="*",
help="Batch sizes that the dynamic batcher should attempt to create. "
"In case --max-queue-delay-us is set and this parameter is not, default value will be --max-batch-size",
)
parser.add_argument(
"--max-queue-delay-us",
type=int,
default=0,
help="Max delay time which dynamic batcher shall wait to form a batch (default 0)",
)
parser.add_argument(
"--capture-cuda-graph",
type=int,
default=0,
help="Use cuda capture graph (used only by TensorRT platform)",
)

parser.add_argument("-v", "--verbose", help="Provide verbose logs", type=str2bool, default=False)
args = parser.parse_args()

set_logger(verbose=args.verbose)
log_dict("args", vars(args))

config = ModelConfig.create(
model_path=args.model_path,
# model definition
model_name=args.model_name,
model_version=args.model_version,
model_format=args.model_format,
precision=args.precision,
max_batch_size=args.max_batch_size,
# optimization
accelerator=args.backend_accelerator,
gpu_engine_count=args.number_of_model_instances,
preferred_batch_sizes=args.preferred_batch_sizes or [],
max_queue_delay_us=args.max_queue_delay_us,
capture_cuda_graph=args.capture_cuda_graph,
)

model_store = TritonModelStore(args.model_repository)
model_store.deploy_model(model_config=config, model_path=args.model_path)

if args.load_model != "none":
client = TritonClient(server_url=args.server_url, verbose=args.verbose)
client.wait_for_server_ready(timeout=args.timeout)

if args.load_model == "explicit":
client.load_model(model_name=args.model_name)

if args.load_model == "poll":
time.sleep(15)

client.wait_for_model(model_name=args.model_name, model_version=args.model_version, timeout_s=args.timeout)


if __name__ == "__main__":
main()
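The docstring above mentions dynamic batching via `--preferred-batch-sizes` and `--max-queue-delay-us`, but the sample command does not exercise it; a hedged sketch of such an invocation (model name, paths, and values are illustrative assumptions, not taken from this commit) might be:

```shell script
# enable dynamic batching; the flags exist in the argparse definition above,
# but the concrete values, paths, and model name are assumptions for illustration only
python ./triton/config_model_on_triton.py \
    --model-repository /model_repository \
    --model-path /models/exported/model.onnx \
    --model-format onnx \
    --model-name FastPitch \
    --model-version 1 \
    --max-batch-size 8 \
    --preferred-batch-sizes 2 4 8 \
    --max-queue-delay-us 100 \
    --backend-accelerator trt \
    --load-model explicit
```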
@@ -1 +1 @@
0.6.13-4-g623fb7c3
0.5.0-2-gd556907
Expand Up @@ -53,7 +53,7 @@ def add_args_for_fn_signature(parser, fn) -> argparse.ArgumentParser:
if parameter.annotation == bool:
argument_kwargs["type"] = str2bool
argument_kwargs["choices"] = [0, 1]
elif isinstance(parameter.annotation, type(Optional[Any])):
elif type(parameter.annotation) == type(Union): # isinstance(parameter.annotation, type(Optional[Any])):
types = [type_ for type_ in parameter.annotation.__args__ if not isinstance(None, type_)]
if len(types) != 1:
raise RuntimeError(