diff --git a/examples/.config/model_params_keras_3x.json b/examples/.config/model_params_keras_3x.json new file mode 100644 index 00000000000..bac8a06b4a3 --- /dev/null +++ b/examples/.config/model_params_keras_3x.json @@ -0,0 +1,18 @@ +{ + "keras": { + "resnetv2_50": { + "model_src_dir": "keras/image_recognition/resnet_v2_50/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + }, + "inception_v3": { + "model_src_dir": "keras/image_recognition/inception_v3/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + } + } +} diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 70e1497e508..550f4d01219 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -8,6 +8,20 @@ "batch_size": 64, "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb" }, + "distilbert_base": { + "model_src_dir": "nlp/distilbert_base/quantization/ptq", + "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", + "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", + "main_script": "main.py", + "batch_size": 128 + }, + "distilbert_base_sq": { + "model_src_dir": "nlp/distilbert_base/quantization/ptq", + "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", + "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", + "main_script": "main.py", + "batch_size": 128 + }, "opt_125m_sq": { "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant", "dataset_location": "", @@ -29,6 +43,42 @@ "main_script": "main.py", "batch_size": 1 }, + "transformer_lt": { + "model_src_dir": "nlp/transformer_lt/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/data", + "input_model": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb", + "main_script": "main.py", + "batch_size": 64 + }, + "inception_v3": { + "model_src_dir": "image_recognition/inception_v3/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-trained-models/inceptionv3/fp32/freezed_inceptionv3.pb", + "main_script": "main.py", + "batch_size": 32, + "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb" + }, + "mobilenetv2": { + "model_src_dir": "image_recognition/mobilenet_v2/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "resnetv2_50": { + "model_src_dir": "image_recognition/resnet_v2_50/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_resnet_v2_50.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "vgg16": { + "model_src_dir": "image_recognition/vgg16/quantization/ptq", + "dataset_location": 
"/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", + "main_script": "main.py", + "batch_size": 32 + }, "ViT": { "model_src_dir": "image_recognition/vision_transformer/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", @@ -42,6 +92,63 @@ "input_model": "/tf_dataset/tensorflow/graphsage/graphsage_frozen_model.pb", "main_script": "main.py", "batch_size": 1000 + }, + "faster_rcnn_resnet50": { + "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "mask_rcnn_inception_v2": { + "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "mask_rcnn_inception_v2_ckpt": { + "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2", + "main_script": "main.py", + "batch_size": 10 + }, + "ssd_mobilenet_v1": { + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "ssd_mobilenet_v1_ckpt": { + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1", + "main_script": "main.py", + "batch_size": 10 + }, + "wide_deep_large_ds": { + "model_src_dir": "recommendation/wide_deep_large_ds/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/wide_deep_large_ds/dataset", + "input_model": "/tf_dataset/tensorflow/wide_deep_large_ds/fp32_optimized_graph.pb", + "main_script": "main.py", + "batch_size": 256, + "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/wide_deep_fp32_pretrained_model.pb" + }, + "3dunet-mlperf": { + "model_src_dir": "semantic_image_segmentation/3dunet-mlperf/quantization/ptq", + "dataset_location": "/tf_dataset2/models/tensorflow/3dunet/build", + "input_model": "/tf_dataset2/models/tensorflow/3dunet/3dunet_dynamic_ndhwc.pb", + "main_script": "main.py", + "batch_size": 100 + }, + "style_transfer": { + "model_src_dir": "style_transfer/arbitrary_style_transfer/quantization/ptq", + "dataset_location": "style_images,content_images", + "input_model": "/tf_dataset/tensorflow/style_transfer/arbitrary_style_transfer/model.ckpt", + "main_script": "main.py", + "batch_size": 1 } } } diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md new file mode 100644 index 00000000000..34eb64fcf74 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md @@ -0,0 +1,75 @@ +Step-by-Step +============ + +This document list steps of 
reproducing inception_v3 model tuning and benchmark results via Neural Compressor. +This example can run on Intel CPUs and GPUs. + +> **Note**: +> The models is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.9 or higher version. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare pre-trained model + + Download pre-trained PB + ```shell + wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb + ``` + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/3.x_api/tensorflow/cv + # convert validation subset + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/inceptionv3_fp32_pretrained_model.pb \ + --output_model=./nc_inception_v3.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
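+    Images are converted to float32, centrally cropped, resized with bilinear
+    interpolation and mapped to the [-1, 1] range; `mean_value` is then
+    subtracted and the result is multiplied by `scale`.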
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
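+        A `ParseDecodeImagenet` step is inserted at the front of the given `ComposeTransform`,
+        so `transform` only needs to handle decoded images; the returned object is a plain
+        `tf.data.Dataset` (batching is left to the dataloader).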
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
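+                Defaults to 1, i.e. standard top-1 accuracy.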
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py new file 
mode 100644 index 00000000000..9b0f737b619 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py @@ -0,0 +1,144 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=299, width=299), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset) + + # maybe we need to exclud bf16 + quant_config = StaticQuantConfig() + conv_config = StaticQuantConfig(weight_dtype="fp32", act_dtype="fp32") + quant_config.set_local("v0/cg/conv0/conv2d/Conv2D", conv_config) + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=299, width=299), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + def eval(model): + top1 = TopKMetric(k=1) + return evaluate(model, dataloader, top1) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ 
b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md new file mode 100644 index 00000000000..25755074a06 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md @@ -0,0 +1,108 @@ +Step-by-Step +============ + +This document list steps of reproducing mobilenet_v2 model tuning and benchmark results via Neural Compressor. +This example can run on Intel CPUs and GPUs. + +> **Note**: +> The model is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.9 or higher version. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare pre-trained model + +The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz + tar -xvf mobilenet_v2_1.4_224.tgz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=mobilenet_v2 \ + --output_file=/tmp/mobilenet_v2_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. 
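+   As a quick reference, a minimal environment sketch for this step (the exact intel-tensorflow wheel URL depends on your Python version, see the links below):
+   ```shell
+   pip install tf_slim
+   pip install <intel_tensorflow-1.15.0up2 wheel matching your Python version>
+   ```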
+ #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + + 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` + + 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/mobilenet_v2_inf_graph.pb \ + --input_checkpoint=./mobilenet_v2.ckpt \ + --input_binary=true \ + --output_graph=./frozen_mobilenet_v2.pb \ + --output_node_names=MobilenetV2/Predictions/Reshape_1 + ``` + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/3.x_api/tensorflow/cv + # convert validation subset + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_mobilenet_v2.pb \ + --output_model=./nc_mobilenet_v2.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
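+    Images are converted to float32, centrally cropped, resized with bilinear
+    interpolation and mapped to the [-1, 1] range; `mean_value` is then
+    subtracted and the result is multiplied by `scale`.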
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
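+        A `ParseDecodeImagenet` step is inserted at the front of the given `ComposeTransform`,
+        so `transform` only needs to handle decoded images; the returned object is a plain
+        `tf.data.Dataset` (batching is left to the dataloader).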
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
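+                Defaults to 1, i.e. standard top-1 accuracy.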
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py new file 
mode 100644 index 00000000000..fd3a07937de --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -0,0 +1,142 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + return evaluate(model, dataloader, top1) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# 
init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+    python main.py \
+            --input-graph ${input_model} \
+            --output-graph ${output_model} \
+            --dataset_location ${dataset_location} \
+            --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md
new file mode 100644
index 00000000000..bc07e651f96
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md
@@ -0,0 +1,107 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce resnet_v2_50 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in the validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Python 3.9 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for Tensorflow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+The resnet_v2_50 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz
+ tar -xvf resnet_v2_50_2017_04_14.tar.gz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+         --alsologtostderr \
+         --model_name=resnet_v2_50 \
+         --output_file=/tmp/resnet_v2_50_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and `pip install tf_slim`.
+   #### Install Intel Tensorflow 1.15 up2
+   Check your Python version and pip install the matching 1.15.0 up2 wheel from the links below:
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+   > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001, so we need to add the `--labels_offset=1` flag to the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for resnet_v2_50 the output layer name is `resnet_v2_50/predictions/Reshape_1`
+
+ 4. Freeze the exported graph using the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+         --input_graph=/tmp/resnet_v2_50_inf_graph.pb \
+         --input_checkpoint=./resnet_v2_50.ckpt \
+         --input_binary=true \
+         --output_graph=./frozen_resnet_v2_50.pb \
+         --output_node_names=resnet_v2_50/predictions/Reshape_1
+ ```
+
+## 3. Prepare Dataset
+
+  TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+  We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, we can use the command below to convert it to the TF records format.
+
+  ```shell
+  cd examples/3.x_api/tensorflow/cv
+  # convert validation subset
+  bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+  # convert train subset
+  bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+  ```
+> **Note**:
+> The raw ImageNet dataset, which resides in JPEG files, should be arranged in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_resnet_v2_50.pb \ + --output_model=./nc_resnet_v2_50.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
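+    Crops the central `central_fraction` of the image, resizes it to (height, width) with bilinear
+    interpolation, rescales pixel values to the [-1, 1] range, then subtracts `mean_value` and
+    multiplies by `scale`.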
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
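+
+    Returns: a `tf.data.Dataset` yielding (image, label) tuples with `transform` already applied.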
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
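+                Defaults to 1, i.e. standard top-1 accuracy.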
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py new file 
mode 100644 index 00000000000..bb82476fced --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py @@ -0,0 +1,143 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--diagnose', dest='diagnose', action='store_true', help='use Neural Insights to diagnose tuning and benchmark.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. 
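+
+    Note: `eval_dataloader` must yield (inputs, labels) pairs and `metric` must expose update()/result();
+    `postprocess` is accepted for interface compatibility but is not used in this example.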
+ """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + return evaluate(model, dataloader, top1) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + 
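+  # default values, overridden when --batch_size=* or --iters=* is passed on the command line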
batch_size=32
+  iters=100
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_benchmark
+function run_benchmark {
+
+    python main.py \
+            --input-graph ${input_model} \
+            --mode ${mode} \
+            --dataset_location ${dataset_location} \
+            --batch_size ${batch_size} \
+            --benchmark \
+            --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  run_tuning
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+    python main.py \
+            --input-graph ${input_model} \
+            --output-graph ${output_model} \
+            --dataset_location ${dataset_location} \
+            --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md
new file mode 100644
index 00000000000..00e00c7846d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md
@@ -0,0 +1,108 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce vgg16 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in the validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Python 3.9 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for Tensorflow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare pre-trained model
+
+The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+ ```shell
+ wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
+ tar -xvf vgg_16_2016_08_28.tar.gz
+ ```
+
+ 2. Exporting the Inference Graph
+ ```shell
+ git clone https://github.com/tensorflow/models
+ cd models/research/slim
+ python export_inference_graph.py \
+         --alsologtostderr \
+         --model_name=vgg_16 \
+         --output_file=/tmp/vgg_16_inf_graph.pb
+ ```
+ Make sure to use intel-tensorflow v1.15, and `pip install tf_slim`.
+   #### Install Intel Tensorflow 1.15 up2
+   Check your Python version and pip install the matching 1.15.0 up2 wheel from the links below:
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+   https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+   > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001, so we need to add the `--labels_offset=1` flag to the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb; for vgg_16 the output layer name is `vgg_16/fc8/squeezed`
+
+ 4. Freeze the exported graph using the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+ ```shell
+ python freeze_graph.py \
+         --input_graph=/tmp/vgg_16_inf_graph.pb \
+         --input_checkpoint=./vgg_16.ckpt \
+         --input_binary=true \
+         --output_graph=./frozen_vgg16.pb \
+         --output_node_names=vgg_16/fc8/squeezed
+ ```
+
+## 3. Prepare Dataset
+
+  TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+  We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, we can use the command below to convert it to the TF records format.
+
+  ```shell
+  cd examples/3.x_api/tensorflow/cv
+  # convert validation subset
+  bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+  # convert train subset
+  bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+  ```
+> **Note**:
+> The raw ImageNet dataset, which resides in JPEG files, should be arranged in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_vgg16.pb \ + --output_model=./nc_vgg16.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py new file mode 100644 index 00000000000..17b4d9cec5e --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py @@ -0,0 +1,581 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class ResizeCropImagenet(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
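+    Scales the shorter side to `resize_side`, takes a central (or random) crop of size (height, width),
+    optionally flips left/right, then subtracts `mean_value` and multiplies by `scale`.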
+ + Args: + height (int): Height of the result + width (int): Width of the result + random_crop (bool, default=False): whether to random crop + resize_side (int, default=256):desired shape after resize operation + random_flip_left_right (bool, default=False): whether to random flip left and right + mean_value (list, default=[0.0,0.0,0.0]):means for each channel + scale (float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + resize_method="bilinear", + random_flip_left_right=False, + mean_value=[0.0, 0.0, 0.0], + scale=1.0, + data_format="channels_last", + subpixels="RGB", + ): + """Initialize `TensorflowResizeCropImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.random_crop = random_crop + self.random_flip_left_right = random_flip_left_right + self.resize_side = resize_side + self.resize_method = resize_method + self.data_format = data_format + self.subpixels = subpixels + + # sample is (images, labels) + def __call__(self, sample): + """Convert `TensorflowResizeCropImagenetTransform` feature.""" + image, label = sample + shape = tf.shape(input=image) + + height = ( + tf.cast(shape[0], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[1], dtype=tf.float32) + ) + width = ( + tf.cast(shape[1], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[2], dtype=tf.float32) + ) + scale = tf.cond( + pred=tf.greater(height, width), + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height, + ) + + scale = tf.cast(scale, dtype=tf.float32) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) + + if self.subpixels == "BGR" and self.data_format == "channels_first": + # 'RGB'->'BGR' + image = tf.cond( + tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), + ) + elif self.subpixels == "BGR": + # 'RGB'->'BGR' + image = image[..., ::-1] + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) + image = tf.squeeze(image) + shape = tf.shape(input=image) + if self.random_crop: + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) + else: + y0 = (shape[0] - self.height) // 2 + x0 = (shape[1] - self.width) // 2 + + image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width) + image.set_shape([self.height, self.width, 3]) + if self.random_flip_left_right: + image = tf.image.random_flip_left_right(image) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. 
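+    Each transform in `transform_list` is applied to the (image, label) sample in order.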
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. 
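+
+        Note: any positional and keyword arguments are forwarded to the wrapped metric class constructor.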
+ """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. 
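+            Returns 0 and logs a warning when no samples have been evaluated.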
+ """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py new file mode 100644 
index 00000000000..ffe960e1b1e --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py @@ -0,0 +1,146 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + ResizeCropImagenet, + LabelShift, + TFDataLoader, + TopKMetric +) + + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + postprocess = LabelShift(label_shift=1) + return evaluate(model, dataloader, top1, postprocess) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + 
run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md index 7dcf3e7a363..0d4fa041690 100644 --- a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md @@ -40,10 +40,10 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF- ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/image_recognition/tensorflow_models/ + cd examples/3.x_api/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./vision_transformer/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset @@ -60,7 +60,7 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF- ## 1. 
Quantization ```shell -bash run_quant.sh --input_model= --output_model=./output --dataset_location= +bash run_quant.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --output_model=./output --dataset_location= ``` @@ -69,7 +69,7 @@ bash run_quant.sh --input_model= --outpu ### Benchmark the fp32 model ```shell -bash run_benchmark.sh --input_model= --mode=accuracy --dataset_location= --batch_size=32 +bash run_benchmark.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --mode=accuracy --dataset_location= --batch_size=32 ``` ### Benchmark the int8 model diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py index 49b4771c61a..92b2ea0fb2a 100644 --- a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py @@ -146,7 +146,6 @@ def run(self): with tf.io.gfile.GFile(args.input_graph, "rb") as f: sm.ParseFromString(f.read()) graph_def = sm.meta_graphs[0].graph_def - postprocess = ShiftRescale() q_model = quantize_model(graph_def, quant_config, calib_dataloader) q_model.save(args.output_graph) diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py new file mode 100644 index 00000000000..c52d2bd4218 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py @@ -0,0 +1,567 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Converts ImageNet data to TFRecords file format with Example protos. + +The raw ImageNet data set is expected to reside in JPEG files located in the +following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + ... + +where 'n01440764' is the unique synset label associated with +these images. + +The training data set consists of 1000 sub-directories (i.e. labels) +each containing 1200 JPEG images for a total of 1.2M JPEG images. + +The evaluation data set consists of 1000 sub-directories (i.e. labels) +each containing 50 JPEG images for a total of 50K JPEG images. + +This TensorFlow script converts the training and evaluation data into +a sharded data set consisting of 1024 and 128 TFRecord files, respectively. + + train_directory/train-00000-of-01024 + train_directory/train-00001-of-01024 + ... + train_directory/train-00127-of-01024 + +and + + validation_directory/validation-00000-of-00128 + validation_directory/validation-00001-of-00128 + ... + validation_directory/validation-00127-of-00128 + +Each validation TFRecord file contains ~390 records. Each training TFREcord +file contains ~1250 records. 
Each record within the TFRecord file is a +serialized Example proto. The Example proto contains the following fields: + + image/encoded: string containing JPEG encoded image in RGB colorspace + image/height: integer, image height in pixels + image/width: integer, image width in pixels + image/colorspace: string, specifying the colorspace, always 'RGB' + image/channels: integer, specifying the number of channels, always 3 + image/format: string, specifying the format, always'JPEG' + + image/filename: string containing the basename of the image file + e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' + image/class/label: integer specifying the index in a classification layer. + The label ranges from [1, 1000] where 0 is not used. + image/class/synset: string specifying the unique ID of the label, + e.g. 'n01440764' + image/class/text: string specifying the human-readable version of the label + e.g. 'red fox, Vulpes vulpes' + +Note that the length of xmin is identical to the length of xmax, ymin and ymax +for each example. + +Running this script using 16 threads may take around ~2.5 hours on a HP Z420. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import os +import random +import sys +import threading + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +tf.compat.v1.disable_eager_execution() + + +tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, + 'Raw data directory') + +tf.compat.v1.app.flags.DEFINE_string('output_directory', None, + 'Output data directory') + +tf.compat.v1.app.flags.DEFINE_integer('shards', 1, + 'Number of shards in TFRecord files.') + +tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', + 'Subset of imagenet, can be validation/train') + +tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, + 'Number of threads to preprocess the images.') + +# The labels file contains a list of valid labels are held in this file. +# Assumes that the file contains entries as such: +# n01440764 +# n01443537 +# n01484850 +# where each line corresponds to a label expressed as a synset. We map +# each synset contained in the file to an integer (based on the alphabetical +# ordering). See below for details. +tf.compat.v1.app.flags.DEFINE_string('labels_file', + 'imagenet_lsvrc_2015_synsets.txt', + 'Labels file') + +# This file containing mapping from synset to human-readable label. +# Assumes each line of the file looks like: +# +# n02119247 black fox +# n02119359 silver fox +# n02119477 red fox, Vulpes fulva +# +# where each line corresponds to a unique mapping. Note that each line is +# formatted as \t. 
+tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', + 'imagenet_metadata.txt', + 'ImageNet metadata file') + +FLAGS = tf.compat.v1.app.flags.FLAGS + + +def _int64_feature(value): + """Wrapper for inserting int64 features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + +def _float_feature(value): + """Wrapper for inserting float features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + +def _bytes_feature(value): + """Wrapper for inserting bytes features into Example proto.""" + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _convert_to_example(filename, image_buffer, label, synset, human, + height, width): + """Build an Example proto for an example. + + Args: + filename: string, path to an image file, e.g., '/path/to/example.JPG' + image_buffer: string, JPEG encoding of RGB image + label: integer, identifier for the ground truth for the network + synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' + human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' + height: integer, image height in pixels + width: integer, image width in pixels + Returns: + Example proto + """ + + colorspace = b'RGB' + channels = 3 + image_format = b'JPEG' + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': _int64_feature(height), + 'image/width': _int64_feature(width), + 'image/colorspace': _bytes_feature(colorspace), + 'image/channels': _int64_feature(channels), + 'image/class/label': _int64_feature(label), + 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), + 'image/class/text': _bytes_feature(bytes(human,'utf-8')), + 'image/format': _bytes_feature(image_format), + 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), + 'image/encoded': _bytes_feature(image_buffer)})) + return example + + +class ImageCoder(object): + """Helper class that provides TensorFlow image coding utilities.""" + + def __init__(self): + # Create a single Session to run all image coding calls. + self._sess = tf.compat.v1.Session() + + # Initializes function that converts PNG to JPEG data. + self._png_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_png(self._png_data, channels=3) + self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that converts CMYK JPEG data to RGB JPEG data. + self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_jpeg(self._cmyk_data, channels=0) + self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that decodes RGB JPEG data. + self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) + self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) + + def png_to_jpeg(self, image_data): + return self._sess.run(self._png_to_jpeg, + feed_dict={self._png_data: image_data}) + + def cmyk_to_rgb(self, image_data): + return self._sess.run(self._cmyk_to_rgb, + feed_dict={self._cmyk_data: image_data}) + + def decode_jpeg(self, image_data): + image = self._sess.run(self._decode_jpeg, + feed_dict={self._decode_jpeg_data: image_data}) + assert len(image.shape) == 3 + assert image.shape[2] == 3 + return image + + +def _is_png(filename): + """Determine if a file contains a PNG format image. 
+ + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a PNG. + """ + # File list from: + # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU + return 'n02105855_2933.JPEG' in filename + + +def _is_cmyk(filename): + """Determine if file contains a CMYK JPEG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a JPEG encoded with CMYK color space. + """ + # File list from: + # https://github.com/cytsai/ilsvrc-cmyk-image-list + blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', + 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', + 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', + 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', + 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', + 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', + 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', + 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', + 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', + 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', + 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] + return filename.split('/')[-1] in blacklist + + +def _process_image(filename, coder): + """Process a single image file. + + Args: + filename: string, path to an image file e.g., '/path/to/example.JPG'. + coder: instance of ImageCoder to provide TensorFlow image coding utils. + Returns: + image_buffer: string, JPEG encoding of RGB image. + height: integer, image height in pixels. + width: integer, image width in pixels. + """ + # Read the image file. + image_data = tf.io.gfile.GFile(filename, 'rb').read() + + # Clean the dirty data. + if _is_png(filename): + # 1 image is a PNG. + print('Converting PNG to JPEG for %s' % filename) + image_data = coder.png_to_jpeg(image_data) + elif _is_cmyk(filename): + # 22 JPEG images are in CMYK colorspace. + print('Converting CMYK to RGB for %s' % filename) + image_data = coder.cmyk_to_rgb(image_data) + + # Decode the RGB JPEG. + image = coder.decode_jpeg(image_data) + + # Check that image converted to RGB + assert len(image.shape) == 3 + height = image.shape[0] + width = image.shape[1] + assert image.shape[2] == 3 + + return image_data, height, width + + +def _process_image_files_batch(coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards): + """Processes and saves list of images as TFRecord in 1 thread. + + Args: + coder: instance of ImageCoder to provide TensorFlow image coding utils. + thread_index: integer, unique batch to run index is within [0, len(ranges)). + ranges: list of pairs of integers specifying ranges of each batches to + analyze in parallel. + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + # Each thread produces N shards where N = int(num_shards / num_threads). + # For instance, if num_shards = 128, and the num_threads = 2, then the first + # thread would produce shards [0, 64). 
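A small illustrative check of the partitioning described in the comment above; the numbers assume the original 50,000-image validation split with 128 shards and 2 threads, so each thread writes 64 shards over its 25,000 files:

```python
# Each of the thread's 64 shards then covers ~390 files, consistent with the
# "~390 records per validation shard" noted in the module docstring.
import numpy as np

shard_ranges = np.linspace(0, 25000, 64 + 1).astype(int)
print(shard_ranges[:3])             # [  0 390 781]
print(np.diff(shard_ranges).min(),  # 390
      np.diff(shard_ranges).max())  # 391
```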
+ num_threads = len(ranges) + assert not num_shards % num_threads + num_shards_per_batch = int(num_shards / num_threads) + + shard_ranges = np.linspace(ranges[thread_index][0], + ranges[thread_index][1], + num_shards_per_batch + 1).astype(int) + num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] + + counter = 0 + for s in xrange(num_shards_per_batch): + # Generate a sharded version of the file name, e.g. 'train-00002-of-00010' + shard = thread_index * num_shards_per_batch + s + output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) + output_file = os.path.join(FLAGS.output_directory, output_filename) + writer = tf.io.TFRecordWriter(output_file) + + shard_counter = 0 + files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE + for i in files_in_shard: + filename = filenames[i] + label = labels[i] + synset = synsets[i] + human = humans[i] + + image_buffer, height, width = _process_image(filename, coder) + + example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) + writer.write(example.SerializeToString()) + shard_counter += 1 + counter += 1 + + if not counter % 1000: + print('%s [thread %d]: Processed %d of %d images in thread batch.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + writer.close() + print('%s [thread %d]: Wrote %d images to %s' % + (datetime.now(), thread_index, shard_counter, output_file)) + sys.stdout.flush() + shard_counter = 0 + print('%s [thread %d]: Wrote %d images to %d shards.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + +def _process_image_files(name, filenames, synsets, labels, humans, num_shards): + """Process and save list of images as TFRecord of Example protos. + + Args: + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + assert len(filenames) == len(synsets) + assert len(filenames) == len(labels) + assert len(filenames) == len(humans) + + # Break all images into batches with a [ranges[i][0], ranges[i][1]]. + spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) + ranges = [] + threads = [] + for i in xrange(len(spacing) - 1): + ranges.append([spacing[i], spacing[i+1]]) + + # Launch a thread for each batch. + print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) + sys.stdout.flush() + + # Create a mechanism for monitoring when all threads are finished. + coord = tf.train.Coordinator() + + # Create a generic TensorFlow-based utility for converting all image codings. + coder = ImageCoder() + + threads = [] + for thread_index in xrange(len(ranges)): + args = (coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards) + t = threading.Thread(target=_process_image_files_batch, args=args) + t.start() + threads.append(t) + + # Wait for all the threads to terminate. + coord.join(threads) + print('%s: Finished writing all %d images in data set.' % + (datetime.now(), len(filenames))) + sys.stdout.flush() + + +def _find_image_files(data_dir, labels_file): + """Build a list of all images files and labels in the data set. + + Args: + data_dir: string, path to the root directory of images. 
+ + Assumes that the ImageNet data set resides in JPEG files located in + the following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + + where 'n01440764' is the unique synset label associated with these images. + + labels_file: string, path to the labels file. + + The list of valid labels are held in this file. Assumes that the file + contains entries as such: + n01440764 + n01443537 + n01484850 + where each line corresponds to a label expressed as a synset. We map + each synset contained in the file to an integer (based on the alphabetical + ordering) starting with the integer 1 corresponding to the synset + contained in the first line. + + The reason we start the integer labels at 1 is to reserve label 0 as an + unused background class. + + Returns: + filenames: list of strings; each string is a path to an image file. + synsets: list of strings; each string is a unique WordNet ID. + labels: list of integer; each integer identifies the ground truth. + """ + print('Determining list of input files and labels from %s.' % data_dir) + challenge_synsets = [l.strip() for l in + tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] + + labels = [] + filenames = [] + synsets = [] + + # Leave label index 0 empty as a background class. + label_index = 1 + + # Construct the list of JPEG files and labels. + for synset in challenge_synsets: + jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) + matching_files = tf.io.gfile.glob(jpeg_file_path) + + labels.extend([label_index] * len(matching_files)) + synsets.extend([synset] * len(matching_files)) + filenames.extend(matching_files) + + if not label_index % 100: + print('Finished finding files in %d of %d classes.' % ( + label_index, len(challenge_synsets))) + label_index += 1 + + # Shuffle the ordering of all image files in order to guarantee + # random ordering of the images with respect to label in the + # saved TFRecord files. Make the randomization repeatable. + shuffled_index = range(len(filenames)) + random.seed(12345) + + random.shuffle(list(range(len(shuffled_index)))) + + filenames = [filenames[i] for i in shuffled_index] + synsets = [synsets[i] for i in shuffled_index] + labels = [labels[i] for i in shuffled_index] + + print('Found %d JPEG files across %d labels inside %s.' % + (len(filenames), len(challenge_synsets), data_dir)) + return filenames, synsets, labels + + +def _find_human_readable_labels(synsets, synset_to_human): + """Build a list of human-readable labels. + + Args: + synsets: list of strings; each string is a unique WordNet ID. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + + Returns: + List of human-readable strings corresponding to each synset. + """ + humans = [] + for s in synsets: + assert s in synset_to_human, ('Failed to find: %s' % s) + humans.append(synset_to_human[s]) + return humans + + +def _process_dataset(name, directory, num_shards, synset_to_human): + """Process a complete data set and save it as a TFRecord. + + Args: + name: string, unique identifier specifying the data set. + directory: string, root path to the data set. + num_shards: integer number of shards for this data set. 
+ synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) + humans = _find_human_readable_labels(synsets, synset_to_human) + + _process_image_files(name, filenames, synsets, labels, + humans, num_shards) + + +def _build_synset_lookup(imagenet_metadata_file): + """Build lookup for synset to human-readable label. + + Args: + imagenet_metadata_file: string, path to file containing mapping from + synset to human-readable label. + + Assumes each line of the file looks like: + + n02119247 black fox + n02119359 silver fox + n02119477 red fox, Vulpes fulva + + where each line corresponds to a unique mapping. Note that each line is + formatted as \t. + + Returns: + Dictionary of synset to human labels, such as: + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() + synset_to_human = {} + for l in lines: + if l: + parts = l.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + return synset_to_human + + +def main(unused_argv): + assert not FLAGS.shards % FLAGS.num_threads, ( + 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') + + print('Saving results to %s' % FLAGS.output_directory) + + # Build a map from synset to human-readable label. + synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) + + if(FLAGS.raw_directory != None): + _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) + +if __name__ == '__main__': + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh new file mode 100644 index 00000000000..f9baa85ab07 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download and preprocess ImageNet Challenge 2012 +# training and validation data set. +# +# The final output of this script are sharded TFRecord files containing +# serialized Example protocol buffers. See build_imagenet_data.py for +# details of how the Example protocol buffers contain the ImageNet data. +# +# The final output of this script appears as such: +# +# data_dir/train-00000-of-01024 +# data_dir/train-00001-of-01024 +# ... +# data_dir/train-00127-of-01024 +# +# and +# +# data_dir/validation-00000-of-00128 +# data_dir/validation-00001-of-00128 +# ... +# data_dir/validation-00127-of-00128 +# +# Note that this script may take several hours to run to completion. 
The +# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending +# on the speed of your machine. Please be patient. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# + +set -e + +if [ -z "$1" ]; then + echo "usage download_and_convert_imagenet.sh [data dir]" + exit +fi + +# Create the output and temporary directories. +DATA_DIR="${1%/}" +SCRATCH_DIR="${DATA_DIR}/raw-data/" +mkdir -p "${DATA_DIR}" +mkdir -p "${SCRATCH_DIR}" +WORK_DIR="$0.runfiles/__main__" + +# Download the ImageNet data. +LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" +DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" +"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" + +# Note the locations of the train and validation data. +TRAIN_DIRECTORY="${SCRATCH_DIR}train/" +VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" + +# Preprocess the validation data by moving the images into the appropriate +# sub-directory based on the label (synset) of the image. +echo "Organizing the validation data into sub-directories." +PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" +VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" + +"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" + +# Convert the XML files for bounding box annotations into a single CSV. +echo "Extracting bounding box information from XML." +BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" +BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" +BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" + +"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ + | sort >"${BOUNDING_BOX_FILE}" +echo "Finished downloading and preprocessing the ImageNet data." + +# Build the TFRecords version of the ImageNet data. +BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" +OUTPUT_DIRECTORY="${DATA_DIR}" +IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" + +"${BUILD_SCRIPT}" \ + --train_directory="${TRAIN_DIRECTORY}" \ + --validation_directory="${VALIDATION_DIRECTORY}" \ + --output_directory="${OUTPUT_DIRECTORY}" \ + --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ + --labels_file="${LABELS_FILE}" \ + --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh new file mode 100644 index 00000000000..c780e179f93 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
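As a usage sketch, the wrapper script above (`download_and_convert_imagenet.sh`) takes the output data directory as its only argument, and the download step it drives expects image-net.org credentials in the environment; the path and credentials below are placeholders:

```shell
export IMAGENET_USERNAME="your_username"
export IMAGENET_ACCESS_KEY="your_access_key"
bash download_and_convert_imagenet.sh /path/to/imagenet-data
```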
+# ============================================================================== + +# Script to download ImageNet Challenge 2012 training and validation data set. +# +# Downloads and decompresses raw images and bounding boxes. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# +# usage: +# ./download_imagenet.sh [dirname] +set -e + +if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then + cat < Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=./inception_v3_keras + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/3.x_api/tensorflow/keras/cv/ + # convert validation subset + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd inception_v3/quantization/ptq + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run Command + +## Quantization + ```shell + bash run_quant.sh --input_model=./inception_v3_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 + ``` diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py new file mode 100644 index 00000000000..b8cd01593c6 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py @@ -0,0 +1,543 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. 
+ filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py 
b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py new file mode 100644 index 00000000000..5f8b08d6e11 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -0,0 +1,144 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import time + +import numpy as np +import tensorflow as tf + +from neural_compressor.utils import logger +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, + LabelShift, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +height = width = 299 +eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.keras.Model): The input model will be the objection of tf.keras.Model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + latency_list = [] + metric = TopKMetric() + postprocess = LabelShift(label_shift=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.keras import StaticQuantConfig + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + logger.info("Save quantized model to {}.".format(FLAGS.output_model)) + + if FLAGS.benchmark: + from neural_compressor.tensorflow import Model + + inc_model = Model(FLAGS.input_model) + if FLAGS.mode == 'performance': + evaluate(inc_model.model) + else: + accuracy = evaluate(inc_model.model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..abf63dc93b4 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py @@ -0,0 +1,35 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
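Stripped down, the tuning path in the `main()` above reduces to the following sketch of the 3.x API Keras static PTQ flow; the model path, TFRecord root, and output path are placeholders:

```python
# Minimal sketch of the Keras static post-training quantization flow used above.
from neural_compressor.common import set_random_seed
from neural_compressor.tensorflow import quantize_model
from neural_compressor.tensorflow.keras import StaticQuantConfig
from data_process import (
    ImageRecordDataset,
    ComposeTransform,
    BilinearImagenetTransform,
    TFDataLoader,
)

calib_dataset = ImageRecordDataset(
    root="/path/to/imagenet/tfrecords",
    transform=ComposeTransform(transform_list=[
        BilinearImagenetTransform(height=299, width=299),
    ]),
)
calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10)

set_random_seed(9527)
quant_config = StaticQuantConfig()
q_model = quantize_model("./inception_v3_keras", quant_config, calib_dataloader)
q_model.save("./result")
```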
+# + +import argparse +from tensorflow.keras.applications.inception_v3 import InceptionV3 +def get_inception_v3_model(saved_path): + model = InceptionV3(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_v3_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2f0697d8502 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..43b1636c839 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..7e3ed727f71 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh b/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh new file mode 100644 index 00000000000..4aad5d69a3f --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# set -x + +OUTPUT_DIR="./data" +SUBSET="validation" +SHARDS=1 + +help() +{ + cat <<- EOF + Desc: 
Convert the prepared raw ImageNet dataset to TFRecord + -h --help help info + --output_dir Output data directory + default: './data' + --raw_dir Raw data directory + --shards Number of shards in TFRecord files. + default: '1' + --subset Subset of imagenet, can be validation/train. + default: 'validation' +EOF + exit 0 +} + +function main { + init_params "$@" + convert_dataset +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --output_dir=*) + OUTPUT_DIR=$(echo $var |cut -f2 -d=) + ;; + --raw_dir=*) + RAW_DIR=$(echo $var |cut -f2 -d=) + ;; + --shards=*) + SHARDS=$(echo $var |cut -f2 -d=) + ;; + --subset=*) + SUBSET=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# convert dataset +function convert_dataset { + if [ ! -d ${OUTPUT_DIR} ]; then + mkdir ${OUTPUT_DIR} + fi + python imagenet_prepare/build_imagenet_data.py \ + --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \ + --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \ + --output_directory ${OUTPUT_DIR} \ + --subset ${SUBSET} \ + --raw_directory ${RAW_DIR} \ + --shards ${SHARDS} +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md new file mode 100644 index 00000000000..a276ef7cd0d --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md @@ -0,0 +1,65 @@ +Step-by-Step +============ + +This document describes how to enable TensorFlow Keras models using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install Requirements +TensorFlow and intel-extension-for-tensorflow must be installed to run this example. +The Intel Extension for TensorFlow for Intel CPUs is installed by default. +```shell +pip install -r requirements.txt +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). To prepare the model, run the following: + ``` +python prepare_model.py --output_model=./resnetv2_50_keras + ``` +`--output_model`: the path to save the model, in SavedModel or H5 format. + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also provide related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation images by moving them into the appropriate sub-directories based on their labels (synsets), you can use the command below to convert them to the TF records format.
+ + ```shell + cd examples/3.x_api/tensorflow/keras/image_recognition/ + # convert validation subset + bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnet_v2_50/quantization/ptq + ``` +> **Note**: +> The raw ImageNet dataset consists of JPEG files that should be arranged in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run Command + +## Quantization + ```shell + bash run_quant.sh --input_model=./resnetv2_50_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 + bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 + ``` + diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..b8cd01593c6 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py @@ -0,0 +1,543 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
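+ + In `__call__`, the decoded image is central-cropped (keeping `central_fraction` of the area), resized with bilinear interpolation to (height, width), rescaled to the [-1, 1] range, and then normalized with `mean_value` and `scale`.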
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. 
+ filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
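+ + Example (illustrative usage): + metric = TopKMetric(k=1) + metric.update(preds, labels) + top1_accuracy = metric.result()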
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py 
b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py new file mode 100644 index 00000000000..7fc6a2cdf10 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py @@ -0,0 +1,143 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import time + +import numpy as np +import tensorflow as tf + +from neural_compressor.utils import logger +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, + LabelShift +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +height = width = 224 +eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.keras.Model): The input model will be the objection of tf.keras.Model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
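+ + Note: labels read from the TF record dataset are shifted down by one via LabelShift(label_shift=1) so that the 1-based record labels line up with the 1000-class Keras predictions before TopKMetric is updated.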
+ """ + latency_list = [] + metric = TopKMetric() + postprocess = LabelShift(label_shift=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.keras import StaticQuantConfig + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + logger.info("Save quantized model to {}.".format(FLAGS.output_model)) + + if FLAGS.benchmark: + from neural_compressor.tensorflow import Model + + inc_model = Model(FLAGS.input_model) + if FLAGS.mode == 'performance': + evaluate(inc_model.model) + else: + accuracy = evaluate(inc_model.model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..f8cd505f965 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py @@ -0,0 +1,35 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import tensorflow as tf +def get_resnet50_v2_model(saved_path): + model = tf.keras.applications.ResNet50V2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_resnet50_v2_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..8b7b47da969 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow>=2.11.1 +intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..d464b019f8e --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..7e3ed727f71 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md index f82b696bd07..41a673fc834 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md @@ -74,7 +74,7 @@ bash prepare_dataset.sh 
--output_dir=./data ### Convert the dataset to TF Record format After the dataset is downloaded by either of ways above, the dataset should be converted to files of TF Record format. ```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./data/eval.tf_record +python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=data/eval.tf_record ``` # Run Command @@ -82,11 +82,11 @@ python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v ## Quantization ```shell - bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=data ``` ## Benchmark ```shell - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=64 - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=data --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=data --batch_size=64 ``` \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py index b5403618f40..7f99b3507fc 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py @@ -134,8 +134,5 @@ def eval(model): q_model = quantize_model(model, quant_config, dataloader) q_model.save(FLAGS.output_model) - dataset = ModelZooBertDataset(root=data_path, label_file=label_path) - dataloader = ModelZooBertDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) - if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md new file mode 100644 index 00000000000..544e954371e --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -0,0 +1,130 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of Transformer-LT. This example can run on Intel CPUs and GPUs. + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### 2. Install Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### 3. Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +### 4. Prepare Dataset & Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz +tar -zxvf transformer-lt-official-fp32-inference.tar.gz +cd transformer-lt-official-fp32-inference +tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz +``` + +Dataset is in data folder, pretrained model is in graph folder. + +#### Automatic dataset & model download +Run the `prepare_dataset_model.sh` script located in `examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq`. + +```shell +cd examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq +bash prepare_dataset_model.sh +``` + +## Run Command +### Quantization + +```shell +bash run_quant.sh --input_model=./model/fp32_graphdef.pb --dataset_location=./data --output_model=./model/int8_graphdef.pb +``` +### Benchmark +```shell +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=performance + +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=accuracy --batch_size=1 +``` + +Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow. +========================= + +This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor. + +### q_dataloader Part Adaption +Below dataset class uses getitem to provide the model with input. + +```python +class Dataset(object): + def __init__(self, *args): + # initialize dataset related info here + ... + + def __getitem__(self, index): + data = self.batch[index] + label = self.ref_lines[index] + return data[0], label + + def __len__(self): + return len(self.batch) +``` + +### Evaluation Part Adaption +We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py + +Here we set the input tensor and output tensors name into *inputs* and *outputs* args. +In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. + +### Code update +After prepare step is done, we add tune code to generate quantized model. 
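+
+The calibration dataloader passed to `quantize_model` below relies on a `collate_fn` that zero-pads variable-length token id sequences into a dense batch. A minimal, self-contained sketch of that padding behaviour (the `pad_token_batch` helper name is illustrative, not part of the example code):
+```python
+import numpy as np
+import pandas as pd
+
+def pad_token_batch(token_id_lists):
+    # Ragged rows become NaN-padded columns in the DataFrame; fillna(0) then yields
+    # a dense int32 matrix, mirroring what collate_fn in main.py does for token ids.
+    return pd.DataFrame(token_id_lists).fillna(0).values.astype(np.int32)
+
+# Three sentences of different lengths -> a 3 x 4 int32 matrix padded with zeros.
+print(pad_token_batch([[4, 8, 1], [7, 1], [3, 5, 9, 1]]))
+```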
+ +#### Tune +```python + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) +``` +#### Benchmark +```python + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + acc = eval_func(graph) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) +``` diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py new file mode 100644 index 00000000000..58a93090e7a --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -0,0 +1,258 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +import re +import six +import sys +import time +import numpy as np +import unicodedata +import pandas as pd +from absl import app +import tensorflow as tf +from argparse import ArgumentParser + +from utils import metrics +from utils import tokenizer +from utils.tokenizer import Subtokenizer +from neural_compressor.tensorflow.utils import BaseDataLoader + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +flags.DEFINE_integer("batch_size", 64, + "run batch size") + +flags.DEFINE_string("input_graph", None, + "The path of input model file.") + +flags.DEFINE_string("inputs_file", None, + "File saved to an output file.") + +flags.DEFINE_string("reference_file", None, + "File containing reference translation.") + +flags.DEFINE_string("vocab_file", None, + "Path to subtoken vocabulary file.") + +flags.DEFINE_string("output_model", None, + "The output model of the quantized model.") + +flags.DEFINE_bool('tune', False, + 'whether to tune the model') + +flags.DEFINE_bool('benchmark', False, + 'whether to benchmark the model') + +flags.DEFINE_string("mode", 'performance', + "One of three options: 'performance'/'accuracy'.") + +flags.DEFINE_integer("iters", 100, + "The iteration used for benchmark.") + +class UnicodeRegex(object): + def __init__(self): + punctuation = self.property_chars("P") + self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])") + self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])") + self.symbol_re = re.compile("([" + self.property_chars("S") + "])") + + def property_chars(self, prefix): + return "".join(six.unichr(x) for x in range(sys.maxunicode) + if unicodedata.category(six.unichr(x)).startswith(prefix)) + +uregex = UnicodeRegex() + +def bleu_tokenize(string): + string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string) + string = uregex.punct_nondigit_re.sub(r" \1 \2", string) + string = uregex.symbol_re.sub(r" \1 ", string) + return string.split() + +class bleu(object): + def __init__(self): + self.translations = [] + self.labels = [] + + def reset(self): + self.translations = [] + self.labels = [] + + def update(self, pred, label): + if len(label) != len(pred): + raise ValueError("Reference and translation files have different number " + "of lines. 
If training only a few steps (100-200), the " + "translation may be empty.") + label = [x.lower() for x in label] + pred = [x.lower() for x in pred] + label = [bleu_tokenize(x) for x in label] + pred = [bleu_tokenize(x) for x in pred] + self.labels.extend(label) + self.translations.extend(pred) + + def result(self): + return metrics.compute_bleu(self.labels, self.translations) * 100 + +def collate_fn(batch): + """Puts each data field into a pd frame with outer dimension batch size""" + elem = batch[0] + if isinstance(elem, tuple): + batch = zip(*batch) + return [collate_fn(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + return [list(elem) for elem in batch] + elif isinstance(elem, str): + return batch + else: + return pd.DataFrame(batch).fillna(0).values.astype(np.int32) + +def load_graph(file_name): + tf.compat.v1.logging.info('Loading graph from: ' + file_name) + with tf.io.gfile.GFile(file_name, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name='') + return graph + +def eval_func(infer_graph, iteration=-1): + if isinstance(infer_graph, tf.compat.v1.GraphDef): + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(infer_graph, name='') + infer_graph = graph + + subtokenizer = Subtokenizer(FLAGS.vocab_file) + input_tensor = infer_graph.get_tensor_by_name('input_tensor:0') + output_tensor = infer_graph.get_tensor_by_name(\ + 'model/Transformer/strided_slice_19:0') + + ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + config = tf.compat.v1.ConfigProto() + config.use_per_session_threads = 1 + config.inter_op_parallelism_threads = 1 + sess = tf.compat.v1.Session(graph=infer_graph, config=config) + iteration=-1 + time_list = [] + bleu_eval = bleu() + predictions = [] + labels = [] + warmup = 10 + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + assert iteration >= warmup, 'iteration must be larger than warmup' + + for idx, (input_data, label) in enumerate(dataloader): + if idx < iteration or iteration == -1: + time_start = time.time() + out = sess.run([output_tensor], {input_tensor: input_data}) + duration = time.time() - time_start + time_list.append(duration) + predictions.append(out) + labels.extend(label) + else: + break + + latency = np.array(time_list[warmup: ]).mean() / FLAGS.batch_size + if FLAGS.benchmark and FLAGS.mode == 'performance': + print('Batch size = {}'.format(FLAGS.batch_size)) + print('Latency: {:.3f} ms'.format(latency * 1000)) + print('Throughput: {:.3f} items/sec'.format(1./ latency)) + + # only calculate accuracy when running out all predictions + if iteration == -1: + decode = [] + for i,tr in enumerate(predictions): + for j,itr in enumerate(tr): + for k, otr in enumerate(itr): + try: + index = list(otr).index(tokenizer.EOS_ID) + decode.append(subtokenizer.decode(otr[:index])) + except: + decode.append(subtokenizer.decode(otr)) + bleu_eval.update(decode, labels) + return bleu_eval.result() + +class Dataset(object): + def __init__(self, inputs_file, reference_file, vocab_file): + with tf.io.gfile.GFile(inputs_file) as f: + records = f.read().split("\n") + inputs = [record.strip() for record in records] + if not inputs[-1]: + inputs.pop() + + self.ref_lines = tokenizer.native_to_unicode( + tf.io.gfile.GFile(reference_file).read()).strip().splitlines() + + subtokenizer = 
Subtokenizer(vocab_file) + self.batch = [] + token_lens=[] + for i, line in enumerate(inputs): + enc = subtokenizer.encode(line, add_eos=True) + token_lens.append((i, len(enc))) + + sorted_by_token_input_lens = sorted(token_lens, key=lambda x: x[1], reverse=True) + + sorted_inputs = [None] * len(sorted_by_token_input_lens) + sorted_keys = [0] * len(sorted_by_token_input_lens) + + lines = [] + for i, (index, _) in enumerate(sorted_by_token_input_lens): + sorted_inputs[i] = inputs[index] + sorted_keys[index] = i + enc=subtokenizer.encode(sorted_inputs[i], add_eos=True) + lines.append([enc]) + for i in sorted_keys: + self.batch.append(lines[i]) + + def __getitem__(self, index): + data = self.batch[index] + label = self.ref_lines[index] + return data[0], label + + def __len__(self): + return len(self.batch) + +def main(_): + graph = load_graph(FLAGS.input_graph) + if FLAGS.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) + + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + acc = eval_func(graph) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh new file mode 100644 index 00000000000..3d47dbad80c --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# set -x + +DATA_DIR="../data" +MODEL_DIR="../model" + +help() +{ + cat <<- EOF + Desc: Prepare bert dataset + -h --help help info + --data_dir Output data directory + default: './data' + --model_dir Output model directory + default: './model' +EOF + exit 0 +} + +function main { + init_params "$@" + prepare +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --data_dir=*) + DATA_DIR=$(echo $var |cut -f2 -d=) + ;; + --model_dir=*) + MODEL_DIR=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# prepare data and model +function prepare { + if [ ! -d ${DATA_DIR} ]; then + echo '${DATA_DIR} already exists, please check...' + fi + if [ ! -d ${MODEL_DIR} ]; then + echo '${MODEL_DIR} already exists, please check...' 
+ fi + wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz + tar -zxvf transformer-lt-official-fp32-inference.tar.gz + cd transformer-lt-official-fp32-inference + tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz + mv transformer_lt_official_fp32_pretrained_model/data ${DATA_DIR} + mv transformer_lt_official_fp32_pretrained_model/graph ${MODEL_DIR} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..87bc4c7d5c1 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_benchmark + +} + +# init params +function init_params { + iters=100 + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo ${var} |cut -f2 -d=) + ;; + esac + done + +} + +function define_mode { + + if [[ ${mode} == "accuracy" ]]; then + mode="accuracy" + elif [[ ${mode} == "performance" ]]; then + mode="performance" + else + echo "Error: No such mode: ${mode}" + exit 1 + fi +} + +# run_benchmark +function run_benchmark { + python main.py \ + --input_graph=${input_model} \ + --inputs_file=${dataset_location}/newstest2014.en \ + --reference_file=${dataset_location}/newstest2014.de \ + --vocab_file=${dataset_location}/vocab.txt \ + --benchmark \ + --mode=${mode} \ + --iters=${iters} \ + --batch_size=${batch_size} +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..2f2075cf346 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_graph=${input_model} \ + --inputs_file=${dataset_location}/newstest2014.en \ + --reference_file=${dataset_location}/newstest2014.de \ + --vocab_file=${dataset_location}/vocab.txt \ + --output_model=${output_model} \ + --tune +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py new file mode 100644 index 00000000000..3e41f985c63 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py @@ -0,0 +1,490 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions for calculating loss, accuracy, and other model metrics. + +Metrics: + - Padded loss, accuracy, and negative log perplexity. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/metrics.py + - BLEU approximation. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py + - ROUGE score. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/rouge.py +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + + +def _pad_tensors_to_same_length(x, y): + """Pad x and y so that the results have the same length (second dimension).""" + with tf.name_scope("pad_to_same_length"): + x_length = tf.shape(x)[1] + y_length = tf.shape(y)[1] + + max_length = tf.maximum(x_length, y_length) + + x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]]) + y = tf.pad(y, [[0, 0], [0, max_length - y_length]]) + return x, y + + +def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size): + """Calculate cross entropy loss while ignoring padding. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch_size, length_labels] + smoothing: Label smoothing constant, used to determine the on and off values + vocab_size: int size of the vocabulary + Returns: + Returns the cross entropy loss and weight tensors: float32 tensors with + shape [batch_size, max(length_logits, length_labels)] + """ + with tf.name_scope("loss", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + + # Calculate smoothing cross entropy + with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]): + confidence = 1.0 - smoothing + low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1) + soft_targets = tf.one_hot( + tf.cast(labels, tf.int32), + depth=vocab_size, + on_value=confidence, + off_value=low_confidence) + xentropy = tf.nn.softmax_cross_entropy_with_logits_v2( + logits=logits, labels=soft_targets) + + # Calculate the best (lowest) possible value of cross entropy, and + # subtract from the cross entropy loss. + normalizing_constant = -( + confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) * + low_confidence * tf.log(low_confidence + 1e-20)) + xentropy -= normalizing_constant + + weights = tf.to_float(tf.not_equal(labels, 0)) + return xentropy * weights, weights + + +def _convert_to_eval_metric(metric_fn): + """Wrap a metric fn that returns scores and weights as an eval metric fn. + + The input metric_fn returns values for the current batch. The wrapper + aggregates the return values collected over all of the batches evaluated. 
+ + Args: + metric_fn: function that returns scores and weights for the current batch's + logits and predicted labels. + + Returns: + function that aggregates the scores and weights from metric_fn. + """ + def problem_metric_fn(*args): + """Returns an aggregation of the metric_fn's returned values.""" + (scores, weights) = metric_fn(*args) + + # The tf.metrics.mean function assures correct aggregation. + return tf.metrics.mean(scores, weights) + return problem_metric_fn + + +def get_eval_metrics(logits, labels, params): + """Return dictionary of model evaluation metrics.""" + metrics = { + "accuracy": _convert_to_eval_metric(padded_accuracy)(logits, labels), + "accuracy_top5": _convert_to_eval_metric(padded_accuracy_top5)( + logits, labels), + "accuracy_per_sequence": _convert_to_eval_metric( + padded_sequence_accuracy)(logits, labels), + "neg_log_perplexity": _convert_to_eval_metric(padded_neg_log_perplexity)( + logits, labels, params["vocab_size"]), + } + + if not params["use_tpu"]: + # TPU does not support tf.py_func + metrics.update({ + "approx_bleu_score": _convert_to_eval_metric( + bleu_score)(logits, labels), + "rouge_2_fscore": _convert_to_eval_metric( + rouge_2_fscore)(logits, labels), + "rouge_L_fscore": _convert_to_eval_metric( + rouge_l_fscore)(logits, labels), + }) + + # Prefix each of the metric names with "metrics/". This allows the metric + # graphs to display under the "metrics" category in TensorBoard. + metrics = {"metrics/%s" % k: v for k, v in six.iteritems(metrics)} + return metrics + + +def padded_accuracy(logits, labels): + """Percentage of times that predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + return tf.to_float(tf.equal(outputs, padded_labels)), weights + + +def padded_accuracy_topk(logits, labels, k): + """Percentage of times that top-k predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy_topk", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + effective_k = tf.minimum(k, tf.shape(logits)[-1]) + _, outputs = tf.nn.top_k(logits, k=effective_k) + outputs = tf.to_int32(outputs) + padded_labels = tf.to_int32(labels) + padded_labels = tf.expand_dims(padded_labels, axis=-1) + padded_labels += tf.zeros_like(outputs) # Pad to same shape. 
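+    # After broadcasting, padded_labels has shape [batch, length, k], so it can be
+    # compared element-wise against each of the k candidate predictions below.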
+ same = tf.to_float(tf.equal(outputs, padded_labels)) + same_topk = tf.reduce_sum(same, axis=-1) + return same_topk, weights + + +def padded_accuracy_top5(logits, labels): + return padded_accuracy_topk(logits, labels, 5) + + +def padded_sequence_accuracy(logits, labels): + """Percentage of times that predictions matches labels everywhere (non-0).""" + with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights + axis = list(range(1, len(outputs.get_shape()))) + correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) + return correct_seq, tf.constant(1.0) + + +def padded_neg_log_perplexity(logits, labels, vocab_size): + """Average log-perplexity excluding padding 0s. No smoothing.""" + num, den = padded_cross_entropy_loss(logits, labels, 0, vocab_size) + return -num, den + + +def bleu_score(logits, labels): + """Approximate BLEU score computation between labels and predictions. + + An approximate BLEU scoring method since we do not glue word pieces or + decode the ids and tokenize the output. By default, we use ngram order of 4 + and use brevity penalty. Also, this does not have beam search. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch-size, length_labels] + + Returns: + bleu: int, approx bleu score + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32) + return bleu, tf.constant(1.0) + + +def _get_ngrams_with_counter(segment, max_order): + """Extracts all n-grams up to a given maximum order from an input segment. + + Args: + segment: text segment from which n-grams will be extracted. + max_order: maximum length in tokens of the n-grams returned by this + methods. + + Returns: + The Counter containing all n-grams upto max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in xrange(1, max_order + 1): + for i in xrange(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts + + +def compute_bleu(reference_corpus, translation_corpus, max_order=4, + use_bp=True): + """Computes BLEU score of translated segments against one or more references. + + Args: + reference_corpus: list of references for each translation. Each + reference should be tokenized into a list of tokens. + translation_corpus: list of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + use_bp: boolean, whether to apply brevity penalty. + + Returns: + BLEU score. 
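+
+  Example (illustrative; token IDs are arbitrary):
+    compute_bleu([[1, 2, 3, 4]], [[1, 2, 3, 4]]) returns 1.0, since every
+    n-gram up to max_order matches and the brevity penalty is 1.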
+ """ + reference_length = 0 + translation_length = 0 + bp = 1.0 + geo_mean = 0 + + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + precisions = [] + + for (references, translations) in zip(reference_corpus, translation_corpus): + reference_length += len(references) + translation_length += len(translations) + ref_ngram_counts = _get_ngrams_with_counter(references, max_order) + translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) + + overlap = dict((ngram, + min(count, translation_ngram_counts[ngram])) + for ngram, count in ref_ngram_counts.items()) + + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for ngram in translation_ngram_counts: + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ + ngram] + + precisions = [0] * max_order + smooth = 1.0 + + for i in xrange(0, max_order): + if possible_matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ + i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if max(precisions) > 0: + p_log_sum = sum(math.log(p) for p in precisions if p) + geo_mean = math.exp(p_log_sum / max_order) + + if use_bp: + ratio = translation_length / reference_length + bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bleu = geo_mean * bp + return np.float32(bleu) + + +def rouge_2_fscore(logits, labels): + """ROUGE-2 F1 score computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + logits: tensor, model predictions + labels: tensor, gold output. + + Returns: + rouge2_fscore: approx rouge-2 f1 score. + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32) + return rouge_2_f_score, tf.constant(1.0) + + +def _get_ngrams(n, text): + """Calculates n-grams. + + Args: + n: which n-grams to calculate + text: An array of tokens + + Returns: + A set of n-grams + """ + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + +def rouge_n(eval_sentences, ref_sentences, n=2): + """Computes ROUGE-N f1 score of two text collections of sentences. + + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Args: + eval_sentences: Predicted sentences. + ref_sentences: Sentences from the reference set + n: Size of ngram. Defaults to 2. + + Returns: + f1 score for ROUGE-N + """ + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + eval_ngrams = _get_ngrams(n, eval_sentence) + ref_ngrams = _get_ngrams(n, ref_sentence) + ref_count = len(ref_ngrams) + eval_count = len(eval_ngrams) + + # Count the overlapping ngrams between evaluated and reference + overlapping_ngrams = eval_ngrams.intersection(ref_ngrams) + overlapping_count = len(overlapping_ngrams) + + # Handle edge case. 
This isn't mathematically correct, but it's good enough + if eval_count == 0: + precision = 0.0 + else: + precision = float(overlapping_count) / eval_count + if ref_count == 0: + recall = 0.0 + else: + recall = float(overlapping_count) / ref_count + f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8))) + + # return overlapping_count / reference_count + return np.mean(f1_scores, dtype=np.float32) + + +def rouge_l_fscore(predictions, labels): + """ROUGE scores computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + predictions: tensor, model predictions + labels: tensor, gold output. + + Returns: + rouge_l_fscore: approx rouge-l f1 score. + """ + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels), + tf.float32) + return rouge_l_f_score, tf.constant(1.0) + + +def rouge_l_sentence_level(eval_sentences, ref_sentences): + """Computes ROUGE-L (sentence level) of two collections of sentences. + + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Calculated according to: + R_lcs = LCS(X,Y)/m + P_lcs = LCS(X,Y)/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + X = reference summary + Y = Candidate summary + m = length of reference summary + n = length of candidate summary + + Args: + eval_sentences: The sentences that have been picked by the summarizer + ref_sentences: The sentences from the reference set + + Returns: + A float: F_lcs + """ + + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + m = float(len(ref_sentence)) + n = float(len(eval_sentence)) + lcs = _len_lcs(eval_sentence, ref_sentence) + f1_scores.append(_f_lcs(lcs, m, n)) + return np.mean(f1_scores, dtype=np.float32) + + +def _len_lcs(x, y): + """Returns the length of the Longest Common Subsequence between two seqs. + + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns + integer: Length of LCS between x and y + """ + table = _lcs(x, y) + n, m = len(x), len(y) + return table[n, m] + + +def _lcs(x, y): + """Computes the length of the LCS between two seqs. + + The implementation below uses a DP programming algorithm and runs + in O(nm) time where n = len(x) and m = len(y). + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: collection of words + y: collection of words + + Returns: + Table of dictionary of coord and len lcs + """ + n, m = len(x), len(y) + table = dict() + for i in range(n + 1): + for j in range(m + 1): + if i == 0 or j == 0: + table[i, j] = 0 + elif x[i - 1] == y[j - 1]: + table[i, j] = table[i - 1, j - 1] + 1 + else: + table[i, j] = max(table[i - 1, j], table[i, j - 1]) + return table + + +def _f_lcs(llcs, m, n): + """Computes the LCS-based F-measure score. + + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Args: + llcs: Length of LCS + m: number of words in reference summary + n: number of words in candidate summary + + Returns: + Float. 
LCS-based F-measure score + """ + r_lcs = llcs / m + p_lcs = llcs / n + beta = p_lcs / (r_lcs + 1e-12) + num = (1 + (beta ** 2)) * r_lcs * p_lcs + denom = r_lcs + ((beta ** 2) * p_lcs) + f_lcs = num / (denom + 1e-12) + return f_lcs diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py new file mode 100644 index 00000000000..33f144b23fd --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py @@ -0,0 +1,620 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines Subtokenizer class to encode and decode strings.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re +import sys +import unicodedata + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + +PAD = "" +PAD_ID = 0 +EOS = "" +EOS_ID = 1 +RESERVED_TOKENS = [PAD, EOS] + +# Set of characters that will be used in the function _escape_token() (see func +# docstring for more details). +# This set is added to the alphabet list to ensure that all escaped tokens can +# be encoded. +_ESCAPE_CHARS = set(u"\\_u;0123456789") +# Regex for the function _unescape_token(), the inverse of _escape_token(). +# This is used to find "\u", "\\", and "\###;" substrings in the token. +_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") + +_UNDEFINED_UNICODE = u"\u3013" + +# Set contains all letter and number characters. +_ALPHANUMERIC_CHAR_SET = set( + six.unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(six.unichr(i)).startswith("L") or + unicodedata.category(six.unichr(i)).startswith("N"))) + +# min_count is the minimum number of times a subtoken must appear in the data +# before before it is added to the vocabulary. The value is found using binary +# search to obtain the target vocabulary size. +_MIN_MIN_COUNT = 1 # min value to use when binary searching for min_count +_MAX_MIN_COUNT = 1000 # max value to use when binary searching for min_count + + +class Subtokenizer(object): + """Encodes and decodes strings to/from integer IDs.""" + + def __init__(self, vocab_file, reserved_tokens=None): + """Initializes class, creating a vocab file if data_files is provided.""" + tf.compat.v1.logging.info("Initializing Subtokenizer from file %s." 
% + vocab_file) + + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + self.subtoken_list = _load_vocab_file(vocab_file, reserved_tokens) + self.alphabet = _generate_alphabet_dict(self.subtoken_list) + self.subtoken_to_id_dict = _list_to_index_dict(self.subtoken_list) + + self.max_subtoken_length = 0 + for subtoken in self.subtoken_list: + self.max_subtoken_length = max(self.max_subtoken_length, len(subtoken)) + + # Create cache to speed up subtokenization + self._cache_size = 2 ** 20 + self._cache = [(None, None)] * self._cache_size + + @staticmethod + def init_from_files( + vocab_file, files, target_vocab_size, threshold, min_count=None, + file_byte_limit=1e6, reserved_tokens=None, correct_strip=True): + """Create subtoken vocabulary based on files, and save vocab to file. + + Args: + vocab_file: String name of vocab file to store subtoken vocabulary. + files: List of file paths that will be used to generate vocabulary. + target_vocab_size: target vocabulary size to generate. + threshold: int threshold of vocabulary size to accept. + min_count: int minimum count to use for generating the vocabulary. The min + count is the minimum number of times a subtoken should appear in the + files before it is added to the vocabulary. If set to none, this value + is found using binary search. + file_byte_limit: (Default 1e6) Maximum number of bytes of sample text that + will be drawn from the files. + reserved_tokens: List of string tokens that are guaranteed to be at the + beginning of the subtoken vocabulary list. + correct_strip: Whether to convert text to unicode before strip. + + Returns: + Subtokenizer object + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if tf.io.gfile.exists(vocab_file): + tf.compat.v1.logging.info("Vocab file already exists (%s)" % vocab_file) + else: + tf.compat.v1.logging.info("Begin steps to create subtoken vocabulary...") + token_counts = _count_tokens(files, file_byte_limit, correct_strip) + alphabet = _generate_alphabet_dict(token_counts) + subtoken_list = _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_vocab_size, threshold, min_count, + reserved_tokens) + tf.compat.v1.logging.info("Generated vocabulary with %d subtokens." % + len(subtoken_list)) + _save_vocab_file(vocab_file, subtoken_list) + return Subtokenizer(vocab_file) + + def encode(self, raw_string, add_eos=False): + """Encodes a string into a list of int subtoken ids.""" + ret = [] + tokens = _split_string_to_tokens(native_to_unicode(raw_string)) + for token in tokens: + ret.extend(self._token_to_subtoken_ids(token)) + if add_eos: + ret.append(EOS_ID) + return ret + + def _token_to_subtoken_ids(self, token): + """Encode a single token into a list of subtoken ids.""" + cache_location = hash(token) % self._cache_size + cache_key, cache_value = self._cache[cache_location] + if cache_key == token: + return cache_value + + ret = _split_token_to_subtokens( + _escape_token(token, self.alphabet), self.subtoken_to_id_dict, + self.max_subtoken_length) + ret = [self.subtoken_to_id_dict[subtoken_id] for subtoken_id in ret] + + self._cache[cache_location] = (token, ret) + return ret + + def decode(self, subtokens): + """Converts list of int subtokens ids into a string.""" + if isinstance(subtokens, np.ndarray): + # Note that list(subtokens) converts subtokens to a python list, but the + # items remain as np.int32. This converts both the array and its items. 
+ subtokens = subtokens.tolist() + + if not subtokens: + return "" + + assert isinstance(subtokens, list) and isinstance(subtokens[0], int), ( + "Subtokens argument passed into decode() must be a list of integers.") + + return _unicode_to_native( + _join_tokens_to_string(self._subtoken_ids_to_tokens(subtokens))) + + def _subtoken_ids_to_tokens(self, subtokens): + """Convert list of int subtoken ids to a list of string tokens.""" + escaped_tokens = "".join([ + self.subtoken_list[s] for s in subtokens + if s < len(self.subtoken_list)]) + escaped_tokens = escaped_tokens.split("_") + + # All tokens in the vocabulary list have been escaped (see _escape_token()) + # so each token must be unescaped when decoding. + ret = [] + for token in escaped_tokens: + if token: + ret.append(_unescape_token(token)) + return ret + + +def _save_vocab_file(vocab_file, subtoken_list): + """Save subtokens to file.""" + with tf.io.gfile.GFile(vocab_file, mode="w") as f: + for subtoken in subtoken_list: + f.write("'%s'\n" % _unicode_to_native(subtoken)) + + +def _load_vocab_file(vocab_file, reserved_tokens=None): + """Load vocabulary while ensuring reserved tokens are at the top.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + subtoken_list = [] + with tf.io.gfile.GFile(vocab_file, mode="r") as f: + for line in f: + subtoken = native_to_unicode(line.strip()) + subtoken = subtoken[1:-1] # Remove surrounding single-quotes + if subtoken in reserved_tokens: + continue + subtoken_list.append(native_to_unicode(subtoken)) + return reserved_tokens + subtoken_list + + +def native_to_unicode(s): + """Convert string to unicode (required in Python 2).""" + try: # Python 2 + return s if isinstance(s, unicode) else s.decode("utf-8") + except NameError: # Python 3 + return s + + +def _unicode_to_native(s): + """Convert string from unicode to native format (required in Python 2).""" + try: # Python 2 + return s.encode("utf-8") if isinstance(s, unicode) else s + except NameError: # Python 3 + return s + + +def _split_string_to_tokens(text): + """Splits text to a list of string tokens.""" + if not text: + return [] + ret = [] + token_start = 0 + # Classify each character in the input string + is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text] + for pos in xrange(1, len(text)): + if is_alnum[pos] != is_alnum[pos - 1]: + token = text[token_start:pos] + if token != u" " or token_start == 0: + ret.append(token) + token_start = pos + final_token = text[token_start:] + ret.append(final_token) + return ret + + +def _join_tokens_to_string(tokens): + """Join a list of string tokens into a single string.""" + token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens] + ret = [] + for i, token in enumerate(tokens): + if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: + ret.append(u" ") + ret.append(token) + return "".join(ret) + + +def _escape_token(token, alphabet): + r"""Replace characters that aren't in the alphabet and append "_" to token. + + Apply three transformations to the token: + 1. Replace underline character "_" with "\u", and backslash "\" with "\\". + 2. Replace characters outside of the alphabet with "\###;", where ### is the + character's Unicode code point. + 3. Appends "_" to mark the end of a token. 
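+
+  For example, assuming "@" is not in the alphabet, the token u"ab@" is escaped
+  to u"ab\64;_" (since ord(u"@") == 64).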
+ + Args: + token: unicode string to be escaped + alphabet: list of all known characters + + Returns: + escaped string + """ + token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + ret = [c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token] + return u"".join(ret) + "_" + + +def _unescape_token(token): + r"""Replaces escaped characters in the token with their unescaped versions. + + Applies inverse transformations as _escape_token(): + 1. Replace "\u" with "_", and "\\" with "\". + 2. Replace "\###;" with the unicode character the ### refers to. + + Args: + token: escaped string + + Returns: + unescaped string + """ + + def match(m): + r"""Returns replacement string for matched object. + + Matched objects contain one of the strings that matches the regex pattern: + r"\\u|\\\\|\\([0-9]+);" + The strings can be '\u', '\\', or '\###;' (### is any digit number). + + m.group(0) refers to the entire matched string ('\u', '\\', or '\###;'). + m.group(1) refers to the first parenthesized subgroup ('###'). + + m.group(0) exists for all match objects, while m.group(1) exists only for + the string '\###;'. + + This function looks to see if m.group(1) exists. If it doesn't, then the + matched string must be '\u' or '\\' . In this case, the corresponding + replacement ('_' and '\') are returned. Note that in python, a single + backslash is written as '\\', and double backslash as '\\\\'. + + If m.group(1) exists, then use the integer in m.group(1) to return a + unicode character. + + Args: + m: match object + + Returns: + String to replace matched object with. + """ + # Check if the matched strings are '\u' or '\\'. + if m.group(1) is None: + return u"_" if m.group(0) == u"\\u" else u"\\" + + # If m.group(1) exists, try and return unicode character. + try: + return six.unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return _UNDEFINED_UNICODE + + # Use match function to replace escaped substrings in the token. + return _UNESCAPE_REGEX.sub(match, token) + + +def _count_tokens(files, file_byte_limit=1e6, correct_strip=True): + """Return token counts of words in the files. + + Samples file_byte_limit bytes from each file, and counts the words that appear + in the samples. The samples are semi-evenly distributed across the file. + + Args: + files: List of filepaths + file_byte_limit: Max number of bytes that will be read from each file. + correct_strip: Whether to convert text to unicode before strip. This affects + vocabulary generation for PY2. Sets correct_strip to False in PY2 to + reproduce previous common public result. Sets correct_strip to True will + let PY2 and PY3 get a consistent vocabulary. + + Returns: + Dictionary mapping tokens to the number of times they appear in the sampled + lines from the files. 
+ """ + token_counts = collections.defaultdict(int) + + for filepath in files: + with tf.io.gfile.GFile(filepath, mode="r") as reader: + file_byte_budget = file_byte_limit + counter = 0 + lines_to_skip = int(reader.size() / (file_byte_budget * 2)) + for line in reader: + if counter < lines_to_skip: + counter += 1 + else: + if file_byte_budget < 0: + break + if correct_strip: + line = native_to_unicode(line) + line = line.strip() + file_byte_budget -= len(line) + counter = 0 + + # Add words to token counts + for token in _split_string_to_tokens(native_to_unicode(line)): + token_counts[token] += 1 + return token_counts + + +def _list_to_index_dict(lst): + """Create dictionary mapping list items to their indices in the list.""" + return {item: n for n, item in enumerate(lst)} + + +def _split_token_to_subtokens(token, subtoken_dict, max_subtoken_length): + """Splits a token into subtokens defined in the subtoken dict.""" + ret = [] + start = 0 + token_len = len(token) + while start < token_len: + # Find the longest subtoken, so iterate backwards. + for end in xrange(min(token_len, start + max_subtoken_length), start, -1): + subtoken = token[start:end] + if subtoken in subtoken_dict: + ret.append(subtoken) + start = end + break + else: # Did not break + # If there is no possible encoding of the escaped token then one of the + # characters in the token is not in the alphabet. This should be + # impossible and would be indicative of a bug. + raise ValueError("Was unable to split token \"%s\" into subtokens." % + token) + return ret + + +def _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_size, threshold, min_count=None, + reserved_tokens=None): + """Generate subtoken vocabulary close to the target size.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if min_count is not None: + tf.compat.v1.logging.info( + "Using min_count=%d to generate vocab with target size %d" % + (min_count, target_size)) + return _generate_subtokens( + token_counts, alphabet, min_count, reserved_tokens=reserved_tokens) + + def bisect(min_val, max_val): + """Recursive function to binary search for subtoken vocabulary.""" + cur_count = (min_val + max_val) // 2 + tf.compat.v1.logging.info("Binary search: trying min_count=%d (%d %d)" % + (cur_count, min_val, max_val)) + subtoken_list = _generate_subtokens( + token_counts, alphabet, cur_count, reserved_tokens=reserved_tokens) + + val = len(subtoken_list) + tf.compat.v1.logging.info( + "Binary search: min_count=%d resulted in %d tokens" % (cur_count, val)) + + within_threshold = abs(val - target_size) < threshold + if within_threshold or min_val >= max_val or cur_count < 2: + return subtoken_list + if val > target_size: + other_subtoken_list = bisect(cur_count + 1, max_val) + else: + other_subtoken_list = bisect(min_val, cur_count - 1) + + # Return vocabulary dictionary with the closest number of tokens. 
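+    # Both this vocabulary and the one from the recursive call are candidates;
+    # keep whichever size lands closer to target_size.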
+ other_val = len(other_subtoken_list) + if abs(other_val - target_size) < abs(val - target_size): + return other_subtoken_list + return subtoken_list + + tf.compat.v1.logging.info("Finding best min_count to get target size of %d" % + target_size) + return bisect(_MIN_MIN_COUNT, _MAX_MIN_COUNT) + + +def _generate_alphabet_dict(iterable, reserved_tokens=None): + """Create set of characters that appear in any element in the iterable.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + alphabet = {c for token in iterable for c in token} + alphabet |= {c for token in reserved_tokens for c in token} + alphabet |= _ESCAPE_CHARS # Add escape characters to alphabet set. + return alphabet + + +def _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length): + """Count number of times subtokens appear, and generate new subtokens. + + Args: + token_counts: dict mapping tokens to the number of times they appear in the + original files. + alphabet: list of allowed characters. Used to escape the tokens, which + guarantees that all tokens can be split into subtokens. + subtoken_dict: dict mapping subtokens to ids. + max_subtoken_length: maximum length of subtoken in subtoken_dict. + + Returns: + A defaultdict mapping subtokens to the number of times they appear in the + tokens. The dict may contain new subtokens. + """ + subtoken_counts = collections.defaultdict(int) + for token, count in six.iteritems(token_counts): + token = _escape_token(token, alphabet) + subtokens = _split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + + # Generate new subtokens by taking substrings from token. + start = 0 + for subtoken in subtokens: + for end in xrange(start + 1, len(token) + 1): + new_subtoken = token[start:end] + subtoken_counts[new_subtoken] += count + start += len(subtoken) + + return subtoken_counts + + +def _filter_and_bucket_subtokens(subtoken_counts, min_count): + """Return a bucketed list of subtokens that are filtered by count. + + Args: + subtoken_counts: defaultdict mapping subtokens to their counts + min_count: int count used to filter subtokens + + Returns: + List of subtoken sets, where subtokens in set i have the same length=i. + """ + # Create list of buckets, where subtokens in bucket i have length i. + subtoken_buckets = [] + for subtoken, count in six.iteritems(subtoken_counts): + if count < min_count: # Filter out subtokens that don't appear enough + continue + while len(subtoken_buckets) <= len(subtoken): + subtoken_buckets.append(set()) + subtoken_buckets[len(subtoken)].add(subtoken) + return subtoken_buckets + + +def _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens=None): + """Generate candidate subtokens ordered by count, and new max subtoken length. + + Add subtokens to the candidate list in order of length (longest subtokens + first). When a subtoken is added, the counts of each of its prefixes are + decreased. Prefixes that don't appear much outside the subtoken are not added + to the candidate list. + + For example: + subtoken being added to candidate list: 'translate' + subtoken_counts: {'translate':10, 't':40, 'tr':16, 'tra':12, ...} + min_count: 5 + + When 'translate' is added, subtoken_counts is updated to: + {'translate':0, 't':30, 'tr':6, 'tra': 2, ...} + + The subtoken 'tra' will not be added to the candidate list, because it appears + twice (less than min_count) outside of 'translate'. 
+ + Args: + subtoken_counts: defaultdict mapping str subtokens to int counts + min_count: int minimum count requirement for subtokens + alphabet: set of characters. Each character is added to the subtoken list to + guarantee that all tokens can be encoded. + reserved_tokens: list of tokens that will be added to the beginning of the + returned subtoken list. + + Returns: + List of candidate subtokens in decreasing count order, and maximum subtoken + length + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Create a list of (count, subtoken) for each candidate subtoken. + subtoken_candidates = [] + + # Use bucketted list to iterate through subtokens in order of length. + # subtoken_buckets[i] = set(subtokens), where each subtoken has length i. + subtoken_buckets = _filter_and_bucket_subtokens(subtoken_counts, min_count) + max_subtoken_length = len(subtoken_buckets) - 1 + + # Go through the list in reverse order to consider longer subtokens first. + for subtoken_len in xrange(max_subtoken_length, 0, -1): + for subtoken in subtoken_buckets[subtoken_len]: + count = subtoken_counts[subtoken] + + # Possible if this subtoken is a prefix of another token. + if count < min_count: + continue + + # Ignore alphabet/reserved tokens, which will be added manually later. + if subtoken not in alphabet and subtoken not in reserved_tokens: + subtoken_candidates.append((count, subtoken)) + + # Decrement count of the subtoken's prefixes (if a longer subtoken is + # added, its prefixes lose priority to be added). + for end in xrange(1, subtoken_len): + subtoken_counts[subtoken[:end]] -= count + + # Add alphabet subtokens (guarantees that all strings are encodable). + subtoken_candidates.extend((subtoken_counts.get(a, 0), a) for a in alphabet) + + # Order subtoken candidates by decreasing count. + subtoken_list = [t for _, t in sorted(subtoken_candidates, reverse=True)] + + # Add reserved tokens to beginning of the list. + subtoken_list = reserved_tokens + subtoken_list + return subtoken_list, max_subtoken_length + + +def _generate_subtokens( + token_counts, alphabet, min_count, num_iterations=4, + reserved_tokens=None): + """Create a list of subtokens in decreasing order of frequency. + + Args: + token_counts: dict mapping str tokens -> int count + alphabet: set of characters + min_count: int minimum number of times a subtoken must appear before it is + added to the vocabulary. + num_iterations: int number of iterations to generate new tokens. + reserved_tokens: list of tokens that will be added to the beginning to the + returned subtoken list. + + Returns: + Sorted list of subtokens (most frequent first) + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Use alphabet set to create initial list of subtokens + subtoken_list = reserved_tokens + list(alphabet) + max_subtoken_length = 1 + + # On each iteration, segment all words using the subtokens defined in + # subtoken_dict, count how often the resulting subtokens appear, and update + # the dictionary with subtokens w/ high enough counts. + for i in xrange(num_iterations): + tf.compat.v1.logging.info("\tGenerating subtokens: iteration %d" % i) + # Generate new subtoken->id dictionary using the new subtoken list. + subtoken_dict = _list_to_index_dict(subtoken_list) + + # Create dict mapping subtoken->count, with additional subtokens created + # from substrings taken from the tokens. 
+ subtoken_counts = _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + # Generate new list of subtokens sorted by subtoken count. + subtoken_list, max_subtoken_length = _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + tf.compat.v1.logging.info("\tVocab size: %d" % len(subtoken_list)) + return subtoken_list diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py new file mode 100644 index 00000000000..f757389f30d --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py @@ -0,0 +1,182 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test Subtokenizer and string helper methods.""" + +import collections +import tempfile + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.transformer.utils import tokenizer + + +class SubtokenizerTest(tf.test.TestCase): + + def _init_subtokenizer(self, vocab_list): + temp_file = tempfile.NamedTemporaryFile(delete=False) + with tf.io.gfile.GFile(temp_file.name, "w") as w: + for subtoken in vocab_list: + w.write("'%s'" % subtoken) + w.write("\n") + return tokenizer.Subtokenizer(temp_file.name, reserved_tokens=[]) + + def test_encode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + s = "testing 123" + encoded_list = subtokenizer.encode(s) + self.assertEqual([1, 2, 0], encoded_list) + + def test_decode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + decoded_str = subtokenizer.decode(encoded_list) + self.assertEqual("testing 123", decoded_str) + + def test_subtoken_ids_to_tokens(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + token_list = subtokenizer._subtoken_ids_to_tokens(encoded_list) + self.assertEqual([u"testing", u"123"], token_list) + + +class StringHelperTest(tf.test.TestCase): + + def test_split_string_to_tokens(self): + text = "test? testing 123." + + tokens = tokenizer._split_string_to_tokens(text) + self.assertEqual(["test", "? ", "testing", "123", "."], tokens) + + def test_join_tokens_to_string(self): + tokens = ["test", "? ", "testing", "123", "."] + + s = tokenizer._join_tokens_to_string(tokens) + self.assertEqual("test? 
testing 123.", s) + + def test_escape_token(self): + token = u"abc_\\4" + alphabet = set("abc_\\u;") + + escaped_token = tokenizer._escape_token(token, alphabet) + self.assertEqual("abc\\u\\\\\\52;_", escaped_token) + + def test_unescape_token(self): + escaped_token = u"Underline: \\u, Backslash: \\\\, Unicode: \\52;" + + unescaped_token = tokenizer._unescape_token(escaped_token) + self.assertEqual( + "Underline: _, Backslash: \\, Unicode: 4", unescaped_token) + + def test_list_to_index_dict(self): + lst = ["test", "strings"] + + d = tokenizer._list_to_index_dict(lst) + self.assertDictEqual({"test": 0, "strings": 1}, d) + + def test_split_token_to_subtokens(self): + token = "abc" + subtoken_dict = {"a": 0, "b": 1, "c": 2, "ab": 3} + max_subtoken_length = 2 + + subtokens = tokenizer._split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + self.assertEqual(["ab", "c"], subtokens) + + def test_generate_alphabet_dict(self): + s = ["testing", "123"] + reserved_tokens = ["???"] + + alphabet = tokenizer._generate_alphabet_dict(s, reserved_tokens) + self.assertIn("?", alphabet) + self.assertIn("t", alphabet) + self.assertIn("e", alphabet) + self.assertIn("s", alphabet) + self.assertIn("i", alphabet) + self.assertIn("n", alphabet) + self.assertIn("g", alphabet) + self.assertIn("1", alphabet) + self.assertIn("2", alphabet) + self.assertIn("3", alphabet) + + def test_count_and_gen_subtokens(self): + token_counts = {"abc": 5} + alphabet = set("abc_") + subtoken_dict = {"a": 0, "b": 1, "c": 2, "_": 3} + max_subtoken_length = 2 + + subtoken_counts = tokenizer._count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + self.assertIsInstance(subtoken_counts, collections.defaultdict) + self.assertDictEqual( + {"a": 5, "b": 5, "c": 5, "_": 5, "ab": 5, "bc": 5, "c_": 5, + "abc": 5, "bc_": 5, "abc_": 5}, subtoken_counts) + + def test_filter_and_bucket_subtokens(self): + subtoken_counts = collections.defaultdict( + int, {"a": 2, "b": 4, "c": 1, "ab": 6, "ac": 3, "abbc": 5}) + min_count = 3 + + subtoken_buckets = tokenizer._filter_and_bucket_subtokens( + subtoken_counts, min_count) + + self.assertEqual(len(subtoken_buckets[0]), 0) + self.assertEqual(set("b"), subtoken_buckets[1]) + self.assertEqual(set(["ab", "ac"]), subtoken_buckets[2]) + self.assertEqual(len(subtoken_buckets[3]), 0) + self.assertEqual(set(["abbc"]), subtoken_buckets[4]) + + def test_gen_new_subtoken_list(self): + subtoken_counts = collections.defaultdict( + int, {"translate": 10, "t": 40, "tr": 16, "tra": 12}) + min_count = 5 + alphabet = set("translate") + reserved_tokens = ["reserved", "tokens"] + + subtoken_list, max_token_length = tokenizer._gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + # Check that "tra" isn"t in the list (its count should be decremented to 2, + # so it should not be added to the canddiate list). 
+ self.assertNotIn("tra", subtoken_list) + + self.assertIn("tr", subtoken_list) + self.assertIn("t", subtoken_list) + + self.assertEqual(len("translate"), max_token_length) + + def test_generate_subtokens(self): + token_counts = {"ab": 1, "bc": 3, "abc": 5} + alphabet = set("abc_") + min_count = 100 + num_iterations = 1 + reserved_tokens = ["reserved", "tokens"] + + vocab_list = tokenizer._generate_subtokens( + token_counts, alphabet, min_count, num_iterations, reserved_tokens) + + # Check that reserved tokens are at the front of the list + self.assertEqual(vocab_list[:2], reserved_tokens) + + # Check that each character in alphabet is in the vocab list + for c in alphabet: + self.assertIn(c, vocab_list) + + +if __name__ == "__main__": + tf.test.main() diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md new file mode 100644 index 00000000000..b7b90b6f8ec --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md @@ -0,0 +1,133 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs. + +# Prerequisite + + +## 1. Environment +Recommend python 3.9 or higher version. + +### Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` + +### Install Intel Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Installation Dependency packages +```shell +cd examples/3.x_api/tensorflow/object_detection +pip install -r requirements.txt +cd faster_rcnn_resnet50/quantization/ptq +``` + +### Install Protocol Buffer Compiler + +`Protocol Buffer Compiler` in version higher than 3.0.0 is necessary ingredient for automatic COCO dataset preparation. To install please follow +[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager). + +### Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare Model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +``` + +## 3. 
Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: the `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd faster_rcnn_resnet50/quantization/ptq
+```
+
+This script downloads the *train*, *validation* and *test* COCO datasets. Furthermore, it converts them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+### Manual dataset download
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+
+
+# Run
+
+## 1. Quantization
+
+  ```shell
+  bash run_quant.sh --input_model=./faster_rcnn_resnet50_fp32_coco_pretrained_model/frozen_inference_graph.pb --output_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+## 2. Benchmark
+  ```shell
+  # run performance benchmark
+  bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+
+  # run accuracy benchmark
+  bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy
+  ```
+
+Details of enabling Intel® Neural Compressor on faster_rcnn_resnet50 for TensorFlow
+=========================
+
+This is a tutorial on how to enable the faster_rcnn_resnet50 model with Intel® Neural Compressor.
+## User Code Analysis
+The user provides the FP32 *model*, a calibration dataloader (*calib_dataloader* in the code below), and a custom *eval_func* that encapsulates the evaluation dataset and metric.
+
+For faster_rcnn_resnet50 we take this approach because our philosophy is to enable the model with minimal changes. Hence, only two updates to the original code are needed: implementing the calibration dataloader and making the necessary changes to *eval_func*.
+
+### Code update
+
+After the preparation steps are done, we just need to update main.py as shown below.
+```python
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        quant_config = StaticQuantConfig(weight_granularity="per_channel")
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+```
+
+The `quantize_model` API returns the quantized graph, which is then saved to the path given by `--output_model`.
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. + +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union + +import numpy as np +from pycocotools import coco, cocoeval, mask + +from neural_compressor.utils import logger + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
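+
+        Example (illustrative, using the standard COCO settings):
+            evaluator = COCOEvalWrapper(groundtruth, detections,
+                                        iou_thrs="0.5:0.05:0.95", map_points=101)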
+ """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
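+                        # (np.argsort with kind="mergesort" is stable, so detections with
+                        # equal scores keep their original relative order)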
+ inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = 
self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
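+
+    Example (an illustrative sketch; the image id, category ids and pixel
+    coordinates below are made up):
+
+        gt_annotations = ExportSingleImageGroundtruthToCoco(
+            image_id="000000397133",
+            next_annotation_id=1,
+            category_id_set=set([1, 18]),
+            groundtruth_boxes=np.array([[100., 100., 300., 400.]], dtype=np.float32),
+            groundtruth_classes=np.array([18]),
+        )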
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
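+
+    Example (an illustrative sketch; the image id, category ids and pixel
+    coordinates below are made up):
+
+        detections = ExportSingleImageDetectionBoxesToCoco(
+            image_id="000000397133",
+            category_id_set=set([1, 18]),
+            detection_boxes=np.array([[100., 100., 300., 400.]], dtype=np.float32),
+            detection_scores=np.array([0.92]),
+            detection_classes=np.array([18]),
+        )
+        # -> [{'image_id': '000000397133', 'category_id': 18,
+        #      'bbox': [100.0, 100.0, 300.0, 200.0], 'score': 0.92}]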
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. 
" + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..32e55adb3fd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py @@ -0,0 +1,655 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeTFTransform(object): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation="bilinear"): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) + return (image, label) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. 
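+
+        Example (an illustrative sketch; assumes Horovod is installed and
+        `metric` is an instance of a subclass such as COCOmAPv2 below):
+
+            import horovod.tensorflow as hvd
+            hvd.init()
+            metric.hvd = hvd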
+ """ + self._hvd = hvd + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
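+
+        Example (an illustrative sketch with made-up values; array layouts
+        follow the default output_index_mapping
+        {'num_detections': -1, 'boxes': 0, 'scores': 1, 'classes': 2}):
+
+            boxes = np.array([[[100., 100., 300., 400.]]])  # [batch, num_det, 4]
+            scores = np.array([[0.92]])                     # [batch, num_det]
+            classes = np.array([[18.]])                     # [batch, num_det]
+            bboxes = np.array([[[100., 100., 300., 400.]]])
+            labels = (bboxes, [[]], [[18]], [b"000000397133"])
+            metric.update((boxes, scores, classes), labels)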
+ """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. 
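+
+        Example (an illustrative sketch; assumes the metric has been fed at
+        least one batch via update()):
+
+            mAP = metric.result()
+            print("DetectionBoxes_Precision/mAP: {:.4f}".format(mAP))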
+ """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
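+
+    Example (an illustrative sketch; the tfrecord path is a placeholder):
+
+        eval_dataset = COCORecordDataset(
+            root="/path/to/coco_val.record",
+            filter=None,
+            transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)]),
+        )
+        eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=10)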
+ """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + 
samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py new file mode 100644 index 00000000000..0ca37671fd6 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py @@ -0,0 +1,128 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +from __future__ import division + +import time + +import numpy as np +import tensorflow as tf + +from argparse import ArgumentParser +from data_process import( + COCOmAPv2, + COCORecordDataset, + ComposeTransform, + ResizeTFTransform, + TFDataLoader, +) + +arg_parser = ArgumentParser(description='Parse args') + +arg_parser.add_argument('-g', + "--input-graph", + help='Specify the input graph.', + dest='input_graph') +arg_parser.add_argument('--config', type=str, default='') +arg_parser.add_argument('--dataset_location', type=str, default='') +arg_parser.add_argument('--output_model', type=str, default='') +arg_parser.add_argument('--mode', type=str, default='performance') +arg_parser.add_argument('--batch_size', type=int, default=10) +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations') +arg_parser.add_argument('--tune', action='store_true', default=False) +arg_parser.add_argument('--benchmark', dest='benchmark', + action='store_true', help='run benchmark') +args = arg_parser.parse_args() + +def evaluate(model): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph): The input model graph. + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + warmup = 5 + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3}) + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list[warmup:]).mean() / args.batch_size + return latency + + eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)])) + eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size) + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..6c2115f58ff --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --iters ${iters} \ + --benchmark +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md new file mode 100644 index 00000000000..9ec8ae2ad78 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md @@ 
-0,0 +1,142 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs. + +# Prerequisite + + +## 1. Environment +Recommend python 3.6 or higher version. + +### Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` + +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Installation Dependency packages +```shell +cd examples/3.x_api/tensorflow/object_detection +pip install -r requirements.txt +cd mask_rcnn_inception_v2/quantization/ptq +``` + +### Install Protocol Buffer Compiler + +`Protocol Buffer Compiler` in version higher than 3.0.0 is necessary ingredient for automatic COCO dataset preparation. To install please follow +[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager). + +### Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare Model + +```shell +wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz +tar -xvzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz +``` + +## 3. Prepare Dataset + +### Automatic dataset download + +> **_Note: `prepare_dataset.sh` script works with TF version 1.x._** + +Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`. + +Usage: +```shell +cd examples/3.x_api/tensorflow/object_detection/ +. prepare_dataset.sh +cd mask_rcnn_inception_v2/quantization/ptq +``` + +This script will download the *train*, *validation* and *test* COCO datasets. Furthermore it will convert them to +tensorflow records using the `https://github.com/tensorflow/models.git` dedicated script. + +### Manual dataset download +Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). + + +# Run + +Now we support both pb and ckpt formats. + +## 1. Quantization +### For PB format + + ```shell + bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record + ``` + +### For ckpt format + + ```shell + bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/ --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record + ``` + +## 2. 
Benchmark + ```shell + # run performance benchmark + bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance + + # run accuracy benchmark + bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy + ``` + +Details of enabling Intel® Neural Compressor on mask_rcnn_inception_v2 for Tensorflow. +========================= + +This is a tutorial of how to enable mask_rcnn_inception_v2 model with Intel® Neural Compressor. +## User Code Analysis +User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. + +For mask_rcnn_inception_v2, we applied the latter one because our philosophy is to enable the model with minimal changes. Hence we need to make two changes on the original code. The first one is to implement the q_dataloader and make necessary changes to *eval_func*. + +### Code update + +After prepare step is done, we just need update main.py like below. +```python + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) +``` + +The quantization.fit() function will return a best quantized model during timeout constrain. diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py new file mode 100644 index 00000000000..2f9369798df --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py @@ -0,0 +1,694 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+ +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union + +import numpy as np +from pycocotools import coco, cocoeval, mask + +from neural_compressor.utils import logger + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
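+
+        Example (an illustrative sketch; `groundtruth_dict` and
+        `detections_list` follow the conventions described in the module
+        docstring above):
+
+            groundtruth = COCOWrapper(groundtruth_dict)
+            detections = groundtruth.LoadAnnotations(detections_list)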
+ """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = 
self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. 
" + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py new file mode 100644 index 00000000000..8d0a074ee82 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py @@ -0,0 +1,767 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeWithRatio(): + """Resize image with aspect ratio and pad it to max shape(optional). + + If the image is padded, the label will be processed at the same time. + The input image should be np.array. + + Args: + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim + + Returns: + tuple of processed image and label + """ + + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `ResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value + + def __call__(self, sample): + """Resize the image with ratio in sample.""" + image, label = sample + height, width = image.shape[:2] + scale = 1 + if self.min_dim: + scale = max(1, self.min_dim / min(height, width)) + if self.max_dim: + image_max = max(height, width) + if round(image_max * scale) > self.max_dim: + scale = self.max_dim / image_max + if scale != 1: + image = cv2.resize(image, (round(height * scale), round(width * scale))) + + bbox, str_label, int_label, image_id = label + + if self.padding: + h, w = image.shape[:2] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] + if not isinstance(bbox, np.ndarray): + bbox = np.array(bbox) + resized_box = bbox * [height, width, height, width] * scale + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] + bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = np.pad(image, pad_param, mode="constant", constant_values=self.constant_value) + return image, (bbox, str_label, int_label, image_id) + + +class TensorflowResizeWithRatio(): + """Resize image with aspect ratio and pad it to max shape(optional). + + If the image is padded, the label will be processed at the same time. + The input image should be np.array or tf.Tensor. 
+ + Args: + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim + + Returns: + tuple of processed image and label + """ + + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `TensorflowResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value + + def __call__(self, sample): + """Resize the image with ratio in sample.""" + image, label = sample + if isinstance(image, tf.Tensor): + shape = tf.shape(input=image) + height = tf.cast(shape[0], dtype=tf.float32) + width = tf.cast(shape[1], dtype=tf.float32) + scale = 1 + if self.min_dim: + scale = tf.maximum(1.0, tf.cast(self.min_dim / tf.math.minimum(height, width), dtype=tf.float32)) + if self.max_dim: + image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32) + scale = tf.cond( + pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), + true_fn=lambda: self.max_dim / image_max, + false_fn=lambda: scale, + ) + image = tf.image.resize(image, (tf.math.round(height * scale), tf.math.round(width * scale))) + bbox, str_label, int_label, image_id = label + + if self.padding: + shape = tf.shape(input=image) + h = tf.cast(shape[0], dtype=tf.float32) + w = tf.cast(shape[1], dtype=tf.float32) + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] + resized_box = bbox * [height, width, height, width] * scale + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] + bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = tf.pad(image, pad_param, constant_values=self.constant_value) + else: + transform = ResizeWithRatio(self.min_dim, self.max_dim, self.padding) + image, (bbox, str_label, int_label, image_id) = transform(sample) + return image, (bbox, str_label, int_label, image_id) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. 
+ """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class LabelBalanceCOCORecordFilter(object): + """The label balance filter for COCO Record.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return tf.math.equal(len(label[0]), self.size) + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
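+
+    Example (illustrative usage, mirroring how main.py in this example builds its
+    evaluation pipeline; the tfrecord path is a placeholder):
+        eval_dataset = COCORecordDataset(
+            root="/path/to/coco_val.record",
+            transform=ComposeTransform(transform_list=[TensorflowResizeWithRatio()]),
+        )
+        eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=1)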
+ """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + 
samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py new file mode 100644 index 00000000000..632d66ac25a --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -0,0 +1,133 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +from __future__ import division + +import time + +import numpy as np +import tensorflow as tf + +from argparse import ArgumentParser +from data_process import( + COCOmAPv2, + COCORecordDataset, + ComposeTransform, + TFDataLoader, + LabelBalanceCOCORecordFilter, + TensorflowResizeWithRatio, +) + +arg_parser = ArgumentParser(description='Parse args') + +arg_parser.add_argument('-g', + "--input-graph", + help='Specify the input graph.', + dest='input_graph') +arg_parser.add_argument('--config', type=str, default='') +arg_parser.add_argument('--dataset_location', type=str, default='') +arg_parser.add_argument('--output_model', type=str, default='') +arg_parser.add_argument('--mode', type=str, default='accuracy') +arg_parser.add_argument('--batch_size', type=int, default=10) +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations') +arg_parser.add_argument('--tune', action='store_true', default=False) +arg_parser.add_argument('--benchmark', dest='benchmark', + action='store_true', help='run benchmark') +args = arg_parser.parse_args() + +def evaluate(model): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model. + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + warmup = 5 + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3}) + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list[warmup:]).mean() / args.batch_size + return latency + + use_padding = True if args.mode == 'performance' else False + eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[TensorflowResizeWithRatio( + min_dim=800, max_dim=1356, padding=use_padding)])) + batch_size = 1 if args.mode == 'accuracy' else args.batch_size + eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=batch_size) + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=LabelBalanceCOCORecordFilter(size=1)) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=1) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..6c2115f58ff --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --iters ${iters} \ + --benchmark +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh new file mode 100644 index 00000000000..fea0ff1c373 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh @@ -0,0 +1,136 @@ +!/bin/bash +# set -x + +DATA_DIR="${PWD}/data" +DATA_NAME="val2017" 
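+# This script downloads the COCO val2017 images and annotations, converts them to
+# TF records with the tensorflow/models create_coco_tf_record.py tool, and sorts the
+# resulting coco_*.record files into tf_train2017/, tf_val2017/ and tf_test2017/
+# under the dataset directory (default ./data, override with --dataset_location=<dir>).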
+DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip' +PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' +VAL_IMAGE_DIR=$DATA_DIR/val2017 +TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json +TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +OUTPUT_DIR=$DATA_DIR + +help() +{ + cat <<- EOF + + Desc: Prepare dataset for Tensorflow COCO object detection. + + -h --help help info + + --dataset_location set dataset location, default is ./data + +EOF + exit 0 +} + +function main { + init_params "$@" + download_dataset + convert_to_tf_record +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + DATA_DIR=$(echo "$var" |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# removes files that will not be used anymore +function remove_zipped_packages { + for package in $PACKAGES_LIST; do + rm "$package" + done +} + +function download_tf_models_repo { + if [ ! -d models ]; then + git clone https://github.com/tensorflow/models.git + fi + cd models || exit + git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + cd .. +} + +function divide_tf_records_by_dataset { + if [ ! -d "${DATA_DIR}/tf_test2017" ]; then + mkdir "${DATA_DIR}/tf_test2017" + fi + if [ ! -d "${DATA_DIR}/tf_train2017" ]; then + mkdir "${DATA_DIR}/tf_train2017" + fi + if [ ! -d "${DATA_DIR}/tf_val2017" ]; then + mkdir "${DATA_DIR}/tf_val2017" + fi + mv ${DATA_DIR}/coco_testdev.record* ${DATA_DIR}/tf_test2017 + mv ${DATA_DIR}/coco_train.record* ${DATA_DIR}/tf_train2017 + mv ${DATA_DIR}/coco_val.record* ${DATA_DIR}/tf_val2017 +} + +function convert { + cd models/research + protoc object_detection/protos/*.proto --python_out=. + export PYTHONPATH=$PYTHONPATH:$(pwd) + export PYTHONPATH=$PYTHONPATH:$(pwd)/slim + python ./object_detection/dataset_tools/create_coco_tf_record.py --logtostderr \ + --train_image_dir=empty_dir \ + --val_image_dir="${VAL_IMAGE_DIR}" \ + --test_image_dir=empty_dir \ + --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ + --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ + --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ + --output_dir="${OUTPUT_DIR}" +} + +function convert_to_tf_record { + download_tf_models_repo + convert + divide_tf_records_by_dataset +} + +# download_dataset +function download_dataset { + if [ ! -d "${DATA_DIR}" ]; then + mkdir "${DATA_DIR}" + fi + + cd "${DATA_DIR}" || exit + if [ ! -f "${VAL_IMAGE_DIR}" ]; then + + for dataset_dowload_link in $DATA_URL_LIST; do + wget "$dataset_dowload_link" + done + for package in $PACKAGES_LIST; do + unzip -o "$package" + done + remove_zipped_packages + if [ ! -d empty_dir ]; then + mkdir empty_dir + fi + + cd annotations || exit + echo "{ \"images\": {}, \"categories\": {}}" > empty.json + cd .. + else + echo "Dataset ${DATA_NAME} is exist!" 
+    fi
+
+    cd ../
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/requirements.txt b/examples/3.x_api/tensorflow/object_detection/requirements.txt
new file mode 100644
index 00000000000..865df0f3a6b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/requirements.txt
@@ -0,0 +1,8 @@
+Cython
+contextlib2
+pillow>=8.2.0
+lxml>=4.6.2
+matplotlib
+numpy>=1.17.4
+pycocotools
+protobuf
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
new file mode 100644
index 00000000000..1b52ecf8b17
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
@@ -0,0 +1,160 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the TensorFlow Object Detection model tuning results. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Dependency Packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+`Protocol Buffer Compiler` version 3.0.0 or higher is required for automatic COCO dataset preparation. To install it, please follow the
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for Tensorflow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**: 
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+
+### Automated approach
+Run the `prepare_model.py` script located in `examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq`.
+
+```
+python prepare_model.py --model_name=ssd_mobilenet_v1 --model_path=./
+
+Prepare pre-trained model for COCO object detection
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --model_name {ssd_resnet50_v1,ssd_mobilenet_v1}
+                        model to download, default is ssd_resnet50_v1
+  --model_path MODEL_PATH
+                        directory to put models, default is ./model
+```
+
+### Manual approach
+
+```shell
+wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+tar -xvzf ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: The `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+This script downloads the *train*, *validation* and *test* COCO datasets and converts them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+### Manual dataset download
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+
+
+# Run Command
+
+Both pb and ckpt input formats are supported.
+
+## 1. Quantization
+### For PB format
+
+  ```shell
+  bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+### For ckpt format
+
+  ```shell
+  bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/ --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+## 2. Benchmark
+  ```shell
+  # run performance benchmark
+  bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+
+  # run accuracy benchmark
+  bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy
+  ```
+
+Details of enabling Intel® Neural Compressor on ssd_mobilenet_v1 for TensorFlow
+=========================
+
+This is a tutorial on how to enable the ssd_mobilenet_v1 model with Intel® Neural Compressor.
+## User Code Analysis
+The user supplies an FP32 *model*, a calibration dataloader *q_dataloader*, and a custom *eval_func* that encapsulates the evaluation dataset and metric.
+
+For ssd_mobilenet_v1 we use a custom *eval_func*, because our philosophy is to enable the model with minimal changes. This requires two updates to the original code: implementing the calibration dataloader (*q_dataloader*) and making the necessary changes to *eval_func*.
+
+### Code update
+
+After the preparation steps are done, we only need to update main.py as shown below.
+```python
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        quant_config = StaticQuantConfig(weight_granularity="per_channel")
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+```
+
+The `quantize_model()` API quantizes the FP32 model with the given configuration and calibration dataloader, and the quantized model is then saved with `q_model.save()`.
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+
+TODO(jonathanhuang): wrap as a slim metric in metrics.py
+
+
+Usage example: given a set of images with ids in the list image_ids
+and corresponding lists of numpy arrays encoding groundtruth (boxes and classes)
+and detections (boxes, scores and classes), where elements of each list
+correspond to detections/annotations of a single image,
+then evaluation (in multi-class mode) can be invoked as follows:
+
+  groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
+      image_ids, groundtruth_boxes_list, groundtruth_classes_list,
+      max_num_classes, output_path=None)
+  detections_list = coco_tools.ExportDetectionsToCOCO(
+      image_ids, detection_boxes_list, detection_scores_list,
+      detection_classes_list, output_path=None)
+  groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
+  detections = groundtruth.LoadAnnotations(detections_list)
+  evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
+                                         agnostic_mode=False)
+  metrics = evaluator.ComputeMetrics()
+"""
+
+import copy
+import time
+from collections import OrderedDict
+from typing import Any, Dict, List, Set, Union
+
+import numpy as np
+from pycocotools import coco, cocoeval, mask
+
+from neural_compressor.utils import logger
+
+
+class COCOWrapper(coco.COCO):
+    """Wrapper for the pycocotools COCO class.
+
+    Attributes:
+        dataset: a dictionary holding bounding box annotations in the COCO format.
+        detection_type: type of detections being wrapped.
Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. 
+ Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. 
+ + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. + inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # 
calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. + + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. 
+ """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. 
+ """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. 
Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. 
" + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. " + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py new file mode 100644 index 00000000000..32e55adb3fd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py @@ -0,0 +1,655 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeTFTransform(object): + """Resize the input image to the given size. 
+ + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation="bilinear"): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) + return (image, label) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): 
+ """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. + """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. 
+ transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py new 
file mode 100644 index 00000000000..dbced65f2d7 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py @@ -0,0 +1,129 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +from __future__ import division + +import time + +import numpy as np +import tensorflow as tf + +from argparse import ArgumentParser +from data_process import( + COCOmAPv2, + COCORecordDataset, + ComposeTransform, + ResizeTFTransform, + TFDataLoader, +) + +arg_parser = ArgumentParser(description='Parse args') + +arg_parser.add_argument('-g', + "--input-graph", + help='Specify the input graph.', + dest='input_graph') +arg_parser.add_argument('--config', type=str, default='') +arg_parser.add_argument('--dataset_location', type=str, default='') +arg_parser.add_argument('--output_model', type=str, default='') +arg_parser.add_argument('--mode', type=str, default='performance') +arg_parser.add_argument('--batch_size', type=int, default=10) +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations') +arg_parser.add_argument('--tune', action='store_true', default=False) +arg_parser.add_argument('--benchmark', dest='benchmark', + action='store_true', help='run benchmark') +args = arg_parser.parse_args() + +def evaluate(model): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + from neural_compressor.tensorflow import Model + if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph): + model = Model(model) + model.input_tensor_names = ["image_tensor:0"] + model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \ + "detection_scores:0", "detection_classes:0"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + warmup = 5 + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3}) + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list[warmup:]).mean() / args.batch_size + return latency + + eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)])) + eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size) + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..51882cf0bfe --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py @@ -0,0 +1,99 @@ +import os +import argparse +import enum +import tarfile +import abc + + +class SupportedModels(enum.Enum): + """ + Enumeration containing supported models + """ + ssd_resnet50_v1 = 'ssd_resnet50_v1' + ssd_mobilnet_v1 = 'ssd_mobilenet_v1' + + +class Model(abc.ABC): + """ + Base model class used to obtain the model (and perform any necessary operations to make it usable) + """ + + @abc.abstractmethod + def 
get_pretrained_model(self, destination): + """ + Base method for obtaining a ready to use model + Args: + destination: path to where the file should be stored + """ + pass + + +class SsdMobilenetV1(Model): + """ Concrete implementation of the Model base class for ssd_mobilenet_v1""" + + def get_pretrained_model(self, destination): + """ + Obtains a ready to use ssd_mobilenet_v1 model file. + Args: + destination: path to where the file should be stored + """ + url = 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz' + os.system("curl -o ssd_mobilenet_v1_coco_2018_01_28.tar.gz {0}".format(url)) + with tarfile.open("ssd_mobilenet_v1_coco_2018_01_28.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +class SsdResnet50(Model): + """ Concrete implementation of the Model base class for ssd_resnet_50""" + + def get_pretrained_model(self, destination): + """ + Obtains a ready to use ssd_resnet_50 model file. + Args: + destination: path to where the file should be stored + """ + url = "http://download.tensorflow.org/models/object_detection/" \ + "ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz" + os.system("curl -o ssd_resnet50_v1.tar.gz {0}".format(url)) + with tarfile.open("ssd_resnet50_v1.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +def get_model(model: SupportedModels) -> Model: + """ + Factory method that returns the requested model object + Args: + model: model from SupportedModels enumeration + + Returns: Concrete object inheriting the Model base class + + """ + if model == SupportedModels.ssd_resnet50_v1: + return SsdResnet50() + if model == SupportedModels.ssd_mobilnet_v1: + return SsdMobilenetV1() + else: + raise AttributeError("The model {0} is not supported. Supported models: {1}" + .format(model_name, SupportedModels.__members__.keys())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare pre-trained model for COCO object detection') + parser.add_argument('--model_name', type=str, default='ssd_resnet50_v1', + help='model to download, default is ssd_resnet50_v1', + choices=["ssd_resnet50_v1", "ssd_mobilenet_v1"]) + parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model') + + args = parser.parse_args() + model_name = args.model_name + model_path = args.model_path + try: + model = get_model(SupportedModels(model_name)) + model.get_pretrained_model(model_path) + except AttributeError: + print("The model {0} is not supported. 
Supported models: {1}" + .format(model_name, SupportedModels.__members__.keys())) diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ee728de373 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md new file mode 100644 index 00000000000..7bff08a2f84 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md @@ -0,0 +1,98 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Wide & Deep tuning zoo result. +This example can run on Intel CPUs and GPUs. + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +### Install Additional Dependency packages +```shell +cd examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq +pip install -r requirements.txt +``` + +### 2. Download Frozen PB +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.pb +``` + +### 3. Prepare Dataset +Download training dataset: (8 million samples) +```bash +$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/train.csv +``` +Download evaluation dataset (2 million samples) +```bash +$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/eval.csv +``` + +### 4. Process Dataset +Process calib dataset +```bash +python preprocess_csv_tfrecords.py \ + --inputcsv-datafile train.csv \ + --calibrationcsv-datafile eval.csv \ + --outputfile-name processed_data +``` +Process eval dataset +```bash +python preprocess_csv_tfrecords.py \ + --inputcsv-datafile eval.csv \ + --calibrationcsv-datafile train.csv \ + --outputfile-name processed_data +``` +Two .tfrecords files are generated and will be used later on: +1) train_processed_data.tfrecords +2) eval_processed_data.tfrecords + + +# Run Command + +## Quantization + ```shell + bash run_quant.sh --dataset_location=/path/to/datasets --input_model=/path/to/wide_deep_fp32_pretrained_model.pb --output_model=./wnd_int8_opt.pb + ``` + +## Benchmark + ``` + bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=accuracy --batch_size=500 + bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=performance --batch_size=500 + ``` + +# Other +This example takes the reference from https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds. +The pretrained model was trained with preprocessed data from dataset Criteo. diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py new file mode 100644 index 00000000000..a89efd25537 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py @@ -0,0 +1,347 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import os +import numpy as np +import argparse +import collections +import time +import math +import json +import datetime + +import tensorflow as tf + +from tensorflow.python.framework import ops +from tensorflow.core.framework import graph_pb2 +from google.protobuf import text_format +from argparse import ArgumentParser +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.compat.v1 import graph_util + + +def load_graph(model_file): + """This is a function to load TF graph from pb file + + Args: + model_file (string): TF pb file local path + + Returns: + graph: TF graph object + """ + graph = tf.Graph() + #graph_def = tf.compat.v1.GraphDef() + graph_def = graph_pb2.GraphDef() + + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + + +numeric_feature_names = ["numeric_1"] +string_feature_names = ["string_1"] + +def get_feature_name(compute_accuracy): + + if compute_accuracy: + full_features_names = numeric_feature_names + string_feature_names + ["label"] + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)] + else: + full_features_names = numeric_feature_names + string_feature_names + feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature( + [], tf.int64, default_value=0, allow_missing=True)] + return full_features_names, feature_datatypes + +def input_fn(data_file, num_epochs, shuffle, batch_size, compute_accuracy=True): + """Generate an input function for the Estimator.""" + full_features_names, feature_datatypes = get_feature_name(compute_accuracy) + def _parse_function(proto): + f = collections.OrderedDict( + zip(full_features_names, feature_datatypes)) + parsed_features = tf.io.parse_example(proto, f) + parsed_feature_vals_num = [tf.reshape( + parsed_features["numeric_1"], shape=[-1, 13])] + parsed_feature_vals_str = [tf.reshape( + parsed_features["string_1"], shape=[-1, 2]) for i in string_feature_names] + parsed_feature_vals = parsed_feature_vals_num + parsed_feature_vals_str + if compute_accuracy: + parsed_feature_vals_label = [tf.reshape(parsed_features[i], shape=[-1]) for i in ["label"]] + parsed_feature_vals = parsed_feature_vals + parsed_feature_vals_label + return parsed_feature_vals + + # Extract lines from input files using the Dataset API. 
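+    # Batching happens before the map, so _parse_function receives a whole batch of
+    # serialized tf.train.Example protos and tf.io.parse_example parses them in one call;
+    # prefetch overlaps input parsing with the downstream inference session.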
+ dataset = tf.data.TFRecordDataset([data_file]) + if shuffle: + dataset = dataset.shuffle(buffer_size=20000) + dataset = dataset.batch(batch_size) + dataset = dataset.map(_parse_function, num_parallel_calls=28) + dataset = dataset.prefetch(batch_size*10) + return dataset + +def evaluation_func(model, measurer=None): + return evaluate_opt_graph.eval_inference(model) + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-i', '--input_graph', type=str, + help='Specify the input of the model', + dest='input_graph', + required=True) + arg_parser.add_argument('-o', '--output_graph', type=str, + help='Specify the output of the model', + dest='output_graph') + arg_parser.add_argument('--calibration_data_location', type=str, + help='full path of calibration data file', + dest='calib_data') + arg_parser.add_argument('--evaluation_data_location', type=str, + help='full path of validation data file', + dest='eval_data', + required=True) + arg_parser.add_argument('--batch_size', type=int, + help='batch size for inference.Default is 512', + default=512, + dest='batch_size') + arg_parser.add_argument('--num_intra_threads', type=int, + help='number of threads for an operator', + required=False, + default=0, + dest='num_intra_threads') + arg_parser.add_argument('--num_inter_threads', type=int, + help='number of threads across operators', + required=False, + default=0, + dest='num_inter_threads') + arg_parser.add_argument('--kmp_blocktime', type=str, + help='KMP_BLOCKTIME value', + required=False, + default=None, + dest='kmp_blocktime') + arg_parser.add_argument('-r', "--accuracy", + help='For accuracy measurement only.', + dest='accuracy', action='store_true') + arg_parser.add_argument("--config", default=None, + help="tuning config") + arg_parser.add_argument('--performance', + dest='performance', + action='store_true', + help='run performance') + arg_parser.add_argument('--tune', + dest='tune', + action='store_true', + help='use neural_compressor to tune.') + arg_parser.add_argument("--warmup-steps", + type=int, default=50, + help="number of warmup steps") + arg_parser.add_argument("--steps", + type=int, default=2000, + help="number of iterations") + + arg_parser.add_argument('--env', + dest='env', + help='specific Tensorflow env', + default='mkl') + + + self.args = arg_parser.parse_args() + + def auto_tune(self): + """This is neural_compressor tuning part to generate a quantized pb + Returns: + graph: it will return a quantized pb + """ + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + set_random_seed(9527) + infer_graph = load_graph(self.args.input_graph) + model = Model(infer_graph) + model.input_tensor_names = ["new_numeric_placeholder", "new_categorical_placeholder"] + model.output_tensor_names = ["import/head/predictions/probabilities"] + + if self.args.calib_data: + quant_config = StaticQuantConfig() + calib_dataloader=Dataloader(self.args.calib_data, self.args.batch_size) + q_model = quantize_model(model, quant_config, calib_dataloader) + return q_model + print("Please provide calibration dataset!") + + def eval_inference(self, infer_graph): + print("Run inference") + if isinstance(infer_graph, tf.compat.v1.GraphDef): + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(infer_graph, name='') + infer_graph = graph + + data_config 
= tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.num_intra_threads + data_config.inter_op_parallelism_threads = self.args.num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + if self.args.env == 'mkl': + print("Set inter and intra for mkl: ") + print("intra_op_parallelism_threads = ", self.args.num_intra_threads) + print("inter_op_parallelism_threads = ", self.args.num_inter_threads) + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + total_test_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(self.args.eval_data)) + total_batches = math.ceil(float(total_test_samples)/self.args.batch_size) + placeholder_list = ['new_numeric_placeholder','new_categorical_placeholder'] + input_tensor = [infer_graph.get_tensor_by_name(name + ":0") for name in placeholder_list] + output_name = "import/head/predictions/probabilities" + output_tensor = infer_graph.get_tensor_by_name(output_name + ":0" ) + correctly_predicted = 0 + evaluate_duration = 0.0 + + features_list = [] + data_graph = tf.Graph() + with data_graph.as_default(): + res_dataset = input_fn(self.args.eval_data, 1, False, self.args.batch_size) + iterator = tf.compat.v1.data.make_one_shot_iterator(res_dataset) + next_element = iterator.get_next() + with tf.compat.v1.Session(config=data_config, graph=data_graph) as data_sess: + for i in range(int(total_batches)): + batch = data_sess.run(next_element) + features=batch[0:3] + features_list.append(features) + + if self.args.performance: + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + + if total_run > total_batches: + total_run = total_batches + + with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess: + i = 0 + for i in range(int(total_run)): + start_time = time.time() + logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[iteration][0:2]))) + time_consume = time.time() - start_time + + if iteration > warm_up_iteration: + evaluate_duration += time_consume + + iteration += 1 + if iteration > total_batches: + iteration = 0 + test_batches = total_run - warm_up_iteration + else: + with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess: + i = 0 + for i in range(int(total_batches)): + start_time = time.time() + logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[i][0:2]))) + time_consume = time.time() - start_time + evaluate_duration += time_consume + + predicted_labels = np.argmax(logistic,1) + correctly_predicted=correctly_predicted+np.sum(features_list[i][2] == predicted_labels) + + i=i+1 + + accuracy = float(correctly_predicted) / float(total_test_samples) + test_batches = total_batches + + no_of_test_samples = test_batches * self.args.batch_size + latency = 1000 * float(evaluate_duration) / float(test_batches) + throughput = no_of_test_samples / evaluate_duration + + print('--------------------------------------------------') + print('Total test records: %d' % no_of_test_samples) + print('Number of batches: %d' % test_batches) + print('Batch size = %d' % self.args.batch_size) + print('Latency: %.3f ms' % latency) + print('Throughput: %.3f records/sec' % throughput) + print('--------------------------------------------------') + + return accuracy + + def run(self): + """ This is neural_compressor function include 
tuning and benchmark option """ + + if self.args.tune: + q_model = evaluate_opt_graph.auto_tune() + q_model.save(self.args.output_graph) + else: + if self.args.accuracy: + infer_graph = load_graph(self.args.input_graph) + acc = evaluation_func(infer_graph) + print("Accuracy: %.5f" % acc) + if self.args.performance: + infer_graph = load_graph(self.args.input_graph) + evaluation_func(infer_graph) + + +class Dataloader(object): + def __init__(self, data_location, batch_size): + """dataloader generator + + Args: + data_location (str): tf recorder local path + batch_size (int): dataloader batch size + """ + self.batch_size = batch_size + self.data_file = data_location + self.total_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(data_location)) + self.n = math.ceil(float(self.total_samples) / batch_size) + print("batch size is " + str(self.batch_size) + "," + str(self.n) + " iteration") + + def __iter__(self): + data_graph = tf.Graph() + with data_graph.as_default(): + self.dataset = input_fn(self.data_file, 1, False, self.batch_size) + self.dataset_iterator = tf.compat.v1.data.make_one_shot_iterator(self.dataset) + next_element = self.dataset_iterator.get_next() + + with tf.compat.v1.Session(graph=data_graph) as sess: + for i in range(self.n): + batch = sess.run(next_element) + yield (batch[0:2], batch[2]) + + def __len__(self): + return self.n + + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py new file mode 100644 index 00000000000..e1a82cd674c --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py @@ -0,0 +1,155 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import sys +import pandas +import argparse +import numpy as np +import tensorflow as tf +if tf.version.VERSION < '2.0': + tf.enable_eager_execution() +parser = argparse.ArgumentParser() +parser.add_argument('--inputcsv-datafile', type=str, + help='full path of data file e.g. eval.csv', + dest='evaldatafile_path', + required=True) +parser.add_argument('--calibrationcsv-datafile', type=str, + help='full path of data file of calibration/train dataset to get normalization ranges', + dest='traindatafile_path', + default='NULL', + required=False) + +parser.add_argument('--outputfile-name', type=str, + help='output tfrecord file name e.g. 
processed_eval.[tfrecords]', + dest='outputfile_path', + default="processed_data.tfrecords", + required=False) + +args = parser.parse_args() + +eval_csv_file = args.evaldatafile_path +train_csv_file = args.traindatafile_path +output_file = args.outputfile_path + +if not os.path.isfile(eval_csv_file): + print("Please input a valid csv file") + sys.exit(1) + +filename, file_ext = os.path.splitext(output_file) +in_filename, _ = os.path.splitext(os.path.basename(eval_csv_file)) + +if file_ext != ".tfrecords": + output_file = output_file + ".tfrecords" + +output_file = "{}_{}".format(in_filename,output_file) +csv = pandas.read_csv(eval_csv_file, header=None) +if len(csv.columns)==39: + dataset_type = 'test' +else: + dataset_type = 'eval' +fill_na_dict = {} +if dataset_type=='test': + for i in range(0,13): + fill_na_dict[i]=0.0 + for i in range(13,39): + fill_na_dict[i]="" +else: + for i in range(1,14): + fill_na_dict[i]=0.0 + for i in range(14,40): + fill_na_dict[i]="" +csv=csv.fillna(value=fill_na_dict).values +numeric_feature_names = ["numeric_1"] +string_feature_names = ["string_1"] +LABEL_COLUMN =["clicked"] +CATEGORICAL_COLUMNS1 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS1 = ["I"+str(i) for i in range(1, 14)] +if dataset_type=='eval': + DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 +else: + DATA_COLUMNS = NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 +CATEGORICAL_COLUMNS2 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS2 = ["I"+str(i) for i in range(1, 14)] + +CATEGORICAL_COLUMNS1.sort() +NUMERIC_COLUMNS1.sort() +no_of_rows = 0 +with open(eval_csv_file, 'r') as f: + if not os.path.isfile(train_csv_file): + nums=[line.strip('\n\r').split(',') for line in f.readlines()] + else: + f1 = open(train_csv_file, 'r') + nums=[line.strip('\n\r').split(',') for line in f.readlines( + )]+[line.strip('\n\t').split(',') for line in f1.readlines()] + numpy_arr = np.array(nums) + numpy_arr[numpy_arr=='']='0' + min_list,max_list,range_list = [],[],[] + for i in range(len(DATA_COLUMNS)): + if DATA_COLUMNS[i] in NUMERIC_COLUMNS1: + col_min = numpy_arr[:,i].astype(np.float32).min() + col_max = numpy_arr[:,i].astype(np.float32).max() + min_list.append(col_min) + max_list.append(col_max) + range_list.append(col_max-col_min) + if os.path.isfile(train_csv_file): + f1.close() + print('min list',min_list) + print('max list',max_list) + print('range list',range_list) + + +with tf.compat.v1.python_io.TFRecordWriter(output_file) as writer: + print('*****Processing data******') + for row in csv: + no_of_rows = no_of_rows+1 + if dataset_type == 'eval': + unnormalized_vals = np.array(row[1:14]) + else: + unnormalized_vals = np.array(row[0:13]) + normalized_vals = (unnormalized_vals-min_list)/range_list + if dataset_type == 'eval': + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40])) + else: + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[13:39])) + new_categorical_list = [] + for i in CATEGORICAL_COLUMNS1: + if pandas.isnull(new_categorical_dict[i]): + new_categorical_list.append("") + else: + new_categorical_list.append(new_categorical_dict[i]) + hash_values = tf.compat.v1.string_to_hash_bucket_fast( + new_categorical_list, 1000).numpy() + new_numerical_dict = dict(zip(NUMERIC_COLUMNS2, normalized_vals)) + example = tf.train.Example() + for i in NUMERIC_COLUMNS1: + example.features.feature[numeric_feature_names[0]].float_list.value.extend([new_numerical_dict[i]]) + for i in range(0, 26): + 
example.features.feature[string_feature_names[0]].int64_list.value.extend([i]) + example.features.feature[string_feature_names[0]].int64_list.value.extend([hash_values[i]]) + if dataset_type == 'eval': + example.features.feature["label"].int64_list.value.append(row[0]) + writer.write(example.SerializeToString()) + +print('Total number of rows ', no_of_rows) +print('Generated output file name :'+output_file) diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..e2f0ef81736 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt @@ -0,0 +1,9 @@ +intel-tensorflow>=2.12.0 +Cython +contextlib2 +pillow>=8.2.0 +lxml>=4.6.2 +matplotlib +numpy>=1.17.4 +pycocotools +protobuf diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..72ab01f2a19 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + define_mode + run_benchmark + +} + +# init params +function init_params { + iters=1000 + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +function define_mode { + if [[ ${mode} == "accuracy" ]]; then + mode_cmd=" --accuracy" + elif [[ ${mode} == "performance" ]]; then + mode_cmd=" --performance" + else + echo "Error: No such mode: ${mode}" + exit 1 + fi +} + +# run_tuning +function run_benchmark { + #numactl -N 0 -m 0 \ + python main.py \ + --input_graph ${input_model} \ + --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ + --batch_size ${batch_size} \ + --num_inter_threads 4 \ + ${mode_cmd} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..a8068917a27 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_tuning +function run_tuning { + python main.py \ + --input_graph ${input_model} \ + --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ + --calibration_data_location ${dataset_location}/train_processed_data.tfrecords \ + --accuracy \ + --batch_size 1000 \ + --output_graph ${output_model} \ + --tune +} + +main "$@" diff --git 
a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md new file mode 100644 index 00000000000..4307ec85480 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -0,0 +1,76 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 3dunet-mlperf. +This example can run on Intel CPUs and GPUs. + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install requirements +```shell +pip install -r requirements.txt +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare Pre-trained model + Download the pre-trained model from the + [3DUnetCNN](https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/3dunet_dynamic_ndhwc.pb). + In this example, we are using the model, + trained using the fold 1 BRATS 2019 data. + The validation files have been copied from [here](https://github.com/mlcommons/inference/tree/r0.7/vision/medical_imaging/3d-unet/folds) + +## 3. Prepare dataset + +### Download BraTS 2019 dataset + Please download [Brats 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) + separately and unzip the dataset. The directory that contains the dataset files will be + passed to the launch script when running the benchmarking script. + +### Prepare Calibration set + The calibration set is the forty images listed in brats_cal_images_list.txt. They are randomly selected from Fold 0, Fold 2, Fold 3, and Fold 4 of BraTS 2019 Training Dataset. 
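+
+> For reference, when `main.py` is invoked with `--mode=tune`, the quantization step boils down to the snippet below. This is a simplified excerpt of the script in this example, not a standalone recipe: `CalibrationDL` and `graph` are defined in `main.py`, and the calibration dataloader simply yields the pickled volumes listed in `brats_cal_images_list.txt`.
+
+```python
+from neural_compressor.common import set_random_seed
+from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+from neural_compressor.tensorflow.utils import BaseDataLoader
+
+set_random_seed(9527)
+quant_config = StaticQuantConfig()                          # default static PTQ settings
+calib_dataloader = BaseDataLoader(dataset=CalibrationDL())  # 40 BraTS calibration volumes
+q_model = quantize_model(graph, quant_config, calib_dataloader)
+q_model.save(args.output_model)                             # e.g. 3dunet_dynamic_ndhwc_int8.pb
+```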
+ + +# Run command +Please set the following environment variables before running quantization or benchmark commands: + +* `export nnUNet_preprocessed=/build/preprocessed_data` +* `export nnUNet_raw_data_base=/build/raw_data` +* `export RESULTS_FOLDER=/build/result` + +## Quantization + +`bash run_quant.sh --input_model=3dunet_dynamic_ndhwc.pb --dataset_location=/build --output_model=3dunet_dynamic_ndhwc_int8.pb` + +## Benchmark + +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --iters=500 --mode=benchmark` + +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy` diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py new file mode 100644 index 00000000000..0a18c579d8b --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt new file mode 100644 index 00000000000..69276e67b6a --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt @@ -0,0 +1,40 @@ +HGG__BraTS19_2013_18_1 +HGG__BraTS19_2013_20_1 +HGG__BraTS19_CBICA_AAP_1 +HGG__BraTS19_CBICA_ABN_1 +HGG__BraTS19_CBICA_ABO_1 +HGG__BraTS19_CBICA_ALU_1 +HGG__BraTS19_CBICA_ANZ_1 +HGG__BraTS19_CBICA_APY_1 +HGG__BraTS19_CBICA_AQJ_1 +HGG__BraTS19_CBICA_AQZ_1 +HGG__BraTS19_CBICA_ASN_1 +HGG__BraTS19_CBICA_ASY_1 +HGG__BraTS19_CBICA_AUW_1 +HGG__BraTS19_CBICA_AXJ_1 +HGG__BraTS19_CBICA_AXM_1 +HGG__BraTS19_CBICA_AYG_1 +HGG__BraTS19_CBICA_AYU_1 +HGG__BraTS19_CBICA_AZD_1 +HGG__BraTS19_CBICA_BAX_1 +HGG__BraTS19_CBICA_BGR_1 +HGG__BraTS19_CBICA_BHV_1 +HGG__BraTS19_TCIA01_235_1 +HGG__BraTS19_TCIA02_394_1 +HGG__BraTS19_TCIA02_473_1 +HGG__BraTS19_TCIA02_606_1 +HGG__BraTS19_TCIA03_419_1 +HGG__BraTS19_TCIA04_192_1 +HGG__BraTS19_TCIA04_479_1 +HGG__BraTS19_TCIA06_372_1 +HGG__BraTS19_TCIA08_278_1 +LGG__BraTS19_2013_28_1 +LGG__BraTS19_TCIA09_462_1 +LGG__BraTS19_TCIA10_130_1 +LGG__BraTS19_TCIA10_202_1 +LGG__BraTS19_TCIA10_346_1 +LGG__BraTS19_TCIA10_387_1 +LGG__BraTS19_TCIA10_628_1 +LGG__BraTS19_TCIA12_470_1 +LGG__BraTS19_TCIA13_621_1 +LGG__BraTS19_TCIA13_653_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py new file mode 100644 index 00000000000..bc8ce8edc07 --- /dev/null +++ 
b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py @@ -0,0 +1,219 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +from argparse import ArgumentParser +import os +import pickle +import sys +import math +import array + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import dtypes +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from nnunet.evaluation.region_based_evaluation import evaluate_regions, get_brats_regions + +from nnUNet.setup import setup +from nnUNet.postprocess import postprocess_output + +INPUTS = 'input' +OUTPUTS = 'Identity' + +if __name__ == "__main__": + """Evaluate 3d_unet with optimized TensorFlow graph""" + def get_args(): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-m', "--mode", + help="One of three options: 'benchmark'/'accuracy'/'tune'.") + arg_parser.add_argument('-n', "--iters", + help='The number of iteration. 
shall > warmup num(10)', + type=int, default=100) + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + arg_parser.add_argument('-i', "--input-model", + help='Specify the input graph.', + dest='input_model') + arg_parser.add_argument('-o', "--output-model", + help='Specify the output graph.', + dest='output_model') + arg_parser.add_argument('-c', "--calib-preprocess", + help='Specify calibration preprocess dir.', + dest='calib_preprocess') + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data.', + dest="data_location", default=None) + arg_parser.add_argument("--batch-size", dest="batch_size", type=int, default=1) + arg_parser.add_argument("--bfloat16", type=int, default=0) + + args = arg_parser.parse_args() + print(args) + return args + + def eval_func(graph): + print("Run inference for accuracy") + args = get_args() + #setup(args.data_location, args.input_model) + + output_graph = optimize_for_inference(graph.as_graph_def(), [INPUTS], [OUTPUTS], + dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name="") + + input_tensor = graph.get_tensor_by_name('input:0') + output_tensor = graph.get_tensor_by_name('Identity:0') + + config = tf.compat.v1.ConfigProto() + config.intra_op_parallelism_threads=args.num_intra_threads + config.inter_op_parallelism_threads=args.num_inter_threads + if args.bfloat16: + config.graph_options.rewrite_options.auto_mixed_precision_mkl = rewriter_config_pb2.RewriterConfig.ON + + sess = tf.compat.v1.Session(graph=graph, config=config) + if args.mode: + print("Inference with real data") + preprocessed_data_dir = os.path.join(args.data_location, "preprocessed_data") + with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "rb") as f: + preprocessed_files = pickle.load(f) + + dictionaries = [] + for preprocessed_file in preprocessed_files: + with open(os.path.join(preprocessed_data_dir, preprocessed_file + ".pkl"), "rb") as f: + dct = pickle.load(f)[1] + dictionaries.append(dct) + + count = len(preprocessed_files) + predictions = [None] * count + validation_indices = list(range(0,count)) + print("Found {:d} preprocessed files".format(count)) + loaded_files = {} + batch_size = args.batch_size + + # Get the number of steps based on batch size + steps = count#math.ceil(count/batch_size) + warmup = 10 + assert args.iters >= warmup, 'iteration must be larger than warmup' + time_list=[] + for i in range(steps): + print("Iteration {} ...".format(i)) + test_data_index = validation_indices[i]#validation_indices[i * batch_size:(i + 1) * batch_size] + file_name = preprocessed_files[test_data_index] + with open(os.path.join(preprocessed_data_dir, "{:}.pkl".format(file_name)), "rb") as f: + data = pickle.load(f)[0] + if args.mode == 'performance' and i < args.iters: + time_start = time.time() + predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32) + duration = time.time() - time_start + time_list.append(duration) + else: + predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32) + if args.mode == 'performance': + latency = np.array(time_list[warmup: ]).mean() / args.batch_size + print('Batch size = {}'.format(args.batch_size)) + print('Latency: {:.3f} 
ms'.format(latency * 1000)) + print('Throughput: {:.3f} items/sec'.format(1./ latency)) + else: + output_folder = os.path.join(args.data_location, "postprocessed_data") + output_files = preprocessed_files + # Post Process + postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files) + + ground_truths = os.path.join(args.data_location, \ + "raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr") + # Run evaluation + print("Running evaluation...") + evaluate_regions(output_folder, ground_truths, get_brats_regions()) + # Load evaluation summary + print("Loading evaluation summary...") + accuracy=0.0 + with open(os.path.join(output_folder, "summary.csv")) as f: + for line in f: + words = line.split(",") + if words[0] == "mean": + whole = float(words[1]) + core = float(words[2]) + enhancing = float(words[3]) + mean = (whole + core + enhancing) / 3 + accuracy=mean + print("Batch size =", args.batch_size) + print("Accuracy is {:.5f}".format(mean)) + break + print("Done!") + return accuracy + + def load_graph(file_name): + tf.compat.v1.logging.info('Loading graph from: ' + file_name) + with tf.io.gfile.GFile(file_name, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name='') + return graph + + class CalibrationDL(): + def __init__(self): + path = os.path.abspath(os.path.expanduser( + './brats_cal_images_list.txt')) + with open(path, 'r') as f: + self.preprocess_files = [line.rstrip() for line in f] + + self.loaded_files = {} + self.batch_size = 1 + + def __getitem__(self, sample_id): + file_name = self.preprocess_files[sample_id] + print("Loading file {:}".format(file_name)) + with open(os.path.join(args.calib_preprocess, "{:}.pkl".format(file_name)), "rb") as f: + self.loaded_files[sample_id] = pickle.load(f)[0] + # note that calibration phase does not care label, here we return 0 for label free case. + return self.loaded_files[sample_id], 0 + + def __len__(self): + self.count = len(self.preprocess_files) + return self.count + + + args = get_args() + print(args) + graph = load_graph(args.input_model) + if args.mode == 'tune': + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow.utils import BaseDataLoader + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + calib_dataloader=BaseDataLoader(dataset=CalibrationDL()) + q_model = quantize_model(graph, quant_config, calib_dataloader) + try: + q_model.save(args.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) + else: + eval_func(graph) diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py new file mode 100644 index 00000000000..d26521276d6 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py @@ -0,0 +1,125 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is copied from nnUnet/nnunet/dataset_conversion/Task043_BraTS_2019.py, except that +# the validation/test set part is removed and downloaded_data_dir is now configurable. + +import argparse +import numpy as np +from collections import OrderedDict +import os +import sys + +from batchgenerators.utilities.file_and_folder_operations import * +from nnunet.paths import nnUNet_raw_data +import SimpleITK as sitk +import shutil + +def copy_BraTS_segmentation_and_convert_labels(in_file, out_file): + # use this for segmentation only!!! + # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3 + img = sitk.ReadImage(in_file) + img_npy = sitk.GetArrayFromImage(img) + + uniques = np.unique(img_npy) + for u in uniques: + if u not in [0, 1, 2, 4]: + raise RuntimeError('unexpected label') + + seg_new = np.zeros_like(img_npy) + seg_new[img_npy == 4] = 3 + seg_new[img_npy == 2] = 1 + seg_new[img_npy == 1] = 2 + img_corr = sitk.GetImageFromArray(seg_new) + img_corr.CopyInformation(img) + sitk.WriteImage(img_corr, out_file) + +def task_setup(downloaded_data_dir): + """ + REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION! + """ + + task_name = "Task043_BraTS2019" + print(task_name) + print(downloaded_data_dir) + print(nnUNet_raw_data) + + target_base = join(nnUNet_raw_data, task_name) + if not os.path.isdir(target_base): + target_imagesTr = join(target_base, "imagesTr") + target_imagesVal = join(target_base, "imagesVal") + target_imagesTs = join(target_base, "imagesTs") + target_labelsTr = join(target_base, "labelsTr") + + maybe_mkdir_p(target_imagesTr) + maybe_mkdir_p(target_imagesVal) + maybe_mkdir_p(target_imagesTs) + maybe_mkdir_p(target_labelsTr) + + patient_names = [] + for tpe in ["HGG", "LGG"]: + cur = join(downloaded_data_dir, tpe) + for p in subdirs(cur, join=False): + patdir = join(cur, p) + patient_name = tpe + "__" + p + patient_names.append(patient_name) + t1 = join(patdir, p + "_t1.nii.gz") + t1c = join(patdir, p + "_t1ce.nii.gz") + t2 = join(patdir, p + "_t2.nii.gz") + flair = join(patdir, p + "_flair.nii.gz") + seg = join(patdir, p + "_seg.nii.gz") + + assert all([ + isfile(t1), + isfile(t1c), + isfile(t2), + isfile(flair), + isfile(seg) + ]), "%s" % patient_name + + shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz")) + shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz")) + shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz")) + shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz")) + + copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz")) + + json_dict = OrderedDict() + json_dict['name'] = "BraTS2019" + json_dict['description'] = "nothing" + json_dict['tensorImageSize'] = "4D" + json_dict['reference'] = "see BraTS2019" + json_dict['licence'] = "see BraTS2019 license" + json_dict['release'] = "0.0" + json_dict['modality'] = { + "0": "T1", + "1": "T1ce", + "2": "T2", + "3": "FLAIR" + } + json_dict['labels'] = { + "0": "background", + "1": "edema", + "2": "non-enhancing", + "3": "enhancing", + } + 
json_dict['numTraining'] = len(patient_names) + json_dict['numTest'] = 0 + json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in + patient_names] + json_dict['test'] = [] + + save_json(json_dict, join(target_base, "dataset.json")) + print("DONE") diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py new file mode 100644 index 00000000000..0a18c579d8b --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt new file mode 100644 index 00000000000..57eeeb651c5 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_22_1 +HGG__BraTS19_2013_23_1 +HGG__BraTS19_2013_3_1 +HGG__BraTS19_2013_5_1 +HGG__BraTS19_2013_7_1 +HGG__BraTS19_CBICA_AAB_1 +HGG__BraTS19_CBICA_AAL_1 +HGG__BraTS19_CBICA_ABN_1 +HGG__BraTS19_CBICA_ALU_1 +HGG__BraTS19_CBICA_AME_1 +HGG__BraTS19_CBICA_ANG_1 +HGG__BraTS19_CBICA_AOC_1 +HGG__BraTS19_CBICA_AOD_1 +HGG__BraTS19_CBICA_APZ_1 +HGG__BraTS19_CBICA_AQD_1 +HGG__BraTS19_CBICA_AQJ_1 +HGG__BraTS19_CBICA_AQN_1 +HGG__BraTS19_CBICA_ASA_1 +HGG__BraTS19_CBICA_ASK_1 +HGG__BraTS19_CBICA_ASO_1 +HGG__BraTS19_CBICA_AWH_1 +HGG__BraTS19_CBICA_AWV_1 +HGG__BraTS19_CBICA_AYA_1 +HGG__BraTS19_CBICA_AYC_1 +HGG__BraTS19_CBICA_AYI_1 +HGG__BraTS19_CBICA_BFB_1 +HGG__BraTS19_CBICA_BGN_1 +HGG__BraTS19_CBICA_BGR_1 +HGG__BraTS19_CBICA_BJY_1 +HGG__BraTS19_TCIA01_231_1 +HGG__BraTS19_TCIA01_378_1 +HGG__BraTS19_TCIA01_390_1 +HGG__BraTS19_TCIA01_412_1 +HGG__BraTS19_TCIA02_135_1 +HGG__BraTS19_TCIA02_179_1 +HGG__BraTS19_TCIA02_208_1 +HGG__BraTS19_TCIA02_274_1 +HGG__BraTS19_TCIA02_314_1 +HGG__BraTS19_TCIA02_430_1 +HGG__BraTS19_TCIA02_608_1 +HGG__BraTS19_TCIA03_121_1 +HGG__BraTS19_TCIA03_138_1 +HGG__BraTS19_TCIA03_375_1 +HGG__BraTS19_TCIA03_498_1 +HGG__BraTS19_TCIA06_184_1 +HGG__BraTS19_TCIA06_372_1 +HGG__BraTS19_TCIA08_113_1 +HGG__BraTS19_TCIA08_162_1 +HGG__BraTS19_TCIA08_218_1 +HGG__BraTS19_TCIA08_469_1 +LGG__BraTS19_2013_6_1 +LGG__BraTS19_TCIA09_141_1 +LGG__BraTS19_TCIA09_255_1 +LGG__BraTS19_TCIA09_402_1 +LGG__BraTS19_TCIA09_451_1 +LGG__BraTS19_TCIA09_462_1 +LGG__BraTS19_TCIA09_620_1 +LGG__BraTS19_TCIA10_266_1 +LGG__BraTS19_TCIA10_413_1 +LGG__BraTS19_TCIA10_628_1 +LGG__BraTS19_TCIA10_629_1 +LGG__BraTS19_TCIA10_640_1 +LGG__BraTS19_TCIA12_298_1 +LGG__BraTS19_TCIA12_470_1 
+LGG__BraTS19_TCIA13_621_1 +LGG__BraTS19_TCIA13_624_1 +LGG__BraTS19_TCIA13_654_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt new file mode 100644 index 00000000000..d24f39b67c4 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_13_1 +HGG__BraTS19_2013_19_1 +HGG__BraTS19_2013_27_1 +HGG__BraTS19_CBICA_AAG_1 +HGG__BraTS19_CBICA_ALN_1 +HGG__BraTS19_CBICA_ANV_1 +HGG__BraTS19_CBICA_AOH_1 +HGG__BraTS19_CBICA_APK_1 +HGG__BraTS19_CBICA_APR_1 +HGG__BraTS19_CBICA_AQG_1 +HGG__BraTS19_CBICA_AQP_1 +HGG__BraTS19_CBICA_ARZ_1 +HGG__BraTS19_CBICA_ASF_1 +HGG__BraTS19_CBICA_ASG_1 +HGG__BraTS19_CBICA_ATP_1 +HGG__BraTS19_CBICA_ATX_1 +HGG__BraTS19_CBICA_AUA_1 +HGG__BraTS19_CBICA_AVJ_1 +HGG__BraTS19_CBICA_AVV_1 +HGG__BraTS19_CBICA_AWG_1 +HGG__BraTS19_CBICA_AXL_1 +HGG__BraTS19_CBICA_AXQ_1 +HGG__BraTS19_CBICA_BAN_1 +HGG__BraTS19_CBICA_BBG_1 +HGG__BraTS19_CBICA_BGE_1 +HGG__BraTS19_CBICA_BHQ_1 +HGG__BraTS19_CBICA_BIC_1 +HGG__BraTS19_CBICA_BNR_1 +HGG__BraTS19_TCIA01_131_1 +HGG__BraTS19_TCIA01_147_1 +HGG__BraTS19_TCIA01_180_1 +HGG__BraTS19_TCIA01_190_1 +HGG__BraTS19_TCIA01_221_1 +HGG__BraTS19_TCIA01_335_1 +HGG__BraTS19_TCIA01_411_1 +HGG__BraTS19_TCIA02_151_1 +HGG__BraTS19_TCIA02_321_1 +HGG__BraTS19_TCIA02_331_1 +HGG__BraTS19_TCIA02_368_1 +HGG__BraTS19_TCIA02_471_1 +HGG__BraTS19_TCIA03_257_1 +HGG__BraTS19_TCIA03_474_1 +HGG__BraTS19_TCIA04_111_1 +HGG__BraTS19_TCIA04_328_1 +HGG__BraTS19_TCIA04_343_1 +HGG__BraTS19_TCIA05_277_1 +HGG__BraTS19_TCIA05_478_1 +HGG__BraTS19_TCIA06_165_1 +HGG__BraTS19_TCIA08_105_1 +HGG__BraTS19_TCIA08_280_1 +HGG__BraTS19_TMC_15477_1 +HGG__BraTS19_TMC_21360_1 +HGG__BraTS19_TMC_30014_1 +LGG__BraTS19_TCIA09_428_1 +LGG__BraTS19_TCIA10_175_1 +LGG__BraTS19_TCIA10_276_1 +LGG__BraTS19_TCIA10_393_1 +LGG__BraTS19_TCIA10_408_1 +LGG__BraTS19_TCIA10_410_1 +LGG__BraTS19_TCIA10_449_1 +LGG__BraTS19_TCIA10_490_1 +LGG__BraTS19_TCIA10_625_1 +LGG__BraTS19_TCIA10_637_1 +LGG__BraTS19_TCIA12_249_1 +LGG__BraTS19_TCIA12_466_1 +LGG__BraTS19_TCIA13_615_1 +LGG__BraTS19_TCIA13_630_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt new file mode 100644 index 00000000000..c468e57417d --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_11_1 +HGG__BraTS19_2013_21_1 +HGG__BraTS19_2013_2_1 +HGG__BraTS19_2013_4_1 +HGG__BraTS19_CBICA_ABB_1 +HGG__BraTS19_CBICA_ABE_1 +HGG__BraTS19_CBICA_ABM_1 +HGG__BraTS19_CBICA_ANZ_1 +HGG__BraTS19_CBICA_AOP_1 +HGG__BraTS19_CBICA_APY_1 +HGG__BraTS19_CBICA_AQA_1 +HGG__BraTS19_CBICA_AQO_1 +HGG__BraTS19_CBICA_AQU_1 +HGG__BraTS19_CBICA_ARW_1 +HGG__BraTS19_CBICA_ASV_1 +HGG__BraTS19_CBICA_AUN_1 +HGG__BraTS19_CBICA_AUW_1 +HGG__BraTS19_CBICA_AUX_1 +HGG__BraTS19_CBICA_AVB_1 +HGG__BraTS19_CBICA_AVF_1 +HGG__BraTS19_CBICA_AWX_1 +HGG__BraTS19_CBICA_AXO_1 +HGG__BraTS19_CBICA_AYW_1 +HGG__BraTS19_CBICA_BAX_1 +HGG__BraTS19_CBICA_BEM_1 +HGG__BraTS19_CBICA_BHK_1 +HGG__BraTS19_CBICA_BHM_1 +HGG__BraTS19_CBICA_BLJ_1 +HGG__BraTS19_TCIA01_150_1 
+HGG__BraTS19_TCIA01_203_1 +HGG__BraTS19_TCIA01_235_1 +HGG__BraTS19_TCIA01_401_1 +HGG__BraTS19_TCIA01_448_1 +HGG__BraTS19_TCIA01_499_1 +HGG__BraTS19_TCIA02_168_1 +HGG__BraTS19_TCIA02_222_1 +HGG__BraTS19_TCIA02_226_1 +HGG__BraTS19_TCIA02_283_1 +HGG__BraTS19_TCIA02_290_1 +HGG__BraTS19_TCIA02_309_1 +HGG__BraTS19_TCIA02_394_1 +HGG__BraTS19_TCIA02_455_1 +HGG__BraTS19_TCIA02_606_1 +HGG__BraTS19_TCIA03_133_1 +HGG__BraTS19_TCIA04_192_1 +HGG__BraTS19_TCIA04_361_1 +HGG__BraTS19_TCIA06_332_1 +HGG__BraTS19_TCIA08_167_1 +HGG__BraTS19_TCIA08_205_1 +HGG__BraTS19_TCIA08_234_1 +HGG__BraTS19_TCIA08_242_1 +HGG__BraTS19_TCIA08_278_1 +HGG__BraTS19_TCIA08_436_1 +HGG__BraTS19_TMC_12866_1 +LGG__BraTS19_2013_15_1 +LGG__BraTS19_2013_1_1 +LGG__BraTS19_TCIA09_312_1 +LGG__BraTS19_TCIA10_109_1 +LGG__BraTS19_TCIA10_130_1 +LGG__BraTS19_TCIA10_152_1 +LGG__BraTS19_TCIA10_241_1 +LGG__BraTS19_TCIA10_282_1 +LGG__BraTS19_TCIA10_325_1 +LGG__BraTS19_TCIA10_639_1 +LGG__BraTS19_TCIA13_618_1 +LGG__BraTS19_TCIA13_633_1 +LGG__BraTS19_TMC_09043_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt new file mode 100644 index 00000000000..171a51a02a8 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_12_1 +HGG__BraTS19_2013_14_1 +HGG__BraTS19_2013_18_1 +HGG__BraTS19_2013_20_1 +HGG__BraTS19_2013_26_1 +HGG__BraTS19_CBICA_ABO_1 +HGG__BraTS19_CBICA_ALX_1 +HGG__BraTS19_CBICA_ANP_1 +HGG__BraTS19_CBICA_AOS_1 +HGG__BraTS19_CBICA_AOZ_1 +HGG__BraTS19_CBICA_AQT_1 +HGG__BraTS19_CBICA_ARF_1 +HGG__BraTS19_CBICA_ASE_1 +HGG__BraTS19_CBICA_ASW_1 +HGG__BraTS19_CBICA_ATN_1 +HGG__BraTS19_CBICA_ATV_1 +HGG__BraTS19_CBICA_AUQ_1 +HGG__BraTS19_CBICA_AVG_1 +HGG__BraTS19_CBICA_AVT_1 +HGG__BraTS19_CBICA_AWI_1 +HGG__BraTS19_CBICA_AXW_1 +HGG__BraTS19_CBICA_AYG_1 +HGG__BraTS19_CBICA_AYU_1 +HGG__BraTS19_CBICA_BAP_1 +HGG__BraTS19_CBICA_BCL_1 +HGG__BraTS19_CBICA_BDK_1 +HGG__BraTS19_CBICA_BGG_1 +HGG__BraTS19_CBICA_BGT_1 +HGG__BraTS19_CBICA_BGW_1 +HGG__BraTS19_CBICA_BGX_1 +HGG__BraTS19_TCIA01_186_1 +HGG__BraTS19_TCIA01_429_1 +HGG__BraTS19_TCIA01_460_1 +HGG__BraTS19_TCIA02_171_1 +HGG__BraTS19_TCIA02_370_1 +HGG__BraTS19_TCIA02_374_1 +HGG__BraTS19_TCIA02_377_1 +HGG__BraTS19_TCIA02_473_1 +HGG__BraTS19_TCIA02_491_1 +HGG__BraTS19_TCIA02_607_1 +HGG__BraTS19_TCIA03_296_1 +HGG__BraTS19_TCIA03_338_1 +HGG__BraTS19_TCIA03_419_1 +HGG__BraTS19_TCIA04_437_1 +HGG__BraTS19_TCIA04_479_1 +HGG__BraTS19_TCIA06_247_1 +HGG__BraTS19_TCIA06_603_1 +HGG__BraTS19_TMC_11964_1 +LGG__BraTS19_2013_28_1 +LGG__BraTS19_2013_29_1 +LGG__BraTS19_2013_9_1 +LGG__BraTS19_TCIA09_177_1 +LGG__BraTS19_TCIA09_254_1 +LGG__BraTS19_TCIA10_103_1 +LGG__BraTS19_TCIA10_299_1 +LGG__BraTS19_TCIA10_310_1 +LGG__BraTS19_TCIA10_330_1 +LGG__BraTS19_TCIA10_346_1 +LGG__BraTS19_TCIA10_351_1 +LGG__BraTS19_TCIA10_420_1 +LGG__BraTS19_TCIA10_442_1 +LGG__BraTS19_TCIA10_632_1 +LGG__BraTS19_TCIA10_644_1 +LGG__BraTS19_TCIA12_480_1 +LGG__BraTS19_TCIA13_623_1 +LGG__BraTS19_TCIA13_642_1 +LGG__BraTS19_TCIA13_645_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt new file mode 100644 index 00000000000..0fc2a8bc9cc --- 
/dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_10_1 +HGG__BraTS19_2013_17_1 +HGG__BraTS19_2013_25_1 +HGG__BraTS19_CBICA_AAP_1 +HGG__BraTS19_CBICA_ABY_1 +HGG__BraTS19_CBICA_AMH_1 +HGG__BraTS19_CBICA_ANI_1 +HGG__BraTS19_CBICA_AOO_1 +HGG__BraTS19_CBICA_AQQ_1 +HGG__BraTS19_CBICA_AQR_1 +HGG__BraTS19_CBICA_AQV_1 +HGG__BraTS19_CBICA_AQY_1 +HGG__BraTS19_CBICA_AQZ_1 +HGG__BraTS19_CBICA_ASH_1 +HGG__BraTS19_CBICA_ASN_1 +HGG__BraTS19_CBICA_ASR_1 +HGG__BraTS19_CBICA_ASU_1 +HGG__BraTS19_CBICA_ASY_1 +HGG__BraTS19_CBICA_ATB_1 +HGG__BraTS19_CBICA_ATD_1 +HGG__BraTS19_CBICA_ATF_1 +HGG__BraTS19_CBICA_AUR_1 +HGG__BraTS19_CBICA_AXJ_1 +HGG__BraTS19_CBICA_AXM_1 +HGG__BraTS19_CBICA_AXN_1 +HGG__BraTS19_CBICA_AZD_1 +HGG__BraTS19_CBICA_AZH_1 +HGG__BraTS19_CBICA_BCF_1 +HGG__BraTS19_CBICA_BFP_1 +HGG__BraTS19_CBICA_BGO_1 +HGG__BraTS19_CBICA_BHB_1 +HGG__BraTS19_CBICA_BHV_1 +HGG__BraTS19_CBICA_BHZ_1 +HGG__BraTS19_CBICA_BKV_1 +HGG__BraTS19_TCIA01_201_1 +HGG__BraTS19_TCIA01_425_1 +HGG__BraTS19_TCIA02_117_1 +HGG__BraTS19_TCIA02_118_1 +HGG__BraTS19_TCIA02_198_1 +HGG__BraTS19_TCIA02_300_1 +HGG__BraTS19_TCIA02_322_1 +HGG__BraTS19_TCIA02_605_1 +HGG__BraTS19_TCIA03_199_1 +HGG__BraTS19_TCIA03_265_1 +HGG__BraTS19_TCIA04_149_1 +HGG__BraTS19_TCIA05_396_1 +HGG__BraTS19_TCIA05_444_1 +HGG__BraTS19_TCIA06_211_1 +HGG__BraTS19_TCIA06_409_1 +HGG__BraTS19_TCIA08_319_1 +HGG__BraTS19_TCIA08_406_1 +HGG__BraTS19_TMC_06290_1 +HGG__BraTS19_TMC_06643_1 +HGG__BraTS19_TMC_27374_1 +LGG__BraTS19_2013_0_1 +LGG__BraTS19_2013_16_1 +LGG__BraTS19_2013_24_1 +LGG__BraTS19_2013_8_1 +LGG__BraTS19_TCIA09_493_1 +LGG__BraTS19_TCIA10_202_1 +LGG__BraTS19_TCIA10_261_1 +LGG__BraTS19_TCIA10_307_1 +LGG__BraTS19_TCIA10_387_1 +LGG__BraTS19_TCIA12_101_1 +LGG__BraTS19_TCIA13_634_1 +LGG__BraTS19_TCIA13_650_1 +LGG__BraTS19_TCIA13_653_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py new file mode 100644 index 00000000000..e5590bdb338 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py @@ -0,0 +1,72 @@ +# coding=utf-8 +# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. +# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from multiprocessing import Pool +import os +from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax + +def load_predictions(predictions, dictionaries, validation_indices): + assert len(predictions) == len(dictionaries),"Number of predictions does not match number of samples in validation set!" 
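+    # Each prediction was produced on a volume padded to a fixed 224x224x160 grid; the loop
+    # below reshapes the flat network output to (4, D, H, W) and then crops away the padding,
+    # using the original "size_after_cropping" recorded in each sample's metadata dictionary.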
+ padded_shape = [224,224,160] + results = [None for i in range(len(predictions))] + for i in range(len(predictions)): + qsl_idx = validation_indices[i] + prediction = predictions[qsl_idx] + assert qsl_idx >= 0 and qsl_idx < len(predictions), "Invalid qsl_idx!" + raw_shape = list(dictionaries[qsl_idx]["size_after_cropping"]) + # Remove the padded part + pad_before = [(p - r) // 2 for p, r in zip(padded_shape, raw_shape)] + pad_after = [-(p - r - b) for p, r, b in zip(padded_shape, raw_shape, pad_before)] + result_shape = (4,) + tuple(padded_shape) + result = np.reshape(prediction, result_shape).astype(np.float32) + results[qsl_idx] = result[:, pad_before[0]:pad_after[0], pad_before[1]:pad_after[1], pad_before[2]:pad_after[2]] + assert all([i is not None for i in results]), "Missing some results!" + return results + +def postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files): + processed_predictions = load_predictions(predictions, dictionaries, validation_indices) + print("Running postprocessing with multiple threads...") + force_separate_z=None + interp_order=3 + interp_order_z=0 + num_threads_nifti_save = 12 + all_in_gpu = "None" + print("Saving predictions...") + pool = Pool(num_threads_nifti_save) + results = [] + for i, output_filename in enumerate(output_files): + print(i, "/", len(output_files)) + output_filename = os.path.join(output_folder, output_filename + ".nii.gz") + softmax_mean = processed_predictions[i] + dct = dictionaries[i] + bytes_per_voxel = 4 + if all_in_gpu: + bytes_per_voxel = 2 # if all_in_gpu then the return value is half (float16) + if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85): # * 0.85 just to be save + print( + "This output is too large for python process-process communication. Saving output temporarily to disk") + np.save(output_filename[:-7] + ".npy", softmax_mean) + softmax_mean = output_filename[:-7] + ".npy" + + results.append(pool.starmap_async(save_segmentation_nifti_from_softmax, + ((softmax_mean, output_filename, dct, interp_order, None, None, None, + None, None, force_separate_z, interp_order_z),) + )) + + _ = [i.get() for i in results] + pool.close() + pool.join() diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py new file mode 100644 index 00000000000..048eb0e91cb --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py @@ -0,0 +1,109 @@ +# coding=utf-8 +# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. +# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This file has been copied from +# https://github.com/mlcommons/inference/blob/r0.7/vision/medical_imaging/3d-unet/preprocess.py + +import argparse +import numpy +import os +import pickle +import sys +import torch + +from batchgenerators.augmentations.utils import pad_nd_image +from batchgenerators.utilities.file_and_folder_operations import subfiles +from nnunet.training.model_restore import load_model_and_checkpoint_files +from nnunet.inference.predict import preprocess_multithreaded + +def preprocess_MLPerf(model, checkpoint_name, folds, fp16, list_of_lists, output_filenames, preprocessing_folder, num_threads_preprocessing): + assert len(list_of_lists) == len(output_filenames) + print("loading parameters for folds", folds) + trainer, params = load_model_and_checkpoint_files(model, folds, fp16, checkpoint_name=checkpoint_name) + + print("starting preprocessing generator") + preprocessing = preprocess_multithreaded(trainer, list_of_lists, output_filenames, num_threads_preprocessing, None) + print("Preprocessing images...") + all_output_files = [] + + for preprocessed in preprocessing: + output_filename, (d, dct) = preprocessed + + all_output_files.append(output_filename) + if isinstance(d, str): + data = np.load(d) + os.remove(d) + d = data + + # Pad to the desired full volume + d = pad_nd_image(d, trainer.patch_size, "constant", None, False, None) + + with open(os.path.join(preprocessing_folder, output_filename+ ".pkl"), "wb") as f: + pickle.dump([d, dct], f) + f.close() + + return all_output_files + + +def preprocess_setup(preprocessed_data_dir): + print("Preparing for preprocessing data...") + + # Validation set is fold 1 + fold = 1 + import sys + import os + CURRENT_DIR = os.path.split(os.path.abspath(__file__))[0] + #validation_fold_file = '/workspace/intelai_models/inference/nnUNet/folds/fold1_validation.txt' + validation_fold_file = os.path.join(CURRENT_DIR, 'folds/fold1_validation.txt') + # Make sure the model exists + model_dir = 'build/result/nnUNet/3d_fullres/Task043_BraTS2019/nnUNetTrainerV2__nnUNetPlansv2.mlperf.1' + model_path = os.path.join(model_dir, "plans.pkl") + assert os.path.isfile(model_path), "Cannot find the model file {:}!".format(model_path) + checkpoint_name = "model_final_checkpoint" + + # Other settings + fp16 = False + num_threads_preprocessing = 12 + raw_data_dir = 'build/raw_data/nnUNet_raw_data/Task043_BraTS2019/imagesTr' + + # Open list containing validation images from specific fold (e.g. 1) + validation_files = [] + with open(validation_fold_file) as f: + for line in f: + validation_files.append(line.rstrip()) + + # Create output and preprocessed directory + if not os.path.isdir(preprocessed_data_dir): + os.makedirs(preprocessed_data_dir) + + # Create list of images locations (i.e. 
4 images per case => 4 modalities) + all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True) + list_of_lists = [[os.path.join(raw_data_dir, i) for i in all_files if i[:len(j)].startswith(j) and + len(i) == (len(j) + 12)] for j in validation_files] + + # Preprocess images, returns filenames list + # This runs in multiprocess + print("Actually preprocessing data...") + + preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists, + validation_files, preprocessed_data_dir, num_threads_preprocessing) + + print("Saving metadata of the preprocessed data...") + with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "wb") as f: + pickle.dump(preprocessed_files, f) + + print("Preprocessed data saved to {:}".format(preprocessed_data_dir)) + print("Done!") diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py new file mode 100644 index 00000000000..bf4d5981497 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py @@ -0,0 +1,81 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + +import os, shutil +import argparse +import sys +import zipfile +#import sys +#print(sys.path) +#sys.path.append('/home/sys_dltest/lpot/lz/frameworks.ai.models.intel-models/models/image_segmentation/tensorflow/3d_unet_mlperf') +from nnUNet.Task043_BraTS_2019 import task_setup +from nnUNet.preprocess import preprocess_setup + +BUILD_DIR = 'build' +RAW_DATA_DIR = BUILD_DIR + '/raw_data' +PREPROCESSED_DATA_DIR = BUILD_DIR + '/preprocessed_data' +POSTPROCESSED_DATA_DIR = BUILD_DIR + '/postprocessed_data' +MODEL_DIR = BUILD_DIR + '/model' +RESULT_DIR = BUILD_DIR + '/result' +TF_MODEL = '224_224_160.pb' +OTHER_FILES = 'fold_1.zip' + +def create_directories(): + print("Creating directories") + if not os.path.isdir(BUILD_DIR): + os.makedirs(BUILD_DIR) + if not os.path.isdir(RAW_DATA_DIR): + os.makedirs(RAW_DATA_DIR) + if not os.path.isdir(PREPROCESSED_DATA_DIR): + os.makedirs(PREPROCESSED_DATA_DIR) + if not os.path.isdir(POSTPROCESSED_DATA_DIR): + os.makedirs(POSTPROCESSED_DATA_DIR) + if not os.path.isdir(RESULT_DIR): + os.makedirs(RESULT_DIR) + if not os.path.isdir(MODEL_DIR): + os.makedirs(MODEL_DIR) + +def download_model(input_graph): + pwd = os.getcwd() + os.chdir(os.path.join(pwd, MODEL_DIR)) + if input_graph == 'NONE': + print("Downloading TF model from Zenodo") + if not os.path.isfile(TF_MODEL): + os.system('wget -O 224_224_160.pb https://zenodo.org/record/3928991/files/224_224_160.pb?download=1;') + os.chdir(os.path.join(pwd, RESULT_DIR)) + if not os.path.isfile(OTHER_FILES): + os.system('wget -O fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip?download=1;') + zip_file = "fold_1.zip" + #legacy bitmap issue https://bugzilla.redhat.com/show_bug.cgi?id=1802689 + if (not os.path.isfile(OTHER_FILES)): + os.system('curl -O --output fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip') + try: + with zipfile.ZipFile(zip_file) as z: + z.extractall() + print("Extracted all") + except: + print("Could not extract fold_1.zip") + os.chdir(pwd) + +def setup(downloaded_data_dir, input_graph='NONE'): + create_directories() + download_model(input_graph) + task_setup(downloaded_data_dir) + preprocess_setup(PREPROCESSED_DATA_DIR) diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..d5069f8038f --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +nnunet +tensorflow \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..36f8d8502f0 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh @@ -0,0 +1,65 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + export BUILD_DIR=${dataset_location} + export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data + export nnUNet_raw_data_base=${BUILD_DIR}/raw_data + export RESULTS_FOLDER=${BUILD_DIR}/result + run_benchmark + +} + +# init params +function init_params { + iters=100 + batch_size=1 + for var in "$@" + do + case $var in + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + 
dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+    if [[ ${bfloat16} == "true" ]]; then
+        extra_cmd="--bfloat16"
+    else
+        extra_cmd=""
+    fi
+
+    python main.py \
+            --input-model=${input_model} \
+            --data-location=${dataset_location} \
+            --calib-preprocess=${BUILD_DIR}/calib_preprocess \
+            --batch-size=${batch_size} \
+            --mode=${mode} \
+            --iters=${iters} \
+            ${extra_cmd}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..79256545613
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  export BUILD_DIR=${dataset_location}
+  export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data
+  export nnUNet_raw_data_base=${BUILD_DIR}/raw_data
+  export RESULTS_FOLDER=${BUILD_DIR}/result
+  run_tuning
+
+}
+
+# init params
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+    python main.py \
+            --input-model=${input_model} \
+            --output-model=${output_model} \
+            --data-location=${dataset_location} \
+            --calib-preprocess=${BUILD_DIR}/calib_preprocess \
+            --mode=tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md
new file mode 100644
index 00000000000..6fa291d0b36
--- /dev/null
+++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md
@@ -0,0 +1,117 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the Intel® Neural Compressor tuning result for the TensorFlow arbitrary style transfer model.
+This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+## 1. Installation
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks).
+
+### Install Additional Dependency packages
+```shell
+cd examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for Tensorflow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental; it is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Pretrained model
+
+#### Automated approach
+Run the `prepare_model.py` script located in `examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq`.
+
+```
+usage: prepare_model.py [-h] [--model_path MODEL_PATH]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --model_path MODEL_PATH
+                        directory to put models, default is ./model
+```
+
+#### Manual approach
+
+```shell
+wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz
+tar -xvzf arbitrary_style_transfer.tar.gz
+```
+
+## 3. Prepare Dataset
+This example ships two folders, `style_images` and `content_images`, in the current directory. Use them to generate stylized images for testing, or prepare your own style and content images.
+
+# Run Command
+```shell
+python main.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt
+```
+
+## Quantization
+```shell
+bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model
+```
+## Benchmark
+```shell
+bash run_benchmark.sh --dataset_location=style_images/,content_images/ --input_model=saved_model.pb --batch_size=1
+```
+
+Details of enabling Intel® Neural Compressor on style transfer for Tensorflow
+=========================
+
+This is a tutorial on how to enable the style_transfer model with Intel® Neural Compressor.
+## User Code Analysis
+1. The user specifies the fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader*, and a metric.
+
+2. The user specifies the fp32 *model*, calibration dataset *q_dataloader*, and a custom *eval_func* that encapsulates the evaluation dataset and metric by itself.
+
+For style_transfer we take the second approach because there is no metric for the style transfer model: we implement the *q_dataloader* and a fake *eval_func*. Since Neural Compressor already provides a style transfer dataset, only the *eval_func* needs to be prepared after loading the graph.
+
+### Evaluation Part Adaptation
+Because style transfer has no metric to measure accuracy, we only implement a fake *eval_func*:
+```python
+def eval_func(model):
+    return 1.
+```
+
+Here we set the input and output tensor names in the *inputs* and *outputs* fields. In this case we only calibrate and quantize the model without tuning for accuracy.
+
+### Code update
+
+After the prepare step is done, we just need to add a few lines to get the quantized model, as shown after the calibration dataloader sketch below.
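+For reference, the `calib_dataloader` passed to `quantize_model` below is built from the `StyleTransferDataset` and `BaseDataLoader` utilities shipped with this example (see `data_process.py` and `main.py`). A condensed sketch, with the bundled folder paths used only as placeholders:
+```python
+from neural_compressor.tensorflow.utils import BaseDataLoader
+from data_process import StyleTransferDataset
+
+# Pair every content image with every style image for calibration;
+# the folder paths below are illustrative and can point to your own data.
+dataset = StyleTransferDataset(
+    content_folder="./content_images",
+    style_folder="./style_images",
+    crop_ratio=0.2,
+    resize_shape=(256, 256),
+)
+calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1)
+```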
+```python +from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + +quant_config = StaticQuantConfig() +q_model = quantize_model(graph, quant_config, calib_dataloader) +q_model.save(FLAGS.output_model) +``` diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg new file mode 100644 index 00000000000..5f6c5a6beb5 Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg differ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg new file mode 100644 index 00000000000..248d9fd31f9 Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg differ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py new file mode 100644 index 00000000000..d49c262bbec --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py @@ -0,0 +1,362 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import glob +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + + +class StyleTransferDataset(object): + """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend. + + This Dataset is to construct a dataset from two specific image holders representing + content image folder and style image folder. + """ + + def __init__( + self, + content_folder, + style_folder, + crop_ratio=0.1, + resize_shape=(256, 256), + image_format="jpg", + transform=None, + filter=None, + ): + """Initialize `StyleTransferDataset` class. + + Args: + content_folder (str): Root directory of content images. + style_folder (str): Root directory of style images. + crop_ratio (float, default=0.1): Cropped ratio to each side. + resize_shape (tuple, default=(256, 256)): Target size of image. + image_format (str, default='jpg'): Target image format. + transform (transform object, default=None): Transform to process input data. + filter (Filter objects, default=None): Filter out examples according to specific conditions. 
+ """ + self.transform = transform + self.content_folder = content_folder + self.style_folder = style_folder + self.resize_shape = resize_shape + self.crop_ratio = crop_ratio + self.content_images = glob.glob(os.path.join(content_folder, "*" + image_format)) + self.style_images = glob.glob(os.path.join(style_folder, "*" + image_format)) + self.image_list = [] + for content in self.content_images: + for style in self.style_images: + self.image_list.append((content, style)) + + def __len__(self): + """Return the length of dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + from PIL import Image + + content_image, style_image = self.image_list[index] + content_image = Image.open(content_image) + style_image = Image.open(style_image) + width, height = style_image.size + crop_ratio = self.crop_ratio + crop_box = (crop_ratio * height, crop_ratio * width, (1 - crop_ratio) * height, (1 - crop_ratio) * width) + content_image = np.asarray(content_image.resize(self.resize_shape)) + style_image = np.asarray(style_image.resize(self.resize_shape)) + if content_image.max() > 1.0: + content_image = content_image / 255.0 + if style_image.max() > 1.0: + style_image = style_image / 255.0 + + return (content_image, style_image), 0 + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + +class ParseDecodeVocTransform(): + """Parse features in Example proto. + + Returns: + tuple of parsed image and labels + """ + + def __call__(self, sample): + """Parse decode voc.""" + + # Currently only supports jpeg and png. + # Need to use this logic because the shape is not known for + # tf.image.decode_image and we rely on this info to + # extend label if necessary. 
+ def _decode_image(content, channels): + """Decode the image with content.""" + return tf.cond( + tf.image.is_jpeg(content), + lambda: tf.image.decode_jpeg(content, channels), + lambda: tf.image.decode_png(content, channels), + ) + + features = { + "image/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/filename": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="jpeg"), + "image/height": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/width": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/segmentation/class/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/segmentation/class/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="png"), + } + + parsed_features = tf.compat.v1.parse_single_example(sample, features) + + image = _decode_image(parsed_features["image/encoded"], channels=3) + + label = None + label = _decode_image(parsed_features["image/segmentation/class/encoded"], channels=1) + + sample = { + "image": image, + } + + label.set_shape([None, None, 1]) + + sample["labels_class"] = label + + return sample["image"], sample["labels_class"] + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. 
+ """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py new file mode 100644 index 00000000000..440b0cee4af --- 
/dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py @@ -0,0 +1,207 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +import os +import io +import skimage.io +import glob +import numpy as np +import tensorflow.compat.v1 as tf +from PIL import Image +import time + +from neural_compressor.tensorflow.utils import BaseDataLoader, DummyDatasetV2 +from data_process import ( + StyleTransferDataset, + ComposeTransform, + ParseDecodeVocTransform, +) + +flags = tf.flags +flags.DEFINE_string('style_images_paths', None, 'Paths to the style images' + 'for evaluation.') +flags.DEFINE_string('content_images_paths', None, 'Paths to the content images' + 'for evaluation.') +flags.DEFINE_string('output_dir', './result', 'Output stylized image directory.') + +flags.DEFINE_string('output_model', None, 'Output model directory.') + +flags.DEFINE_string('input_model', None, 'Output directory.') + +flags.DEFINE_integer('batch_size', 1, 'batch_size') + +flags.DEFINE_bool('tune', False, 'if use tune') + +FLAGS = flags.FLAGS + +def load_img(path, resize_shape=(256, 256), crop_ratio=0.1): + img = Image.open(path) + width, height = img.size + crop_box = (crop_ratio*height, crop_ratio*width, (1-crop_ratio)*height, (1-crop_ratio)*width) + img = np.asarray(img.crop(crop_box).resize(resize_shape)) + if img.max() > 1.0: + img = img / 255. + img = img.astype(np.float32)[np.newaxis, ...] + return img + +def save_image(image, output_file, save_format='jpeg'): + image = np.uint8(image * 255.0) + buf = io.BytesIO() + skimage.io.imsave(buf, np.squeeze(image, 0), format=save_format) + buf.seek(0) + f = tf.gfile.GFile(output_file, 'w') + f.write(buf.getvalue()) + f.close() + +def image_style_transfer(sess, content_img_path, style_img_path): + stylized_images = sess.graph.get_tensor_by_name('import/import/transformer/expand/conv3/conv/Sigmoid:0') + style_img_np = load_img(style_img_path, crop_ratio=0) + content_img_np = load_img(content_img_path, crop_ratio=0) + stylized_image_res = sess.run( + stylized_images, + feed_dict={ + 'import/import/style_input:0': style_img_np, + 'import/import/content_input:0': content_img_np}) + # saves stylized image. 
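+    # save_image rescales the [0, 1] float output to uint8 before writing the JPEG into FLAGS.output_dir.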
+ save_image(stylized_image_res, os.path.join(FLAGS.output_dir, 'stylized_image.jpg')) + +def main(args=None): + tf.logging.set_verbosity(tf.logging.INFO) + if not tf.gfile.Exists(FLAGS.output_dir): + tf.gfile.MkDir(FLAGS.output_dir) + + with tf.Session() as sess: + if FLAGS.input_model.rsplit('.', 1)[-1] == 'ckpt': + style_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='style_input') + content_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='content_input') + # import meta_graph + meta_data_path = FLAGS.input_model + '.meta' + saver = tf.train.import_meta_graph(meta_data_path, clear_devices=True) + + sess.run(tf.global_variables_initializer()) + saver.restore(sess, FLAGS.input_model) + graph_def = sess.graph.as_graph_def() + + replace_style = 'style_image_processing/ResizeBilinear_2' + replace_content = 'batch_processing/batch' + for node in graph_def.node: + for idx, input_name in enumerate(node.input): + # replace style input and content input nodes to placeholder + if replace_content == input_name: + node.input[idx] = 'content_input' + if replace_style == input_name: + node.input[idx] = 'style_input' + + if FLAGS.tune: + from neural_compressor.tensorflow.quantization.utils.utility import _parse_ckpt_bn_input + _parse_ckpt_bn_input(graph_def) + output_name = 'transformer/expand/conv3/conv/Sigmoid' + frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, [output_name]) + # use frozen pb instead + elif FLAGS.input_model.rsplit('.', 1)[-1] == 'pb': + with open(FLAGS.input_model, 'rb') as f: + frozen_graph = tf.GraphDef() + frozen_graph.ParseFromString(f.read()) + else: + print("not supported model format") + exit(-1) + + if FLAGS.tune: + with tf.Graph().as_default() as graph: + tf.import_graph_def(frozen_graph, name='') + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + dataset = StyleTransferDataset( + content_folder=FLAGS.content_images_paths.strip(), + style_folder=FLAGS.style_images_paths.strip(), + transform=ComposeTransform(transform_list= [ + ParseDecodeVocTransform(), + ] + ) + ) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + + quant_config = StaticQuantConfig() + q_model = quantize_model(graph, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + frozen_graph= q_model.graph_def + + # validate the quantized model here + with tf.Graph().as_default(), tf.Session() as sess: + if FLAGS.tune: + # create dataloader using default style_transfer dataset + # generate stylized images + dataset = StyleTransferDataset( + content_folder=FLAGS.content_images_paths.strip(), + style_folder=FLAGS.style_images_paths.strip(), + crop_ratio=0.2, + resize_shape=(256, 256) + ) + else: + dataset = DummyDatasetV2(input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, )) + + dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + tf.import_graph_def(frozen_graph, name='') + style_transfer(sess, dataloader) + +def add_import_to_name(sess, name, try_cnt=2): + for i in range(0, try_cnt): + try: + sess.graph.get_tensor_by_name(name) + return name + except: + name = 'import/' + name + + raise ValueError('can not find tensor by name') + +# validate and save the files +def style_transfer(sess, dataloader): + time_list = [] + output_name = add_import_to_name(sess, 'transformer/expand/conv3/conv/Sigmoid:0', 3) + style_name = add_import_to_name(sess, 'style_input:0', 3) + 
content_name = add_import_to_name(sess, 'content_input:0', 3) + + stylized_images = sess.graph.get_tensor_by_name(output_name) + + for idx, ((content_img_np, style_img_np), _) in enumerate(dataloader): + start_time = time.time() + stylized_image_res = sess.run( + stylized_images, + feed_dict={ + style_name: style_img_np, + content_name: content_img_np}) + duration = time.time() - start_time + time_list.append(duration) + if idx + 1 == 20: + break + warm_up = 1 + throughput = (len(time_list) - warm_up)/ np.array(time_list[warm_up:]).sum() + print('Batch size = {}'.format(FLAGS.batch_size)) + print('Latency: {:.3f} ms'.format(np.array(time_list[warm_up:]).mean() * 1000)) + print('Throughput: {:.3f} images/sec'.format(throughput)) + + +def run_tuning(): + tf.disable_v2_behavior() + tf.app.run(main) + +if __name__ == '__main__': + run_tuning() diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..74182ad5f37 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py @@ -0,0 +1,33 @@ +import os +import argparse +import enum +import tarfile +import abc + +def get_pretrained_model(destination): + """ + Obtains a ready to use style_transfer model file. + Args: + destination: path to where the file should be stored + """ + url = "https://storage.googleapis.com/download.magenta.tensorflow.org/models/ \ + arbitrary_style_transfer.tar.gz" + + os.system("curl -o arbitrary_style_transfer.tar.gz {0}".format(url)) + with tarfile.open("arbitrary_style_transfer.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare pre-trained model for style transfer model') + parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model') + + args = parser.parse_args() + model_path = args.model_path + try: + get_pretrained_model(model_path) + except AttributeError: + print("The model fetched failed.") + diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..1e5d462dcd4 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +scikit-image +Pillow>=8.2.0 diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..41fee820958 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + iters=100 + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + 
batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo ${var} |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}') + content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') + echo "$style_images, $content_images" + + python main.py \ + --input_model "${input_model}" \ + --style_images_paths "${style_images}" \ + --content_images_paths "${content_images}" \ + --batch_size "${batch_size}" \ + --tune=False \ + --output_model "${output_model}" + +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..4fdfdd2e8a5 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}') + content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') + echo "$style_images, $content_images" + + python main.py \ + --input_model "${input_model}" \ + --style_images_paths "${style_images}" \ + --content_images_paths "${content_images}" \ + --config "./conf.yaml" \ + --tune=True \ + --output_model "${output_model}" +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg new file mode 100644 index 00000000000..5af5a0eff59 Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg differ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg new file mode 100644 index 00000000000..bb0c46ea1de Binary files /dev/null and b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg differ diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index 004393c8c27..c92ee43d1c2 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -87,7 +87,7 @@ def __init__(self, framework_specific_info): self.fold_conv = [] self.keras3 = True if version1_gte_version2(tf.__version__, "2.16.1") else False if not os.path.exists(DEFAULT_WORKSPACE): - os.mkdir(DEFAULT_WORKSPACE) + os.makedirs(DEFAULT_WORKSPACE) self.tmp_dir = (DEFAULT_WORKSPACE + "tmp_model.keras") if self.keras3 else 
(DEFAULT_WORKSPACE + "tmp_model") def _set_weights(self, qmodel, layer_weights): @@ -203,17 +203,20 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers.append(layer) else: for bound_node in layer._inbound_nodes: - inbound_layer = bound_node.inbound_layers - if inbound_layer in self.bn_weights.keys(): - for bn_inbound_node in inbound_layer._inbound_nodes: - bn_inbound_layer = bn_inbound_node.inbound_layers - if bn_inbound_layer.name in self.conv_weights.keys(): - new_bound_nodes.append(bn_inbound_node) - else: - if bound_node not in new_bound_nodes: - new_bound_nodes.append(bound_node) - else: - new_bound_nodes.append(bound_node) + inbound_layers = bound_node.inbound_layers + if not isinstance(inbound_layers, list): + inbound_layers = [inbound_layers] + for inbound_layer in inbound_layers: + if inbound_layer in self.bn_weights.keys(): + for bn_inbound_node in inbound_layer._inbound_nodes: + bn_inbound_layer = bn_inbound_node.inbound_layers + if bn_inbound_layer.name in self.conv_weights.keys(): + new_bound_nodes.append(bn_inbound_node) + else: + if bound_node not in new_bound_nodes: + new_bound_nodes.append(bound_node) + else: + new_bound_nodes.append(bound_node) layer._inbound_nodes.clear() for bound_node in new_bound_nodes: @@ -718,12 +721,9 @@ def _parse_inputs(self, BN_fused_layers=None, conv_names=None): for out_layer_name in out_layer_names: if out_layer_name not in input_layer_dict: - input_layer_dict[out_layer_name] = set([layer.name]) + input_layer_dict[out_layer_name] = [layer.name] else: - input_layer_dict[out_layer_name].add(layer.name) - - for key in input_layer_dict.keys(): - input_layer_dict[key] = list(input_layer_dict[key]) + input_layer_dict[out_layer_name].append(layer.name) try: model_input = self.model.input diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 752f8d4ecbe..c7b2d5b0549 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -113,7 +113,7 @@ def register_supported_configs(cls) -> List[OperatorConfig]: weight_algorithm=["minmax", "kl"], act_dtype=["int8", "bf16", "fp32"], act_sym=[True, False], - act_granularity=["per_tensor", "per_channel"], + act_granularity=["per_tensor"], act_algorithm=["minmax", "kl"], ) operators = [ @@ -173,7 +173,7 @@ def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["Sta weight_algorithm=["minmax", "kl"], act_dtype=["int8", "fp32"], act_sym=[True, False], - act_granularity=["per_tensor", "per_channel"], + act_granularity=["per_tensor"], act_algorithm=["minmax", "kl"], ) diff --git a/neural_compressor/tensorflow/utils/model_wrappers.py b/neural_compressor/tensorflow/utils/model_wrappers.py index e1a58f2f53b..baeaa746914 100644 --- a/neural_compressor/tensorflow/utils/model_wrappers.py +++ b/neural_compressor/tensorflow/utils/model_wrappers.py @@ -1429,6 +1429,7 @@ def graph_info(self): def save(self, root, *args, **kwargs): """Save Keras model.""" self._model_object.save(root) + logger.info("Save quantized model to {}.".format(root)) @property def input_node_names(self):