From 80a9d604b2a5f86444a2a6fe87dc68581d0eaf67 Mon Sep 17 00:00:00 2001 From: randyh62 <42045079+randyh62@users.noreply.github.com> Date: Thu, 15 Feb 2024 08:01:36 -0800 Subject: [PATCH] Documentation - reorg for diataxis (#102) * reorg with doxyfile * updates from Kiriti --- LICENSE.txt | 2 +- docs/examples.md | 7 - docs/examples.rst | 15 ++ docs/how-to/architecture.rst | 31 +++ docs/how-to/framework.rst | 234 ++++++++++++++++++ docs/how-to/index.rst | 18 ++ docs/how-to/overview.rst | 112 +++++++++ docs/how-to/using-with-cpp.rst | 134 ++++++++++ .../ch4.md => how-to/using-with-python.rst} | 214 ++++++++++------ docs/index.rst | 60 +++++ docs/install/install.rst | 208 ++++++++++++++++ docs/license.md | 4 - docs/license.rst | 11 + docs/sphinx/_toc.yml.in | 51 ++-- docs/user_guide/ch1.md | 83 ------- docs/user_guide/ch2.md | 21 -- docs/user_guide/ch3.md | 42 ---- docs/user_guide/ch5.md | 164 ------------ docs/user_guide/ch6.md | 112 --------- 19 files changed, 999 insertions(+), 524 deletions(-) delete mode 100644 docs/examples.md create mode 100644 docs/examples.rst create mode 100644 docs/how-to/architecture.rst create mode 100644 docs/how-to/framework.rst create mode 100644 docs/how-to/index.rst create mode 100644 docs/how-to/overview.rst create mode 100644 docs/how-to/using-with-cpp.rst rename docs/{user_guide/ch4.md => how-to/using-with-python.rst} (54%) create mode 100644 docs/index.rst create mode 100644 docs/install/install.rst delete mode 100644 docs/license.md create mode 100644 docs/license.rst delete mode 100644 docs/user_guide/ch1.md delete mode 100644 docs/user_guide/ch2.md delete mode 100644 docs/user_guide/ch3.md delete mode 100644 docs/user_guide/ch5.md delete mode 100644 docs/user_guide/ch6.md diff --git a/LICENSE.txt b/LICENSE.txt index dd546006b..cbd5c4f1c 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2022 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/examples.md b/docs/examples.md deleted file mode 100644 index 7e9089c38..000000000 --- a/docs/examples.md +++ /dev/null @@ -1,7 +0,0 @@ -# Examples - -Use the links below to see more examples: - -- [Image Processing](https://github.com/ROCm/rocAL/tree/master/docs/examples/image_processing) - -- [Pytorch](https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch) diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 000000000..1baa5e5bf --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,15 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _examples: + +******************************************************************** +Examples +******************************************************************** + +Use the links below to see more examples: + +* `Image Processing <https://github.com/ROCm/rocAL/tree/master/docs/examples/image_processing>`_ +* `PyTorch <https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch>`_ + diff --git a/docs/how-to/architecture.rst b/docs/how-to/architecture.rst new file mode 100644 index 000000000..739768245 --- /dev/null +++ b/docs/how-to/architecture.rst @@ -0,0 +1,31 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _architecture: + +******************************************************************** +Architecture Components +******************************************************************** + +The rocAL architecture comprises rocAL Master-Graph and ROCm Performance Primitive (RPP) as major components. + +rocAL Master-Graph +=================== + +The rocAL pipeline is built on top of rocAL Master-Graph. 
The architectural components of rocAL Master-Graph are described below: + +**Loader and Processing Modules:** The rocAL Master-Graph consists of two main architectural components, a loader module to load data and a processing module to process data. The loader module is clearly separated from the processing module for a seamless execution without any blockages. The Prefetch queue helps to load data ahead of time and can be configured with user-defined parameters. The Output routine runs in parallel with the load routine, as both have separate queues for storing the result. + +.. figure:: ../data/ch2_arch.png + + rocAL Master-Graph Architecture + +**rocAL Pipeline:** The rocAL pipeline holds great significance, as it contains all the information required to create a rocAL graph with data loader, augmentation nodes, and the output format. Once a rocAL pipeline is created, the user can build, run, and call an iterator to get the next batch of data into the pipeline. The user can install the rocAL pipeline using the rocAL Python package. It supports many operators for data loading and data augmentation. + +ROCm Performance Primitive (RPP) Library +========================================= + +RPP is a comprehensive high-performance computer vision library optimized for the AMD CPU and GPU with HIP and OpenCL backends. It is available under the AMD ROCm software platform. It provides low-level functionality for all rocAL operators for single, image, and tensor datatypes. RPP provides an extensive library for vision augmentations that includes vision functions, color augmentations, filter augmentations, geometric distortions, and a few more features. + +For more information on RPP along with the list of supported kernels, see `ROCm Performance Primitives `_. diff --git a/docs/how-to/framework.rst b/docs/how-to/framework.rst new file mode 100644 index 000000000..9dc4923d8 --- /dev/null +++ b/docs/how-to/framework.rst @@ -0,0 +1,234 @@ +.. 
meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _framework: + +******************************************************************** +ML Framework Integration +******************************************************************** + +rocAL improves the pipeline efficiency by preprocessing the data and parallelizing the data loading on the CPU and running trainings on the GPU. To separate the data loading from the training, rocAL provides TensorFlow and PyTorch iterators and readers as a plugin. The integration process with PyTorch and TensorFlow is described in the sections below. + +.. _pytorch: + +PyTorch Integration +=========================== + +This section demonstrates how to use rocAL with PyTorch for training. Follow the steps below to get started. + +Build PyTorch Docker +-------------------------------- + +Build a rocAL PyTorch docker by following the steps here. + +Create Data-loading Pipeline +---------------------------------------- + +Follow these steps: + +1. Import libraries for `rocAL `_. + +.. code-block:: python + :caption: Import libraries + + from amd.rocal.plugin.pytorch import ROCALClassificationIterator + from amd.rocal.pipeline import Pipeline + import amd.rocal.fn as fn + import amd.rocal.types as types + + +2. See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the `pipeline `_. + +.. 
code-block:: python + :caption: Pipeline for PyTorch + + def trainPipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16): + pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, + rocal_cpu=rocal_cpu, tensor_dtype = types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, + prefetch_queue_depth = 7) + with pipe: + jpegs, labels = fn.readers.file(file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + rocal_device = 'cpu' if rocal_cpu else 'gpu' + # decode = fn.decoders.image(jpegs, output_type=types.RGB,file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + decode = fn.decoders.image_slice(jpegs, output_type=types.RGB, + file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + res = fn.resize(decode, resize_x=224, resize_y=224) + flip_coin = fn.random.coin_flip(probability=0.5) + cmnp = fn.crop_mirror_normalize(res, device="gpu", + output_dtype=types.FLOAT, + output_layout=types.NCHW, + crop=(crop, crop), + mirror=flip_coin, + image_type=types.RGB, + mean=[0.485,0.456,0.406], + std=[0.229,0.224,0.225]) + if(one_hot): + _ = fn.one_hot(labels, num_classes) + pipe.set_outputs(cmnp) + print('rocal "{0}" variant'.format(rocal_device)) + return pipe + + +3. Import libraries for PyTorch. + +.. code-block:: python + :caption: Import libraries for PyTorch + + import torch.nn as nn + import torch.nn.functional as F + import torch.optim as optim + + +4. Call the training pipeline with rocAL classification data `loader `_. + +.. 
code-block:: python + :caption: Call the training pipeline + + def get_pytorch_train_loader(self): + print("in get_pytorch_train_loader function") + pipe_train = trainPipeline(self.data_path, self.batch_size, self.num_classes, self.one_hot, self.local_rank, + self.world_size, self.num_thread, self.crop, self.rocal_cpu, self.fp16) + pipe_train.build() + train_loader = ROCALClassificationIterator(pipe_train, device="cpu" if self.rocal_cpu else "cuda", device_id = self.local_rank) + + +5. Run the `training script `_. + +.. code-block:: python + :caption: Run the training pipeline + + # Training loop + for epoch in range(10): # loop over the dataset multiple times + print("\n epoch:: ",epoch) + running_loss = 0.0 + + for i, (inputs,labels) in enumerate(train_loader, 0): + + sys.stdout.write("\r Mini-batch " + str(i)) + # print("Images",inputs) + # print("Labels",labels) + inputs, labels = inputs.to(device), labels.to(device) + + +6. To see and run a sample training script, refer to `rocAL PyTorch example `_. + +.. _tensorflow: + +TensorFlow Integration +=============================== + +This section demonstrates how to use rocAL with TensorFlow for training. Follow the steps below to get started. + +Build TensorFlow Docker +-------------------------------------- + +Build a rocAL TensorFlow docker by following the steps here. + +Create Data-loading Pipeline +---------------------------------------- + +Follow these steps: + +1. Import libraries for `rocAL_pybind `_. + +.. code-block:: python + :caption: Import libraries + + from amd.rocal.plugin.tf import ROCALIterator + from amd.rocal.pipeline import Pipeline + import amd.rocal.fn as fn + import amd.rocal.types as types + + +2. See a rocAL pipeline for TensorFlow below. It reads data from the TFRecords using TFRecord Reader and uses ``fn.decoders.image`` to decode the raw `images `_. + +.. 
code-block:: python + :caption: Pipeline for TensorFlow + + trainPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=1, rocal_cpu=RUN_ON_HOST, tensor_layout = types.NHWC) + with trainPipe: + inputs = fn.readers.tfrecord(path=TRAIN_RECORDS_DIR, index_path = "", reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap, + features={ + 'image/encoded':tf.io.FixedLenFeature((), tf.string, ""), + 'image/class/label':tf.io.FixedLenFeature([1], tf.int64, -1), + 'image/filename':tf.io.FixedLenFeature((), tf.string, "") + } + ) + jpegs = inputs["image/encoded"] + images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=TRAIN_RECORDS_DIR) + resized = fn.resize(images, resize_x=crop_size[0], resize_y=crop_size[1]) + flip_coin = fn.random.coin_flip(probability=0.5) + cmn_images = fn.crop_mirror_normalize(resized, crop=(crop_size[1], crop_size[0]), + mean=[0,0,0], + std=[255,255,255], + mirror=flip_coin, + output_dtype=types.FLOAT, + output_layout=types.NHWC, + pad_output=False) + trainPipe.set_outputs(cmn_images) + trainPipe.build() + + +3. Import libraries for `TensorFlow `_. + +.. code-block:: python + :caption: Import libraries for TensorFlow + + import tensorflow.compat.v1 as tf + tf.compat.v1.disable_v2_behavior() + import tensorflow_hub as hub + # Call the train pipeline + trainIterator = ROCALIterator(trainPipe) + # Run the training session + i = 0 + with tf.compat.v1.Session(graph = train_graph) as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + while i < NUM_TRAIN_STEPS: + + + for t, (train_image_ndArray, train_label_ndArray) in enumerate(trainIterator, 0): + train_label_one_hot_list = get_label_one_hot(train_label_ndArray) + + +4. To see and run a sample training script, refer to `rocAL TensorFlow example `_. + + +.. _ml-perf: + +Run MLPerf Resnet50 classification training with rocAL +======================================================= + +#. 
Ensure you have downloaded ``ILSVRC2012_img_val.tar`` (6.3GB) and ``ILSVRC2012_img_train.tar`` (138 GB) files and unzip into ``train`` and ``val`` folders +#. Build `MIVisionX Pytorch docker `_ + + * Run the docker image + + .. code-block:: shell + + sudo docker run -it -v :/data -v /:/dockerx -w /dockerx --privileged --device=/dev/kfd --device=/dev/dri --group-add video --shm-size=4g --ipc="host" --network=host + + .. note:: + Refer to the `docker `_ page for prerequisites and information on building the docker image. + + Optional: Map localhost directory on the docker image + + * Option to map the localhost directory with imagenet dataset folder to be accessed on the docker image. + * Usage: ``-v {LOCAL_HOST_DIRECTORY_PATH}:{DOCKER_DIRECTORY_PATH}`` + +#. Install rocAL ``python_pybind`` plugin as described above +#. Clone `MLPerf `_ repo and checkout ``mlperf-v1.1-rocal`` branch + +.. code-block:: shell + + git clone -b mlperf-v1.1-rocal https://github.com/rrawther/MLPerf-mGPU + +#. Modify ``RN50_AMP_LARS_8GPUS_NCHW.sh`` or ``RN50_AMP_LARS_8GPUS_NHWC.sh`` to reflect correct path for imagenet directory +#. Run appropriate script as needed: + +.. code-block:: shell + + ./RN50_AMP_LARS_8GPUS_NCHW.sh + (or) + ./RN50_AMP_LARS_8GPUS_NHWC.sh + diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst new file mode 100644 index 000000000..113034e55 --- /dev/null +++ b/docs/how-to/index.rst @@ -0,0 +1,18 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _how-to: + +******************************************************************** +How to +******************************************************************** + +This section provides guides on how to use the rocAL library and its +different utilities. 
+ +* :ref:`overview` +* :ref:`architecture` +* :ref:`using-with-cpp` +* :ref:`using-with-python` +* :ref:`framework` diff --git a/docs/how-to/overview.rst b/docs/how-to/overview.rst new file mode 100644 index 000000000..fc3d674ad --- /dev/null +++ b/docs/how-to/overview.rst @@ -0,0 +1,112 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _overview: + +******************************************************************** +rocAL Overview +******************************************************************** + +Overview +========== + +The performance of Deep Learning applications depends upon the efficiency of performance pipelines that can load and preprocess data efficiently to provide a high throughput. The pipelines are typically used to perform tasks such as loading and decoding data, perform a variety of augmentations, perform color-format conversions, etc., before passing the data for training or inference. The Deep Learning frameworks also require the pipelines to support multiple data formats and augmentations to adapt to a variety of datasets and models. This can be achieved by creating processing pipelines that fully utilize the underlying hardware capabilities. + +ROCm™ Augmentation Library (rocAL™) lets the user create hybrid pipelines to maximize the throughput for Machine Learning applications. It helps to create pipelines that can efficiently process images, videos, and a variety of storage formats. The user can program these pipelines using C or Python API. rocAL significantly accelerates data processing on AMD processors. + +To optimize the preprocessing pipeline, rocAL utilizes the following features: + +- Prefetching: Loads the data for the next batch while the existing batch is under process. This parallelization allows more batches to be processed in less time. +- Hybrid execution: Utilizes both the CPU and GPU simultaneously. 
For example, decoding the data on the CPU while running the training on the GPU. +- Hardware decoding: Uses the AMD VCN and VA-API to efficiently decode data on the hardware. +- Batch processing: Groups and processes the data together as a batch. + +.. figure:: ../data/ch1_pipelines.png + + The Role of Pipelines in Deep Learning Applications + +Key Components +================ + +- CPU- or GPU-based implementation for each augmentation and data_loader nodes +- Python and C APIs for easy integration and testing +- Multiple framework support and portable on PyTorch, TensorFlow, and MXNet +- Flexible graphs to help the user create custom pipelines +- Multicore host and multi-GPU execution for the graph +- Support for various augmentations such as fish-eye, water, jitter, non-linear blend, etc., using the AMD ROCm Performance Primitive (RPP) library +- Support for classification, object detection, segmentation, and keypoint data pipelines + +Third-party Integration +======================== + +rocAL provides support for many operators. The module imports are designed like other available data loaders for a smooth integration with training frameworks. The rocal_pybind package provides support for integrating with PyTorch, TensorFlow, and Caffe2. rocAL also supports many data formats such as FileReader, COCO Reader, TFRecordReader, and Lightning Memory-Mapped Database (LMDB), thus offering a unified approach to framework integration. + +rocAL Operators +================= + +rocAL operators offer the flexibility to run on CPU or GPU for building hybrid pipelines. They also support classification and object detection on the workload. Some of the useful operators supported by rocAL are listed below: + +* **Augmentations:** These are used to enhance the data set by adding effects to the original images. + To use the augmentations, import the instance of ``amd.rocal.fn`` into the Python script. 
These augmentation + APIs further call the RPP kernels underneath (HIP/HOST) depending on the backend used to build RPP and rocAL. + +* **Readers:** These are used to read and understand the different types of datasets and their metadata. Some + examples of readers are list of files with folders, LMDB, TFRecord, and JSON file for metadata. To use the + readers, import the instance of ``amd.rocal.readers`` into the Python script. + +* **Decoders:** These are used to support different input formats of images and videos. Decoders extract + data from the datasets that are in compressed formats such as JPEG, MP4, etc. To use the decoders, + import the instance of ``amd.rocal.decoders`` into the Python script. + + +Table 1. Augmentations Available through rocAL +-------------------------------------------------------- + +===================== ========================= ========================================= +Color Augmentations Effects Augmentations Geometry Augmentations +===================== ========================= ========================================= +| Blend | Fog | Crop +| Blur | Jitter | Crop Mirror Normalization +| Brightness | Pixelization | Crop Resize +| Color Temperature | Raindrops | Fisheye Lens +| Color Twist | Snowflakes | Flip (Horizontal, Vertical, and Both) +| Contrast | Salt and Pepper Noise | Lens Correction +| Exposure | | Random Crop +| Gamma | | Resize +| Hue | | Resize Crop Mirror +| Saturation | | Rotation +| Vignette | | Warp Affine +===================== ========================= ========================================= + + +Table 2. 
Readers Available through rocAL +-------------------------------------------------- + +========================================== ===================================================== +Readers Description +========================================== ===================================================== +| File Reader | Reads images from a list of files in a folder(s) +| Video Reader | Reads videos from a list of files in a folder(s) +| Caffe LMDB Reader | Reads (key, value) pairs from Caffe LMDB +| Caffe2 LMDB Reader | Reads (key, value) pairs from Caffe2 LMDB +| COCO Reader – file source and keypoints | Reads images and JSON annotations from COCO dataset +| TFRecord Reader | Reads from a TFRecord dataset +| MXNet Reader | Reads from a RecordIO dataset +========================================== ===================================================== + + +Table 3. Decoders Available through rocAL +--------------------------------------------------- + +====================== ======================================== +Decoders Description +====================== ======================================== +| Image | Decodes JPEG images +| Image_raw | Decodes images in raw format +| Image_random_crop | Decodes and randomly crops JPEG images +| Image_slice | Decodes and slices JPEG images +====================== ======================================== + +To see examples demonstrating the usage of decoders and readers, see +`MIVisionX rocAL Python Binding Examples `_. diff --git a/docs/how-to/using-with-cpp.rst b/docs/how-to/using-with-cpp.rst new file mode 100644 index 000000000..50704c60b --- /dev/null +++ b/docs/how-to/using-with-cpp.rst @@ -0,0 +1,134 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. 
_using-with-cpp: + +******************************************************************** +Using rocAL with C++ API +******************************************************************** + +This chapter explains how to create a pipeline and add augmentations using C++ APIs directly. The Python APIs also call these C++ APIs internally using the Python pybind utility as explained in the section Installing rocAL Python Package. + +C++ Common APIs +======================= + +The following sections list the commonly used C++ APIs. + +rocalCreate +-------------------------- + +Use: To create the pipeline + +Returns: The context for the pipeline + +Arguments: + +* RocalProcessMode: Defines whether rocAL data loading should be on the CPU or `GPU `_. + +.. code-block:: cpp + + RocalProcessMode::ROCAL_PROCESS_GPU + RocalProcessMode::ROCAL_PROCESS_CPU + + +* RocalTensorOutputType: Defines whether the output of rocAL tensor is `FP32 or FP16 `_. + +.. code-block:: cpp + + RocalTensorOutputType::ROCAL_FP32 + RocalTensorOutputType::ROCAL_FP16 + + +See `rocalCreate example `_. + +.. code-block:: cpp + + extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); + + +rocalVerify +------------------------ + +Use: To verify the graph for all the inputs and outputs + +Returns: A status code indicating the success or failure + +See `rocalVerify example `_. + +.. code-block:: cpp + + extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); + + +rocalRun +--------------------- + +Use: To process and run the built and verified graph + +Returns: A status code indicating the success or failure + +See `rocalRun example `_. + +.. 
code-block:: cpp + + extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); + + +rocalRelease +--------------------------- + +Use: To free all the resources allocated during the graph creation process + +Returns: A status code indicating the success or failure + +See `rocalRelease example `_. + +.. code-block:: cpp + + extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context); + + +Image Augmentation Using C++ API +-------------------------------------------- + +The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing `C++ API `_. + +.. code-block:: cpp + :caption: Example Image Augmentation + + auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1); + input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type); + + image0 = rocalResize(handle, input1, resize_w, resize_h, true); + + RocalImage image1 = rocalRain(handle, image0, false); + + + RocalImage image11 = rocalFishEye(handle, image1, false); + + + rocalRotate(handle, image11, true, rand_angle); + + + // Creating successive blur nodes to simulate a deep branch of augmentations + RocalImage image2 = rocalCropResize(handle, image0, resize_w, resize_h, false, rand_crop_area); + for(int i = 0 ; i < aug_depth; i++) + { + image2 = rocalBlurFixed(handle, image2, 17.25, (i == (aug_depth -1)) ? true:false ); + } + // Calling the API to verify and build the augmentation graph + if(rocalVerify(handle) != ROCAL_OK) + { + std::cout << "Could not verify the augmentation graph" << std::endl; + return -1; + } + + while (!rocalIsEmpty(handle)) + { + if(rocalRun(handle) != 0) + break; + } + + +To see a sample image augmentation application in C++, see `Image Augmentation `_. 
diff --git a/docs/user_guide/ch4.md b/docs/how-to/using-with-python.rst similarity index 54% rename from docs/user_guide/ch4.md rename to docs/how-to/using-with-python.rst index 4992d995d..442674518 100644 --- a/docs/user_guide/ch4.md +++ b/docs/how-to/using-with-python.rst @@ -1,16 +1,65 @@ -# Chapter 4: Using with Python API +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _using-with-python: + +******************************************************************** +Using rocAL with Python API +******************************************************************** rocAL uses simple Python operators to provide high performance and flexibility by utilizing the underlying hardware capabilities in a very efficient manner. -## 4.1 Creating a Basic Pipeline +* rocAL Python package has been created using Pybind11 which enables data transfer between rocAL C++ API and Python API +* Module imports are made similar to other data loaders like NVidia's DALI +* ``rocal_pybind`` package has both PyTorch and TensorFlow framework support as described in :ref:`framework` +* Various reader format support including ``FileReader``, ``COCOReader``, and ``TFRecordReader`` +* Example folder contains sample implementations for each reader variation as well as sample training script for PyTorch +* rocAL is integrated into MLPerf as described in :ref:`ml-perf` + + +rocAL Python API +===================== + +``amd.rocal.fn`` +----------------------------- + +* Contains the image augmentations & file read and decode operations which are linked to rocAL C++ API +* All ops (listed below) are supported for the single input image and batched inputs. + +``amd.rocal.pipeline`` +----------------------- + +* Contains Pipeline class which has all the data needed to build and run the rocAL graph. +* Contains support for context/graph creation, verify and run the graph. 
+* Has data transfer functions to exchange data between frameworks and rocAL +* define_graph functionality has been implemented to add nodes to build a pipeline graph. + +``amd.rocal.types`` +------------------------ + +``amd.rocal.types`` are enums exported from C++ API to Python. Some examples include CPU, GPU, FLOAT, FLOAT16, RGB, GRAY, etc. + +``amd.rocal.plugin.pytorch`` +----------------------------- + +* Contains ``ROCALGenericIterator`` for PyTorch. +* ``ROCALClassificationIterator`` class implements an iterator for image classification and returns images with corresponding labels. +* From the above classes, any hybrid iterator pipeline can be created by adding augmentations. +* See `PyTorch Simple Example `_. Requires PyTorch. + + +Creating a Basic Pipeline +============================ The rocAL pipeline is a Python script that defines a data loader, augmentation graph, and instructions to build and execute it. The most significant part of data processing with rocAL is pipeline creation. A pipeline is composed of multiple operations connected in an ordered graph that is encapsulated in an object of amd.rocal.pipeline. amd.rocal.pipeline is a single library that can be integrated to build preprocessing pipelines for both training and inference applications. To import a rocAL pipeline using the library, use: -``` -from amd.rocal.pipeline import Pipeline -``` +.. code-block:: python + + from amd.rocal.pipeline import Pipeline + The library provides functions required to define, build, and run the pipeline. @@ -21,35 +70,40 @@ To start using a rocAL pipeline, perform the steps below, which are explained in 3. Build the pipeline. 4. Run the pipeline. -## 4.1.1 Instantiating the Pipeline Class +Instantiating the pipeline class +----------------------------------- A pipeline is defined by instantiating a pipeline object and adding rocAL operators into the pipeline. 
Given below is an example of a file reader, which takes a folder of images as input and decodes the images followed by a resize augmentation. The pipeline runs on the CPU if rocal_cpu is True, or else it runs on the device with the specified device_id. -``` -# Create Pipeline instance +.. code-block:: python + :caption: Create Pipeline Instance + pipe = SimplePipeline(batch_size=batch_size, num_threads=num_threads, device_id=args.local_rank, seed=random_seed, rocal_cpu=rocal_cpu, tensor_layout=types.NHWC if args.NHWC else types.NCHW , tensor_dtype=types.FLOAT16 if args.fp16 else types.FLOAT) # Set Params output_set = 0 rocal_device = 'cpu' if rocal_cpu else 'gpu' decoder_device = 'cpu' if rocal_cpu else 'gpu' - # Use pipeline instance to make calls to reader, decoder & augmentation's + # Use pipeline instance to make calls to reader, decoder & augmentations with pipe: jpegs, _ = fn.readers.file(file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) images = fn.decoders.image(jpegs, file_root=data_path, device=decoder_device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=True) images = fn.resize(images, device=rocal_device, resize_x=300, resize_y=300) -``` -## 4.1.2 Defining the Pipeline -To define a pipeline, see https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/amd/rocal/pipeline.py#L29. +Defining the Pipeline +------------------------ + +To define a pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/amd/rocal/pipeline.py#L29`. + +.. code-block:: shell + :caption: Pipeline Class -``` -class Pipeline(object): + class Pipeline(object): - """Pipeline class internally calls RocalCreate which returns context which will have all + Pipeline class internally calls RocalCreate which returns context which will have all the info set by the user. @@ -87,7 +141,7 @@ class Pipeline(object): queues executor, with buffer queue size `x` for cpu stage and `y` for mixed and gpu stages. 
It is not supported when both `exec_async` and `exec_pipelined` are set to `False`. - Executor will buffer cpu and gpu stages separatelly, + Executor will buffer cpu and gpu stages separately, and will fill the buffer queues when the first :meth:`amd.rocal.pipeline.Pipeline.run` is issued. `exec_async` : bool, optional, default = True @@ -108,98 +162,118 @@ class Pipeline(object): unrestricted number of streams is assumed). `default_cuda_stream_priority` : int, optional, default = 0 HIP stream priority used by ROCAL. -``` -Following are the important functions available in the Pipeline class, which is an instance of `amd.rocal.pipeline`: -- `build()`: Used to build a pipeline graph -- `__init__ constructor`: Defines all the operators to be used in the graph with the corresponding parameters -- `is_empty()`: Used to check if all the pipeline handles are empty -- `rocalResetLoaders()`: Used to reset the iterator to the beginning -- `set_outputs()`: Used to set the augmentations output of the graph +Following are the important functions available in the Pipeline class, which is an instance of ``amd.rocal.pipeline``: -## 4.1.3 Building the Pipeline +* ``build()``: Used to build a pipeline graph +* ``__init__ constructor``: Defines all the operators to be used in the graph with the corresponding parameters +* ``is_empty()``: Used to check if all the pipeline handles are empty +* ``rocalResetLoaders()``: Used to reset the iterator to the beginning +* ``set_outputs()``: Used to set the augmentations output of the graph + +Building the Pipeline +------------------------- Building the pipeline ensures that all operators are validated with the corresponding inputs and outputs. -To build the pipeline, see https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L166 +To build the pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L166` + +.. 
code-block:: python + :caption: Build the Pipeline + + # build the pipeline + pipe = SimplePipeline(batch_size=max_batch_size, num_threads=1, device_id=0) + pipe.build() -``` -# build the pipeline - pipe = SimplePipeline(batch_size=max_batch_size, num_threads=1, device_id=0) - pipe.build() -``` -## 4.1.4 Running the Pipeline +Running the Pipeline +----------------------------- To run/use the pipeline, simply create a data loader using the pipeline and iterate through it to get the next batch of images with labels. -To run the pipeline, see https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L168 +To run the pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L168` + +.. code-block:: python + :caption: Run the Pipeline -``` # Dataloader data_loader = ROCALClassificationIterator(pipe,device=device) # Enumerate over the Dataloader for epoch in range(int(args.num_epochs)): print("EPOCH:::::", epoch) for i, it in enumerate(data_loader, 0): -``` -## 4.1.5 Pipeline Output + +Pipeline Output +------------------------- The output of the pipeline created above for 4 iterations (number of epochs) with a batch size of 2 is shown below for your reference. Each image is decoded and resized to 224x224. -![Sample](../data/ch4_sample.png) -Figure 3. Sample Pipeline Output +.. figure:: ../data/ch4_sample.png -## 4.2 Performing Augmentations + Sample Pipeline Output + + +Performing Augmentations +================================ rocAL not only reads images from the disk and batches them into tensors, it can also perform various augmentations on those images. -To read images, decode them, and rotate them in the pipeline, see https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L77 +To read images, decode them, and rotate them in the pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L77` + +.. 
code-block:: python + :caption: Perform Augmentations + + def rotated_pipeline(): + jpegs, labels = fn.readers.file(file_root=image_dir, random_shuffle=True) + images = fn.decoders.image(jpegs, device='cpu') -``` -def rotated_pipeline(): - jpegs, labels = fn.readers.file(file_root=image_dir, random_shuffle=True) - images = fn.decoders.image(jpegs, device='cpu') + # Rotate the decoded images at an angle of 10° and fill the remaining space + # with black color (0) + rotated_images = fn.rotate(images, angle=10.0, fill_value=0) + return rotated_images, labels -# Rotate the decoded images at an angle of 10ᵒ and fill the remaining space -With black color (0) - rotated_images = fn.rotate(images, angle=10.0, fill_value=0) - return rotated_images, labels + pipe = rotated_pipeline(batch_size=max_batch_size, num_threads=1, device_id=0) + pipe.build() -pipe = rotated_pipeline(batch_size=max_batch_size, num_threads=1, device_id=0) -pipe.build() -``` To run the pipeline, see: -``` -pipe_out = pipe.run() -images, labels = pipe_out -show_images(images) -``` +.. code-block:: python -## 4.3 rocAL Data Types + pipe_out = pipe.run() + images, labels = pipe_out + show_images(images) -All the rocAL data types are defined under [amd.rocal.types](https://github.com/ROCm/MIVisionX/blob/master/rocAL/rocAL_pybind/amd/rocal/types.py). Import this library in the application to access the various data types such as rocAL status, processing mode, tensor output type, image size evaluation policy, image color, tensor layout, decode device, resize scaling mode, and resize interpolation type. + +rocAL Data Types +========================= + +All the rocAL data types are defined under `amd.rocal.types `_. Import this library in the application to access the various data types such as rocAL status, processing mode, tensor output type, image size evaluation policy, image color, tensor layout, decode device, resize scaling mode, and resize interpolation type.
Here are some of the commonly used rocAL data types: -- Processing modes: Values (GPU/CPU). Use the rocal_cpu argument in the pipeline to set the processing mode. - - rocal_cpu = True: This performs data loading on the CPU. If GPUs are heavily used for training, it is viable to create the data-loading pipeline using CPU. - - rocal_cpu = False: This performs data loading on the available GPU as specified using the device_id argument in the pipeline. -- Tensor output types: Values (NCHW/NHWC). Example: - - tensor_layout = types.NCHW - - tensor_layout = types.NHWC -- Tensor data types: Values (FLOAT/FLOAT16). Example: - - tensor_dtype = types.FLOAT - - tensor_dtype = types.FLOAT16 +* Processing modes: Values (GPU/CPU). Use the rocal_cpu argument in the pipeline to set the processing mode. + + * rocal_cpu = True: This performs data loading on the CPU. If GPUs are heavily used for training, it is viable to create the data-loading pipeline using CPU. + * rocal_cpu = False: This performs data loading on the available GPU as specified using the device_id argument in the pipeline. + +* Tensor output types: Values (NCHW/NHWC). Example: + + * tensor_layout = types.NCHW + * tensor_layout = types.NHWC -To see the usage of the above-mentioned data types, see https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/amd/rocal/pipeline.py#L97 +* Tensor data types: Values (FLOAT/FLOAT16). Example: -``` -def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=-1, + * tensor_dtype = types.FLOAT + * tensor_dtype = types.FLOAT16 + +To see the usage of the above-mentioned data types, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/amd/rocal/pipeline.py#L97` + +.. 
code-block:: python + + def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=-1, exec_pipelined=True, prefetch_queue_depth=2, exec_async=True, bytes_per_sample=0, rocal_cpu=False, max_streams=-1, default_cuda_stream_priority=0, tensor_layout = types.NCHW, reverse_channels = False, multiplier = [1.0,1.0,1.0], offset = [0.0, 0.0, 0.0], tensor_dtype=types.FLOAT): @@ -211,4 +285,4 @@ def __init__(self, batch_size=-1, num_threads=-1, device_id=-1, seed=-1, print("comes to gpu") self._handle = b.rocalCreate( batch_size, types.GPU, device_id, num_threads,prefetch_queue_depth,types.FLOAT) -``` + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..8d83db89f --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,60 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _rocal: + +******************************************************************** +rocAL documentation +******************************************************************** + +Deep learning applications require loading and pre-processing data efficiently to achieve high processing throughput. +This requires creating efficient processing pipelines fully utilizing the underlying hardware capabilities. +Some examples are load and decode data, and perform a variety of augmentations such as color-format conversions. Deep learning +frameworks require supporting multiple data formats and augmentations to adapt to a variety of data-sets and models. + +The ROCm Augmentation Library (rocAL) is designed to efficiently decode and process image and video pipelines from a +variety of storage formats. These pipelines are programmable by the user using both C++ and Python APIs. rocAL is +implemented in the `HIP programming language `_ and optimized for AMD's +latest discrete GPUs. + +The code is open and hosted at: https://github.com/ROCm/rocAL + +The rocAL documentation is structured as follows: + +.. 
grid:: 2 + :gutter: 3 + + .. grid-item-card:: Installation + + * :ref:`install` + + .. grid-item-card:: How-to + + * :ref:`overview` + * :ref:`architecture` + * :ref:`using-with-cpp` + * :ref:`using-with-python` + * :ref:`framework` + + .. grid-item-card:: Reference + + * `rocAL API Modules `_ + * `rocAL API `_ + * `rocAL Datatypes `_ + * `rocAL Augmentation API `_ + * `rocAL Data Loaders API `_ + * `rocAL Data Transfer API `_ + * `rocAL Info API `_ + * `rocAL Metadata API `_ + * `rocAL Parameter API `_ + * `rocAL Header Files `_ + + .. grid-item-card:: Tutorials + + * :ref:`examples` + +To contribute to the documentation refer to `Contributing to ROCm `_. + +You can find licensing information on the `Licensing `_ page. + diff --git a/docs/install/install.rst b/docs/install/install.rst new file mode 100644 index 000000000..7e6dca28d --- /dev/null +++ b/docs/install/install.rst @@ -0,0 +1,208 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _install: + +******************************************************************** +Installation +******************************************************************** + +This chapter provides information about the installation of rocAL and related packages. 
+ +Prerequisites +============================= + +* Linux distribution + + - Ubuntu 20.04 or 22.04 + - CentOS 7 + - RedHat 8 or 9 + - SLES 15-SP4 + +* `ROCm supported hardware `_ +* Install ROCm with `amdgpu-install `_ with ``--usecase=graphics,rocm --no-32`` +* `RPP `_ +* `AMD OpenVX™ `_ and AMD OpenVX™ Extensions: ``VX_RPP`` and ``AMD Media`` - MIVisionX Components +* `Turbo JPEG `_ - Version 2.0.6.2 from ``https://github.com/rrawther/libjpeg-turbo.git`` +* `Half-precision floating-point `_ library - Version 1.12.0 or higher +* `Google Protobuf `_ - Version 3.12.4 or higher +* `LMDB Library `_ +* `RapidJSON `_ +* `PyBind11 `_ +* `HIP `_ +* OpenMP +* C++17 + +Installation instructions +================================ + +The installation process uses the following steps: + +* `ROCm supported hardware install `_ +* Install ROCm with `amdgpu-install `_ with ``--usecase=graphics,rocm --no-32`` +* Use either :ref:`package-install` or :ref:`source-install` as described below. + +.. _package-install: + +Package install +------------------------------- + +Install rocAL runtime, development, and test packages. + +* Runtime package - ``rocal`` only provides the dynamic libraries +* Development package - ``rocal-dev`` / ``rocal-devel`` provides the libraries, executables, header files, Python bindings, and samples +* Test package - ``rocal-test`` provides ctest to verify installation + +On Ubuntu +^^^^^^^^^^^^^^^ + +.. code-block:: shell + + sudo apt-get install rocal rocal-dev rocal-test + + +On CentOS/RedHat +^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: shell + + sudo yum install rocal rocal-devel rocal-test + + +On SLES +^^^^^^^^^^^^^^ + +.. code-block:: shell + + sudo zypper install rocal rocal-devel rocal-test + + +.. note:: + * Package install requires ``Turbo JPEG``, ``PyBind 11 v2.10.4`` and ``Protobuf V3.12.4`` manual install + * ``CentOS`` / ``RedHat`` / ``SLES`` requires ``FFMPEG Dev`` package manual install + +..
_source-install: + +Source Install +--------------------------- + +For your convenience the ``rocAL-setup.py`` setup script is provided for Linux installations. This script will install all the dependencies required for the rocAL API. + +.. note:: + This script only needs to be executed once. However, upgrading the ROCm version also requires rerunning the ``rocAL-setup.py`` script. + +The process for installing with the setup script is as follows: + +#. Clone rocAL source code + + .. code-block:: shell + + git clone https://github.com/ROCm/rocAL.git + +#. Use either flow depending on the backend: + + * :ref:`hip-backend` + * :ref:`opencl-backend` + + .. note:: + + rocAL supports two GPU backends: OpenCL and HIP + +Running the ``rocAL-setup.py`` setup script +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Prerequisites: + + * Linux distribution + + - Ubuntu 20.04 or 22.04 + - CentOS 7 + - RedHat 8 or 9 + - SLES 15-SP4 + + * `ROCm supported hardware `_ + * Install ROCm with `amdgpu-install `_ with ``--usecase=graphics,rocm --no-32`` + +Using ``rocAL-setup.py`` script: + +.. code-block:: shell + + python rocAL-setup.py --directory [setup directory - optional (default:~/)] + --opencv [OpenCV Version - optional (default:4.6.0)] + --protobuf [ProtoBuf Version - optional (default:3.12.4)] + --pybind11 [PyBind11 Version - optional (default:v2.10.4)] + --reinstall [Remove previous setup and reinstall (default:OFF)[options:ON/OFF]] + --backend [rocAL Dependency Backend - optional (default:HIP) [options:OCL/HIP]] + --rocm_path [ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required] + + +.. _hip-backend: + +Instructions for building rocAL with the HIP GPU backend (default) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Run the setup script to install all the dependencies required by the HIP GPU backend: + +.. code-block:: shell + + cd rocAL + python rocAL-setup.py + + +2.
Run the following commands to build rocAL with the HIP GPU backend: + +.. code-block:: shell + + mkdir build-hip + cd build-hip + cmake ../ + make -j8 + sudo cmake --build . --target PyPackageInstall + sudo make install + + +3. Run tests - `test option instructions `_ + +.. code-block:: shell + + make test + + +.. note:: + * ``PyPackageInstall`` is used for rocal_pybind installation + * ``sudo`` is required for pybind installation + +.. _opencl-backend: + +Instructions for building rocAL with OpenCL GPU backend +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Find instructions on building rocAL for use with the OpenCL backend on `OPENCL GPU Backend `_. + +.. note:: + * rocAL_pybind is not supported on OPENCL backend + * rocAL cannot be installed for both GPU backends in the same default folder (i.e., ``/opt/rocm/``) + * To install rocAL with both GPU backends, add ``-DCMAKE_INSTALL_PREFIX`` in the cmake commands to install rocAL with OPENCL and HIP backends into two separate custom folders. + +Verify installation +========================= + +The installer will copy: + + * Executables into ``/opt/rocm/bin`` + * Libraries into ``/opt/rocm/lib`` + * Header files into ``/opt/rocm/include/rocal`` + * Apps and Samples folders into ``/opt/rocm/share/rocal`` + * Documents folder into ``/opt/rocm/share/doc/rocal`` + +Verify with ``rocal-test`` package +-------------------------------------------- + +The test package installs the ctest module to test rocAL. Follow the steps below to test the package install: + +..
code-block:: shell + + mkdir rocAL-test && cd rocAL-test + cmake /opt/rocm/share/rocal/test/ + ctest -VV + diff --git a/docs/license.md b/docs/license.md deleted file mode 100644 index 27dad28c7..000000000 --- a/docs/license.md +++ /dev/null @@ -1,4 +0,0 @@ -# License - -```{include} ../LICENSE.txt -``` diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 000000000..b6712e13c --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,11 @@ +.. meta:: + :description: rocAL documentation and API reference library + :keywords: rocAL, ROCm, API, documentation + +.. _license: + +******************************************************************** +License +******************************************************************** + +.. include:: ../LICENSE.txt \ No newline at end of file diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index cbfa51912..313992e13 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -1,23 +1,34 @@ -# Anywhere {branch} is used, the branch name will be substituted. -# These comments will also be removed. 
defaults: numbered: False -root: doxygen/html/index +#root: doxygen/html/index +root: index subtrees: -- caption: User Guide - entries: - - file: README - subtrees: - - entries: - - file: user_guide/ch1 - - file: user_guide/ch2 - - file: user_guide/ch3 - - file: user_guide/ch4 - - file: user_guide/ch5 - - file: user_guide/ch6 -- caption: Examples - entries: - - file: examples -- caption: About - entries: - - file: license +- entries: + - file: install/install.rst + title: Installation + - file: how-to/index.rst + title: How To + subtrees: + - entries: + - file: how-to/overview.rst + - file: how-to/architecture.rst + - file: how-to/using-with-cpp.rst + - file: how-to/using-with-python.rst + - file: how-to/framework.rst + - file: doxygen/html/modules + title: rocAL API Reference + subtrees: + - entries: + - file: doxygen/html/group__group__rocal + - file: doxygen/html/group__group__rocal__types + - file: doxygen/html/group__group__rocal__augmentations + - file: doxygen/html/group__group__rocal__data__loaders + - file: doxygen/html/group__group__rocal__data__transfer + - file: doxygen/html/group__group__rocal__info + - file: doxygen/html/group__group__rocal__meta__data + - file: doxygen/html/group__group__rocal__parameters + - file: doxygen/html/files + title: rocAL Header files + - file: examples.rst + title: Examples + - file: license.rst diff --git a/docs/user_guide/ch1.md b/docs/user_guide/ch1.md deleted file mode 100644 index 7ad19e5c6..000000000 --- a/docs/user_guide/ch1.md +++ /dev/null @@ -1,83 +0,0 @@ -# Chapter 1: Overview - -## 1.1 Overview - -The performance of Deep Learning applications depends upon the efficiency of performance pipelines that can load and preprocess data efficiently to provide a high throughput. The pipelines are typically used to perform tasks such as loading and decoding data, perform a variety of augmentations, perform color-format conversions, etc., before passing the data for training or inference. 
The Deep Learning frameworks also require the pipelines to support multiple data formats and augmentations to adapt to a variety of datasets and models. - -This can be achieved by creating processing pipelines that fully utilize the underlying hardware capabilities. - -ROCm™ Augmentation Library (rocAL™) allows the user to create hybrid pipelines to maximize the throughput for Machine Learning applications. It helps to create pipelines that can efficiently process images, videos, and a variety of storage formats. The user can program these pipelines using C or Python API. rocAL significantly accelerates data processing on AMD processors. - -To optimize the preprocessing pipeline, rocAL utilizes the following features: - -- Prefetching: Loads the data for the next batch while the existing batch is under process. This parallelization allows more batches to be processed in less time. -- Hybrid execution: Utilizes both the CPU and GPU simultaneously. For example, decoding the data on the CPU while running the training on the GPU.  -- Hardware decoding: Uses the AMD VCN and VA-API to efficiently decode data on the hardware.  -- Batch processing: Groups and processes the data together as a batch. - -![The Role of Pipelines in Deep Learning Applications](../data/ch1_pipelines.png) - -Figure 1. 
The Role of Pipelines in Deep Learning Applications - -## 1.2 Key Components - -- CPU- or GPU-based implementation for each augmentation and data_loader nodes -- Python and C APIs for easy integration and testing -- Multiple framework support and portable on PyTorch, TensorFlow, and MXNet -- Flexible graphs to help the user create custom pipelines -- Multicore host and multi-gpu execution for the graph -- Support for various augmentations such as fish-eye, water, gitter, non-linear blend, etc., using the AMD ROCm Performance Primitive (RPP) library -- Support for classification, object detection, segmentation, and keypoint data pipelines - -## 1.3 Third-party Integration - -rocAL provides support for many operators. The module imports are designed like other available data loaders for a smooth integration with training frameworks. The rocal_pybind package provides support for integrating with PyTorch, TensorFlow, and Caffe2. rocAL also supports many data formats such as FileReader, COCO Reader, TFRecordReader, and Lightning Memory-Mapped Database (LMDB), thus offering a unified approach to framework integration. - -## 1.4 rocAL Operators - -rocAL operators offer the flexibility to run on CPU or GPU for building hybrid pipelines. They also support classification and object detection on the workload. Some of the useful operators supported by rocAL are listed below: - -- Augmentations: These are used to enhance the data set by adding effects to the original images. To use the augmentations, import the instance of amd.rocal.fn. into the Python script. These augmentation APIs further call the RPP kernels underneath (HIP/HOST) depending on the backend used to build RPP and rocAL.  - -### Table 1. 
Augmentations Available through rocAL - -| Color Augmentations | Effects Augmentations | Geometry Augmentations | -| ------------------- | --------------------- | ------------------------------------- | -| Blend | Fog | Crop | -| Blur | Jitter | Crop Mirror Normalization | -| Brightness | Pixelization | Crop Resize | -| Color Temperature | Raindrops | Fisheye Lens | -| Color Twist | Snowflakes | Flip (Horizontal, Vertical, and Both) | -| Contrast | Salt and Pepper Noise | Lens Correction | -| Exposure | | Random Crop | -| Gamma | | Resize | -| Hue | | Resize Crop Mirror | -| Saturation | | Rotation | -| Vignette | | Warp Affine | - -- Readers: These are used to read and understand the different types of datasets and their metadata. Some examples of readers are list of files with folders, LMDB, TFRecord, and JSON file for metadata. To use the readers, import the instance of amd.rocal.readers into the Python script. - -### Table 2. Readers Available through rocAL - -| Readers | Description | -| --------------------------------------- | --------------------------------------------------- | -| File Reader | Reads images from a list of files in a folder(s) | -| Video Reader | Reads videos from a list of files in a folder(s) | -| Caffe LMDB Reader | Reads (key, value) pairs from Caffe LMDB | -| Caffe2 LMDB Reader | Reads (key, value) pairs from Caffe2 LMDB | -| COCO Reader – file source and keypoints | Reads images and JSON annotations from COCO dataset | -| TFRecord Reader | Reads from a TFRecord dataset | -| MXNet Reader | Reads from a RecordIO dataset | - -- Decoders: These are used to support different input formats of images and videos. Decoders extract data from the datasets that are in compressed formats such as JPEG, MP4, etc. To use the decoders, import the instance of amd.rocal.decoders into the Python script. - -### Table 3. 
Decoders Available through rocAL - -| Decoders | Description | -| ----------------- | -------------------------------------- | -| Image | Decodes JPEG images | -| Image_raw | Decodes images in raw format | -| Image_random_crop | Decodes and randomly crops JPEG images | -| Image_slice | Decodes and slices JPEG images | - -To see examples demonstrating the usage of decoders and readers, [click here](https://github.com/ROCm/MIVisionX/tree/master/rocAL/rocAL_pybind/examples) diff --git a/docs/user_guide/ch2.md b/docs/user_guide/ch2.md deleted file mode 100644 index 87aa1e19f..000000000 --- a/docs/user_guide/ch2.md +++ /dev/null @@ -1,21 +0,0 @@ -# Chapter 2: Architecture Components - -The rocAL architecture comprises rocAL Master-Graph and RPP as major components. - -## 2.1 rocAL Master-Graph - -The rocAL pipeline is built on top of rocAL Master-Graph. The architectural components of rocAL Master-Graph are described below: - -- Loader and Processing Modules: The rocAL Master-Graph consists of two main architectural components, a loader module to load data and a processing module to process data. The loader module is clearly separated from the processing module for a seamless execution without any blockages. The Prefetch queue helps to load data ahead of time and can be configured with user-defined parameters. The Output routine runs in parallel with the load routine, as both have separate queues for storing the result. - -![rocAL Master-Graph Architecture](../data/ch2_arch.png) - -Figure 2. rocAL Master-Graph Architecture - -- rocAL Pipeline: The rocAL pipeline holds great significance, as it contains all the information required to create a rocAL graph with data loader, augmentation nodes, and the output format. Once a rocAL pipeline is created, the user can build, run, and call an iterator to get the next batch of data into the pipeline. The user can install the rocAL pipeline using the rocAL Python package. 
It supports many operators for data loading and data augmentation. - -## 2.2 ROCm Performance Primitive Library - -RPP is a comprehensive high-performance computer vision library optimized for the AMD CPU and GPU with HIP and OpenCL backends. It is available under the AMD ROCm software platform. It provides low-level functionality for all rocAL operators for single, image, and tensor datatypes. RPP provides an extensive library for vision augmentations that includes vision functions, color augmentations, filter augmentations, geometric distortions, and a few more features. - -For more information on RPP along with the list of supported kernels, see https://github.com/ROCm/rpp. diff --git a/docs/user_guide/ch3.md b/docs/user_guide/ch3.md deleted file mode 100644 index aa86daa49..000000000 --- a/docs/user_guide/ch3.md +++ /dev/null @@ -1,42 +0,0 @@ -# Chapter 3: Installation - -This chapter provides information about the installation of rocAL and related packages. - -## 3.1 Prerequisites - -* Linux distribution -* [AMD RPP](https://github.com/ROCm/rpp) -* [AMD OpenVX™](https://github.com/ROCm/MIVisionX/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media` -* [Turbo JPEG](https://libjpeg-turbo.org/) - Version `2.0` or higher -* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher -* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher -* [LMBD Library](http://www.lmdb.tech/doc/) -* [RapidJSON](https://github.com/Tencent/rapidjson) -* [PyBind11](https://github.com/pybind/pybind11) - -## 3.2 Platform Support - -To see the list of supported platforms for rocAL, see the ROCm Installation Guide at https://docs.amd.com. - -## 3.3 Installing rocAL - -rocAL is shipped along with MIVisionX. 
To build and install the rocAL C++ library, follow the instructions given [here](https://github.com/ROCm/MIVisionX#build--install-mivisionx) - -## 3.4 Installing rocAL Python Package - -The rocAL Python package (rocal_pybind) is a separate redistributable wheel. rocal_pybind, which is created using Pybind11, enables data transfer between rocAL C++ API and Python API. With the help of rocal_pybind.so wrapper library, the rocAL functionality, which is primarily in C/C++, can be effectively used in Python. -The Python package supports PyTorch, TensorFlow, Caffe2, and data readers available for various formats such as FileReader, COCO Reader, TFRecord Reader, and CaffeReader. - -To build and install the Python package, see [rocAL python](https://github.com/ROCm/MIVisionX/tree/master/rocAL/rocAL_pybind). - -## 3.5 Installing rocAL Using Framework Dockers - -To test the rocAL Python APIs using PyTorch or TensorFlow, we recommend building a docker with rocAL and ROCm using any of the links below: - -- [rocAL PyTorch docker](https://github.com/ROCm/MIVisionX/tree/master/docker/pytorch) -- [rocAL TensorFlow docker](https://github.com/ROCm/MIVisionX/tree/master/docker/tensorflow) - -To use rocAL on Ubuntu, use the following dockers: - -- [rocAL on ubuntu20](https://github.com/ROCm/MIVisionX/blob/master/docker/mivisionx-on-ubuntu20.dockerfile) -- [rocAL on Ubuntu22](https://github.com/ROCm/MIVisionX/blob/master/docker/mivisionx-on-ubuntu22.dockerfile) diff --git a/docs/user_guide/ch5.md b/docs/user_guide/ch5.md deleted file mode 100644 index 8c4fd7d15..000000000 --- a/docs/user_guide/ch5.md +++ /dev/null @@ -1,164 +0,0 @@ -# Chapter 5: Framework Integration - -rocAL improves the pipeline efficiency by preprocessing the data and parallelizing the data loading on the CPU and running trainings on the GPU. To separate the data loading from the training, rocAL provides TensorFlow and PyTorch iterators and readers as a plugin. 
The integration process with PyTorch and TensorFlow is described in the sections below. - -## 5.1 PyTorch Integration - -This section demonstrates how to use rocAL with PyTorch for training. Follow the steps below to get started. - -## 5.1.1 Build PyTorch Docker - -Build a rocAL PyTorch docker by following the steps here. - -## 5.1.2 Create Data-loading Pipeline - -Follow these steps: - -1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/test_training.py#L28). - -``` -from amd.rocal.plugin.pytorch import ROCALClassificationIterator -from amd.rocal.pipeline import Pipeline -import amd.rocal.fn as fn -import amd.rocal.types as types -``` - -2. See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the [pipeline](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/test_training.py#L38). - -``` -def trainPipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16): - pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, - rocal_cpu=rocal_cpu, tensor_dtype = types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, - prefetch_queue_depth = 7) - with pipe: - jpegs, labels = fn.readers.file(file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) - rocal_device = 'cpu' if rocal_cpu else 'gpu' - # decode = fn.decoders.image(jpegs, output_type=types.RGB,file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) - decode = fn.decoders.image_slice(jpegs, output_type=types.RGB, - file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) - res = fn.resize(decode, resize_x=224, resize_y=224) - flip_coin = fn.random.coin_flip(probability=0.5) - cmnp = fn.crop_mirror_normalize(res, 
device="gpu", - output_dtype=types.FLOAT, - output_layout=types.NCHW, - crop=(crop, crop), - mirror=flip_coin, - image_type=types.RGB, - mean=[0.485,0.456,0.406], - std=[0.229,0.224,0.225]) - if(one_hot): - _ = fn.one_hot(labels, num_classes) - pipe.set_outputs(cmnp) - print('rocal "{0}" variant'.format(rocal_device)) - return pipe -``` - -3. Import libraries for PyTorch. - -``` -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -``` - -4. Call the training pipeline with rocAL classification data [loader](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/test_training.py#L78). - -``` -Def get_pytorch_train_loader(self): - print(“in get_pytorch_train_loader function”) - pipe_train = trainPipeline(self.data_path, self.batch_size, self.num_classes, self.one_hot, self.local_rank, - self.world_size, self.num_thread, self.crop, self.rocal_cpu, self.fp16) - pipe_train.build() - train_loader = ROCALClassificationIterator(pipe_train, device=”cpu” if self.rocal_cpu else “cuda”, device_id = self.local_rank) -``` - -5. Run the [training](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/test_training.py#L179). - -``` -# Training loop - for epoch in range(10): # loop over the dataset multiple times - print(“\n epoch:: “,epoch) - running_loss = 0.0 - - for i, (inputs,labels) in enumerate(train_loader, 0): - - sys.stdout.write(“\r Mini-batch “ + str(i)) - # print(“Images”,inputs) - # print(“Labels”,labels) - inputs, labels = inputs.to(device), labels.to(device) -``` - -6. Run the training as shown [here](https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch). - -To see a sample training script, click [here](https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch). - -## 5.2 TensorFlow Integration - -This section demonstrates how to use rocAL with TensorFlow for training. Follow the steps below to get started. 
- -## 5.2.1 Build TensorFlow Docker - -Build a rocAL TensorFlow docker by following the steps here. - -## 5.2.2 Create Data-loading Pipeline - -Follow these steps: - -1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/tf_petsTrainingExample/train_withROCAL_withTFRecordReader.py#L22). - -``` -from amd.rocal.plugin.tf import ROCALIterator -from amd.rocal.pipeline import Pipeline -import amd.rocal.fn as fn -import amd.rocal.types as types -``` - -2. See a rocAL pipeline for TensorFlow below. It reads data from the TFRecords using TFRecord Reader and uses fn.decoders.image to decode the raw [images](https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/tf_petsTrainingExample/train_withROCAL_withTFRecordReader.py#L128). - -``` -trainPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=1, rocal_cpu=RUN_ON_HOST, tensor_layout = types.NHWC) - with trainPipe: - inputs = fn.readers.tfrecord(path=TRAIN_RECORDS_DIR, index_path = "", reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap, - features={ - 'image/encoded':tf.io.FixedLenFeature((), tf.string, ""), - 'image/class/label':tf.io.FixedLenFeature([1], tf.int64, -1), - 'image/filename':tf.io.FixedLenFeature((), tf.string, "") - } - ) - jpegs = inputs["image/encoded"] - images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=TRAIN_RECORDS_DIR) - resized = fn.resize(images, resize_x=crop_size[0], resize_y=crop_size[1]) - flip_coin = fn.random.coin_flip(probability=0.5) - cmn_images = fn.crop_mirror_normalize(resized, crop=(crop_size[1], crop_size[0]), - mean=[0,0,0], - std=[255,255,255], - mirror=flip_coin, - output_dtype=types.FLOAT, - output_layout=types.NHWC, - pad_output=False) - trainPipe.set_outputs(cmn_images) -trainPipe.build() -``` - -3. Import libraries for [TensorFlow](https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/tf_petsTrainingExample/train_withROCAL_withTFRecordReader.py#L174). 
- -``` -import tensorflow.compat.v1 as tf -tf.compat.v1.disable_v2_behavior() -import tensorflow_hub as hub -Call the train pipeline - trainIterator = ROCALIterator(trainPipe) -Run the training Session - i = 0 - with tf.compat.v1.Session(graph = train_graph) as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - while i < NUM_TRAIN_STEPS: - - - for t, (train_image_ndArray, train_label_ndArray) in enumerate(trainIterator, 0): - train_label_one_hot_list = get_label_one_hot(train_label_ndArray) -``` - -4. Run the training as shown [here](https://github.com/ROCm/MIVisionX/tree/master/rocAL/rocAL_pybind/examples/tf_petsTrainingExample). - -To see a sample training script, click [here](https://github.com/ROCm/MIVisionX/tree/master/rocAL/rocAL_pybind/examples/tf_petsTrainingExample). diff --git a/docs/user_guide/ch6.md b/docs/user_guide/ch6.md deleted file mode 100644 index 5c164cc03..000000000 --- a/docs/user_guide/ch6.md +++ /dev/null @@ -1,112 +0,0 @@ -# Chapter 6: Using with C++ API - -This chapter explains how to create a pipeline and add augmentations using C++ APIs directly. The Python APIs also call these C++ APIs internally using the Python pybind utility as explained in the section Installing rocAL Python Package. - -## 6.1 C++ Common APIs - -The following sections list the commonly used C++ APIs. 
- -## 6.1.1 rocalCreate - -Use: To create the pipeline - -Returns: The context for the pipeline - -Arguments: - -- RocalProcessMode: Defines whether rocal data loading should be on the CPU or [GPU](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L91) - -``` -RocalProcessMode:: ROCAL_PROCESS_GPU -RocalProcessMode::ROCAL_PROCESS_CPU -``` - -- RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or [FP16](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L124) - -``` -RocalTensorOutputType::ROCAL_FP32 -RocalTensorOutputType::ROCAL_FP16 -``` - -[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L41): - -``` -extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); -``` - -## 6.1.2 rocalVerify - -Use: To verify the graph for all the inputs and outputs - -Returns: A status code indicating the success or failure - -[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L47): - -``` -extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); -``` - -## 6.1.3 rocalRun - -Use: To process and run the built and verified graph - -Returns: A status code indicating the success or failure - -[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L52): - -``` -extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); -``` - -## 6.1.4 rocalRelease - -Use: To free all the resources allocated during the graph creation process - -Returns: A status code indicating the success or failure - -[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L57): - -``` -extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context); -``` - -## 6.1.5 Image Augmentation 
Using C++ API - -The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing C++ [APIs](https://github.com/ROCm/MIVisionX/blob/develop/apps/image_augmentation/image_augmentation.cpp#L103). - -``` -Auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1); -input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type); - -image0 = rocalResize(handle, input1, resize_w, resize_h, true); - -RocalImage image1 = rocalRain(handle, image0, false); - - - RocalImage image11 = rocalFishEye(handle, image1, false); - - - rocalRotate(handle, image11, true, rand_angle); - - - // Creating successive blur nodes to simulate a deep branch of augmentations - RocalImage image2 = rocalCropResize(handle, image0, resize_w, resize_h, false, rand_crop_area);; - for(int i = 0 ; i < aug_depth; i++) - { - image2 = rocalBlurFixed(handle, image2, 17.25, (i == (aug_depth -1)) ? true:false ); - } -// Calling the API to verify and build the augmentation graph - if(rocalVerify(handle) != ROCAL_OK) - { - std::cout << "Could not verify the augmentation graph" << std::endl; - return -1; - } - -while (!rocalIsEmpty(handle)) - { - if(rocalRun(handle) != 0) - break; -} -``` - -To see a sample image augmentation application in C++, click [here](https://github.com/ROCm/MIVisionX/tree/develop/apps/image_augmentation).