For X86 and AMD machines, we can create pip-based dlio installations (#66)

* added basic build script. * refactored code and build setup * refactored code build of dlio * fixed pytest * fixed pytest * fixed installation script. * Fixed comments and strings to use correct paths. * Fixes for installation of dlio 1. Fixed setup to install post processor. 2. Fixed CI to not set PYTHONPATH * fixed test dependencies * fixed ci scripts to use executable created from the installer * fixed test dependencies * fixed path of config * Fixed Readme to use new installation methodology * fixed ppc env * fixed ppc env * fixed application * Refactored code for better build 1. created setup.py 2. fix root directory to dlio_benchmark 3. renamed dlio_benchmark.py to main.py 4. renamed dlio_postprocessor.py to postprocessor.py 5. fixed documentation to use dlio_benchmark and dlio_postprocessor entry points.
argonne-lcf · Jun 20, 2023 · 9f5a8db · 9f5a8db
1 parent fabdfd7
commit 9f5a8db
Show file tree

Hide file tree

Showing 85 changed files with 338 additions and 342 deletions.
diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ ubuntu-latest ]
+        os: [ ubuntu-20.04 ]
         profiler: [ DEFAULT, DLIO_PROFILER ]
         gcc: [10]
     name: ${{ matrix.os }}-${{ matrix.profiler }}-${{ matrix.gcc }}
@@ -35,23 +35,19 @@ jobs:
         sudo apt-get install $CC $CXX libc6 
         sudo apt-get install mpich
         python -m pip install --upgrade pip
-        pip install --upgrade --upgrade-strategy eager -r dev-requirements.txt
+        pip install .[test]
         if [[ $DLIO_PROFILER == 'DLIO_PROFILER' ]]; then
+          sudo apt-get install libhwloc-dev
           git clone https://github.com/hariharan-devarajan/dlio-profiler /home/runner/work/dlio_profiler
           cd /home/runner/work/dlio_profiler
           git submodule update --init --recursive
-          pushd external/GOTCHA
-          git apply ../gotcha_glibc_workaround.patch 
-          popd
           mkdir build
           cd build
           cmake ../
           sudo make install -j
         fi
     - name: test_gen_data
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_gen_data[png-tensorflow] -v
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_gen_data[npz-tensorflow] -v
@@ -60,14 +56,10 @@ jobs:
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_gen_data[hdf5-tensorflow] -v
     - name: test_custom_storage_root_gen_data
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k  test_storage_root_gen_data  -v
     - name: test_train
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_train[png-tensorflow-tensorflow]
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_train[npz-tensorflow-tensorflow]
@@ -92,52 +84,36 @@ jobs:
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_train[csv-pytorch-dali]
     - name: test_custom_storage_root_train
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_custom_storage_root_train -v
     - name: test_checkpoint_epoch
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k test_checkpoint_epoch -v
     - name: test_checkpoint_step
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k  test_checkpoint_step -v
     - name: test_eval
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k  test_eval -v
     - name: test_multi_threads
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
         RDMAV_FORK_SAFE=1 mpirun -np 2 pytest -k  test_multi_threads -v
     - name: test-tf-loader-tfrecord
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=resnet50 ++workload.dataset.num_files_train=64 ++workload.workflow.train=False ++workload.workflow.generate_data=True  ++workload.dataset.num_files_train=16 ++workload.dataset.num_samples_per_file=16
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=resnet50 ++workload.dataset.num_files_train=64 ++workload.workflow.train=True ++workload.workflow.generate_data=False  ++workload.dataset.num_files_train=16 ++workload.dataset.num_samples_per_file=16
+        RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=resnet50 ++workload.dataset.num_files_train=64 ++workload.workflow.train=False ++workload.workflow.generate_data=True  ++workload.dataset.num_files_train=16 ++workload.dataset.num_samples_per_file=16
+        RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=resnet50 ++workload.dataset.num_files_train=64 ++workload.workflow.train=True ++workload.workflow.generate_data=False  ++workload.dataset.num_files_train=16 ++workload.dataset.num_samples_per_file=16
     - name: test-torch-loader-npz
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=unet3d ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=False ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2 ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=unet3d ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=True ++workload.workflow.generate_data=False ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
+        RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=unet3d ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=False ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2 ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
+        RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=unet3d ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=True ++workload.workflow.generate_data=False ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
     - name: test-tf-loader-npz
       run: |
-        touch __init__.py
-        export PYTHONPATH=./:$PYTHONPATH
         export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=unet3d ++workload.framework=tensorflow ++workload.data_reader.data_loader=tensorflow ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=False ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
-        RDMAV_FORK_SAFE=1 mpirun -np 2 python ./src/dlio_benchmark.py workload=unet3d ++workload.framework=tensorflow ++workload.data_reader.data_loader=tensorflow ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=True ++workload.workflow.generate_data=False ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
+        RDMAV_FORK_SAFE=1 mpirun -np 2 dlio_benchmark workload=unet3d ++workload.framework=tensorflow ++workload.data_reader.data_loader=tensorflow ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=False ++workload.workflow.generate_data=True ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
+        RDMAV_FORK_SAFE=1 mpirun -np 2  dlio_benchmark workload=unet3d ++workload.framework=tensorflow ++workload.data_reader.data_loader=tensorflow ++workload.train.computation_time=0.05 ++workload.evaluation.eval_time=0.01 ++workload.train.epochs=2 ++workload.workflow.train=True ++workload.workflow.generate_data=False ++workload.dataset.num_files_train=16 ++workload.dataset.num_files_eval=16 ++workload.reader.read_threads=2  ++workload.dataset.record_length=4096 ++workload.dataset.record_length_stdev=0
diff --git a/.gitignore b/.gitignore
@@ -156,3 +156,4 @@ dmypy.json
 #Apple system files
 .DS_Store
 /.idea/
+/venv-quartz/
diff --git a/Dockerfile b/Dockerfile
@@ -10,6 +10,4 @@ RUN apt-get update && \
     apt-get install -y mpich
 
 RUN python -m pip install --upgrade pip
-RUN pip install  -r requirements.txt
-
-ENV PYTHONPATH="${PYTHONPATH}:/workspace/dlio"
+RUN pip install .
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,2 @@
+include requirements.txt
+recursive-include configs *
diff --git a/README.md b/README.md
@@ -13,9 +13,8 @@ DLIO is an I/O benchmark for Deep Learning. DLIO is aimed at emulating the I/O b
 ```bash
 git clone https://github.com/argonne-lcf/dlio_benchmark
 cd dlio_benchmark/
-pip install -r requirements.txt
-export PYTHONPATH=$PWD/:$PYTHONPATH
-python ./src/dlio_benchmark.py ++workload.workflow.generate_data=True
+pip install .
+dlio_benchmark ++workload.workflow.generate_data=True
 ```
 Additionally, to generate the report `iostat` is needed and can be installed from the `sysstat` package using your package manager.
 
@@ -25,37 +24,31 @@ Additionally, to generate the report `iostat` is needed and can be installed fro
 git clone https://github.com/argonne-lcf/dlio_benchmark
 cd dlio_benchmark/
 docker build -t dlio .
-docker run -t dlio python ./src/dlio_benchmark.py ++workload.workflow.generate_data=True
+docker run -t dlio dlio_benchmark ++workload.workflow.generate_data=True
 ``` 
 
 You can also pull a prebuilt container from Docker Hub (it might not reflect the most recent changes to the code): 
 ```bash
 docker docker.io/zhenghh04/dlio:latest
-docker run -t docker.io/zhenghh04/dlio:latest python ./src/dlio_benchmark.py ++workload.workflow.generate_data=True
+docker run -t docker.io/zhenghh04/dlio:latest python ./dlio_benchmark/main.py ++workload.workflow.generate_data=True
 ```
 
 One can also run interactively inside the container
 ```bash
 docker run -t docker.io/zhenghh04/dlio:latest /bin/bash
-root@30358dd47935:/workspace/dlio$ python ./src/dlio_benchmark.py ++workload.workflow.generate_data=True
+root@30358dd47935:/workspace/dlio$ python ./dlio_benchmark/main.py ++workload.workflow.generate_data=True
 ```
 
 ## PowerPC
 PowerPC requires installation through anaconda.
 ```bash
 # Setup required channels
-conda config --prepend channels conda-forge
 conda config --prepend channels https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/
 
 # create and activate environment
 conda env create --prefix ./dlio_env_ppc --file environment-ppc.yaml --force
 conda activate ./dlio_env_ppc
-
-# install hydra dependency by source
-mkdir external
-cd external
-git clone git@github.com:facebookresearch/hydra.git
-cd hydra
+# install other dependencies
 python -m pip install .
 ```
 
@@ -72,20 +65,20 @@ One can specify the workload through the ```workload=``` option on the command l
 
 First, generate the data
   ```bash
-  mpirun -np 8 python3 src/dlio_benchmark.py workload=unet3d ++workload.workflow.generate_data=True ++workload.workflow.train=False
+  mpirun -np 8 dlio_benchmark workload=unet3d ++workload.workflow.generate_data=True ++workload.workflow.train=False
   ```
 If possible, one can flush the filesystem caches in order to properly capture device I/O
   ```bash
   sudo sync && echo 3 | sudo tee /proc/sys/vm/drop_caches
   ```
 Finally, run the benchmark with ```iostat``` profiling, listing the io devices you would like to trace.
   ```bash
-  mpirun -np 8 python3 src/dlio_benchmark.py workload=unet3d ++workload.workflow.profiling=True ++workload.profiling.profiler=iostat ++workload.profiling.iostat_devices=[sda,sdb]
+  mpirun -np 8 dlio_benchmark workload=unet3d ++workload.workflow.profiling=True ++workload.profiling.profiler=iostat ++workload.profiling.iostat_devices=[sda,sdb]
   ```
 
 All the outputs will be stored in ```hydra_log/unet3d/$DATE-$TIME``` folder. To post process the data, one can do
 ```bash 
-python3 src/dlio_postprocessor.py --output-folder hydra_log/unet3d/$DATE-$TIME
+dlio_postprocessor --output-folder hydra_log/unet3d/$DATE-$TIME
 ```
 This will generate ```DLIO_$model_report.txt``` in the output folder. 
 

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -59,3 +59,4 @@ pytest-mpi
 pytest-subtests
 pytest-timeout
 nvidia-dali-cuda110
+psutil
diff --git a/src/common/data_structures.py → dlio_benchmark/__init__.py b/src/common/data_structures.py → dlio_benchmark/__init__.py
diff --git a/dlio_benchmark/common/__init__.py b/dlio_benchmark/common/__init__.py
diff --git a/src/common/constants.py → dlio_benchmark/common/constants.py b/src/common/constants.py → dlio_benchmark/common/constants.py
diff --git a/dlio_benchmark/common/data_structures.py b/dlio_benchmark/common/data_structures.py
diff --git a/src/common/enumerations.py → dlio_benchmark/common/enumerations.py b/src/common/enumerations.py → dlio_benchmark/common/enumerations.py
diff --git a/src/common/error_code.py → dlio_benchmark/common/error_code.py b/src/common/error_code.py → dlio_benchmark/common/error_code.py
diff --git a/dlio_benchmark/computation/__init__.py b/dlio_benchmark/computation/__init__.py
diff --git a/src/computation/asynchronous_computation.py → ...k/computation/asynchronous_computation.py b/src/computation/asynchronous_computation.py → ...k/computation/asynchronous_computation.py
@@ -16,7 +16,7 @@
 '''
 
 
-from src.computation.computation_handler import ComputationHandler
+from dlio_benchmark.computation.computation_handler import ComputationHandler
 
 
 class AsyncComputation(ComputationHandler):

diff --git a/src/computation/computation_factory.py → ...chmark/computation/computation_factory.py b/src/computation/computation_factory.py → ...chmark/computation/computation_factory.py
@@ -15,11 +15,11 @@
    limitations under the License.
 '''   
 
-from src.common.enumerations import ComputationType
-from src.common.error_code import ErrorCodes
-from src.computation.asynchronous_computation import AsyncComputation
-from src.computation.no_computation import NoComputation
-from src.computation.synchronous_computation import SyncComputation
+from dlio_benchmark.common.enumerations import ComputationType
+from dlio_benchmark.common.error_code import ErrorCodes
+from dlio_benchmark.computation.asynchronous_computation import AsyncComputation
+from dlio_benchmark.computation.no_computation import NoComputation
+from dlio_benchmark.computation.synchronous_computation import SyncComputation
 
 
 class ComputationFactory(object):

diff --git a/src/computation/computation_handler.py → ...chmark/computation/computation_handler.py b/src/computation/computation_handler.py → ...chmark/computation/computation_handler.py
diff --git a/src/computation/no_computation.py → dlio_benchmark/computation/no_computation.py b/src/computation/no_computation.py → dlio_benchmark/computation/no_computation.py
@@ -15,7 +15,7 @@
    limitations under the License.
 """
 
-from src.computation.computation_handler import ComputationHandler
+from dlio_benchmark.computation.computation_handler import ComputationHandler
 
 
 class NoComputation(ComputationHandler):

diff --git a/src/computation/synchronous_computation.py → ...rk/computation/synchronous_computation.py b/src/computation/synchronous_computation.py → ...rk/computation/synchronous_computation.py
@@ -15,7 +15,7 @@
    limitations under the License.
 """
 
-from src.computation.computation_handler import ComputationHandler
+from dlio_benchmark.computation.computation_handler import ComputationHandler
 
 
 class SyncComputation(ComputationHandler):

diff --git a/dlio_benchmark/configs/__init__.py b/dlio_benchmark/configs/__init__.py
diff --git a/configs/config.yaml → dlio_benchmark/configs/config.yaml b/configs/config.yaml → dlio_benchmark/configs/config.yaml
diff --git a/configs/hydra/help/dlio_benchmark_help.yaml → ...nfigs/hydra/help/dlio_benchmark_help.yaml b/configs/hydra/help/dlio_benchmark_help.yaml → ...nfigs/hydra/help/dlio_benchmark_help.yaml
@@ -26,13 +26,13 @@ template: |-
 
   DLIO - an IO benchmark for deep learning applications. 
 
-  Running the benchmark: python src/dlio_benchmark.py workload=unet3d
+  Running the benchmark: dlio_benchmark workload=unet3d
 
   One can select the workload configuration using "workload={WORKLOAD}". 
   The corresponding YAML file is ./configs/workload/{WORKLOAD}.yaml folder. 
   Available choise for $APP_CONFIG_GROUPS
   One can override everything in the command line, for example:
-  python src/dlio_benchmark.py workload.framework=tensorflow
+  dlio_benchmark workload.framework=tensorflow
 
   One can also create a custom YAML file for a specific workload. 
   An example of a YAML file is as follows. 

diff --git a/configs/hydra/job_logging/custom.yaml → ...ark/configs/hydra/job_logging/custom.yaml b/configs/hydra/job_logging/custom.yaml → ...ark/configs/hydra/job_logging/custom.yaml
diff --git a/configs/workload/bert.yaml → dlio_benchmark/configs/workload/bert.yaml b/configs/workload/bert.yaml → dlio_benchmark/configs/workload/bert.yaml
diff --git a/configs/workload/cosmoflow.yaml → ...benchmark/configs/workload/cosmoflow.yaml b/configs/workload/cosmoflow.yaml → ...benchmark/configs/workload/cosmoflow.yaml
diff --git a/configs/workload/default.yaml → dlio_benchmark/configs/workload/default.yaml b/configs/workload/default.yaml → dlio_benchmark/configs/workload/default.yaml
diff --git a/configs/workload/resnet50.yaml → ..._benchmark/configs/workload/resnet50.yaml b/configs/workload/resnet50.yaml → ..._benchmark/configs/workload/resnet50.yaml
diff --git a/configs/workload/unet3d.yaml → dlio_benchmark/configs/workload/unet3d.yaml b/configs/workload/unet3d.yaml → dlio_benchmark/configs/workload/unet3d.yaml
diff --git a/dlio_benchmark/data_generator/__init__.py b/dlio_benchmark/data_generator/__init__.py
diff --git a/src/data_generator/csv_generator.py → ...benchmark/data_generator/csv_generator.py b/src/data_generator/csv_generator.py → ...benchmark/data_generator/csv_generator.py
@@ -15,16 +15,16 @@
    limitations under the License.
 """
 
-from src.common.enumerations import Compression
-from src.data_generator.data_generator import DataGenerator
+from dlio_benchmark.common.enumerations import Compression
+from dlio_benchmark.data_generator.data_generator import DataGenerator
 import math
 import os
 
 import numpy as np
 import csv
 
 from shutil import copyfile
-from src.utils.utility import progress
+from dlio_benchmark.utils.utility import progress
 import pandas as pd
 
 """

diff --git a/src/data_generator/data_generator.py → ...enchmark/data_generator/data_generator.py b/src/data_generator/data_generator.py → ...enchmark/data_generator/data_generator.py
@@ -17,14 +17,14 @@
 
 from abc import ABC, abstractmethod
 
-from src.utils.config import ConfigArguments
-from src.storage.storage_factory import StorageFactory
+from dlio_benchmark.utils.config import ConfigArguments
+from dlio_benchmark.storage.storage_factory import StorageFactory
 import math
 from mpi4py import MPI
 from shutil import copyfile
 import numpy as np
 import logging
-from src.utils.utility import utcnow, add_padding
+from dlio_benchmark.utils.utility import utcnow, add_padding
 
 
 class DataGenerator(ABC):

diff --git a/src/data_generator/generator_factory.py → ...hmark/data_generator/generator_factory.py b/src/data_generator/generator_factory.py → ...hmark/data_generator/generator_factory.py
@@ -15,8 +15,8 @@
    limitations under the License.
 """
 
-from src.common.enumerations import FormatType
-from src.common.error_code import ErrorCodes
+from dlio_benchmark.common.enumerations import FormatType
+from dlio_benchmark.common.error_code import ErrorCodes
 
 
 
@@ -27,22 +27,22 @@ def __init__(self):
     @staticmethod
     def get_generator(type):
         if type == FormatType.TFRECORD:
-            from src.data_generator.tf_generator import TFRecordGenerator
+            from dlio_benchmark.data_generator.tf_generator import TFRecordGenerator
             return TFRecordGenerator()
         elif type == FormatType.HDF5:
-            from src.data_generator.hdf5_generator import HDF5Generator
+            from dlio_benchmark.data_generator.hdf5_generator import HDF5Generator
             return HDF5Generator()
         elif type == FormatType.CSV:
-            from src.data_generator.csv_generator import CSVGenerator
+            from dlio_benchmark.data_generator.csv_generator import CSVGenerator
             return CSVGenerator()
         elif type == FormatType.NPZ:
-            from src.data_generator.npz_generator import NPZGenerator
+            from dlio_benchmark.data_generator.npz_generator import NPZGenerator
             return NPZGenerator()
         elif type == FormatType.JPEG:
-            from src.data_generator.jpeg_generator import JPEGGenerator
+            from dlio_benchmark.data_generator.jpeg_generator import JPEGGenerator
             return JPEGGenerator()
         elif type == FormatType.PNG:
-            from src.data_generator.png_generator import PNGGenerator
+            from dlio_benchmark.data_generator.png_generator import PNGGenerator
             return PNGGenerator()
         else:
             raise Exception(str(ErrorCodes.EC1001))
diff --git a/src/data_generator/hdf5_generator.py → ...enchmark/data_generator/hdf5_generator.py b/src/data_generator/hdf5_generator.py → ...enchmark/data_generator/hdf5_generator.py
@@ -18,12 +18,12 @@
 import h5py
 import numpy as np
 
-from src.common.enumerations import Compression
-from src.data_generator.data_generator import DataGenerator
-from src.utils.utility import progress, Profile
+from dlio_benchmark.common.enumerations import Compression
+from dlio_benchmark.data_generator.data_generator import DataGenerator
+from dlio_benchmark.utils.utility import progress, Profile
 from shutil import copyfile
 
-from src.common.constants import MODULE_DATA_GENERATOR
+from dlio_benchmark.common.constants import MODULE_DATA_GENERATOR
 
 dlp = Profile(MODULE_DATA_GENERATOR)
 

diff --git a/src/data_generator/jpeg_generator.py → ...enchmark/data_generator/jpeg_generator.py b/src/data_generator/jpeg_generator.py → ...enchmark/data_generator/jpeg_generator.py
@@ -15,16 +15,16 @@
    limitations under the License.
 """
 
-from src.common.enumerations import Compression
-from src.data_generator.data_generator import DataGenerator
+from dlio_benchmark.common.enumerations import Compression
+from dlio_benchmark.data_generator.data_generator import DataGenerator
 
 import logging
 import numpy as np
 
-from src.utils.utility import progress, utcnow, Profile
+from dlio_benchmark.utils.utility import progress, utcnow, Profile
 from shutil import copyfile
 import PIL.Image as im
-from src.common.constants import MODULE_DATA_GENERATOR
+from dlio_benchmark.common.constants import MODULE_DATA_GENERATOR
 
 
 dlp = Profile(MODULE_DATA_GENERATOR)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		include requirements.txt
		recursive-include configs *
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,7 +16,7 @@ @@
     '''
-    from src.computation.computation_handler import ComputationHandler
+    from dlio_benchmark.computation.computation_handler import ComputationHandler
     class AsyncComputation(ComputationHandler):
@@ Expand Down @@