Skip to content

Commit

Permalink
Revert "Last Batch Policy changes for file source reader (ROCm#182)"
Browse files Browse the repository at this point in the history
This reverts commit 87348ad.
  • Loading branch information
swetha097 committed Nov 12, 2024
1 parent e72ed10 commit b494d4e
Show file tree
Hide file tree
Showing 40 changed files with 457 additions and 698 deletions.
135 changes: 60 additions & 75 deletions rocAL/include/api/rocal_api_data_loaders.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rocAL/include/api/rocal_api_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ extern "C" TimingInfo ROCAL_API_CALL rocalGetTimingInfo(RocalContext rocal_conte
* \brief Retrieves the information about the size of the last batch.
* \ingroup group_rocal_info
* \param rocal_context
* \return The number of samples that were padded in the last batch in adherence with last_batch_policy and pad_last_batch_repeated.
* \return The number of samples that were padded in the last batch in adherence with last_batch_policy and last_batch_padded
*/
extern "C" size_t ROCAL_API_CALL rocalGetLastBatchPaddedSize(RocalContext rocal_context);

Expand Down
1 change: 0 additions & 1 deletion rocAL/include/api/rocal_api_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ class rocalTensor {
virtual ~rocalTensor() = default;
virtual void* buffer() = 0;
virtual unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type = ROCAL_MEMCPY_HOST) = 0;
virtual unsigned copy_data(void* user_buffer, uint x_offset, uint y_offset, uint max_cols, uint max_rows) = 0; // Copy only the ROI to the user_buffer [The padded region is not copied]
virtual unsigned num_of_dims() = 0;
virtual unsigned batch_size() = 0;
virtual std::vector<size_t> dims() = 0;
Expand Down
38 changes: 19 additions & 19 deletions rocAL/include/api/rocal_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ enum RocalMelScaleFormula {
ROCAL_MELSCALE_HTK
};

/*! \brief Tensor Last Batch Policy Type enum
/*! \brief Tensor Last Batch Policies
* \ingroup group_rocal_types
*/
enum RocalLastBatchPolicy {
Expand All @@ -448,7 +448,7 @@ enum RocalLastBatchPolicy {
/*! \brief ROCAL_LAST_BATCH_DROP - The last batch is dropped if there are not enough samples from the current epoch.
*/
ROCAL_LAST_BATCH_DROP = 1,
/*! \brief ROCAL_LAST_BATCH_PARTIAL - The last batch is partially filled with the remaining data from the current epoch, keeping the rest of the samples empty. (currently this policy works similar to FILL in rocAL, PARTIAL policy needs to be handled in the python iterator)
/*! \brief ROCAL_LAST_BATCH_PARTIAL - The last batch is partially filled with the remaining data from the current epoch, keeping the rest of the samples empty. (Currently this policy works similarly to FILL in rocAL; the PARTIAL policy needs to be handled on the Python side.)
*/
ROCAL_LAST_BATCH_PARTIAL = 2
};
Expand All @@ -468,22 +468,22 @@ enum RocalMissingComponentsBehaviour {
ROCAL_EMPTY = 2
};

/*! \brief rocAL RocalShardingInfo struct - bundles the last-batch and sharding options handed to a reader/loader.
* \ingroup group_rocal_types
*
* A default-constructed instance uses ROCAL_LAST_BATCH_FILL, pad_last_batch_repeated = true,
* stick_to_shard = true and shard_size = -1.
*/
struct RocalShardingInfo {
// Policy applied when the epoch does not have enough samples to fill the last batch (see RocalLastBatchPolicy).
RocalLastBatchPolicy last_batch_policy;
// NOTE(review): presumably selects padding by repeating the last sample rather than wrapping around the dataset - confirm against the reader implementation.
bool pad_last_batch_repeated;
// NOTE(review): presumably keeps a reader on its own shard across epochs instead of advancing to the next shard - confirm.
bool stick_to_shard;
// Shard size; defaults to -1. NOTE(review): -1 looks like a "not specified" sentinel - confirm how the reader interprets it.
int32_t shard_size;

// Default constructor: FILL policy, pad_last_batch_repeated = true, stick_to_shard = true, shard_size = -1.
RocalShardingInfo()
: last_batch_policy(RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL),
pad_last_batch_repeated(true),
stick_to_shard(true),
shard_size(-1)
{}
};
// /*! \brief rocAL RocalShardingInfo enum
// * \ingroup group_rocal_types
// */
// struct RocalShardingInfo {
// RocalLastBatchPolicy last_batch_policy;
// bool pad_last_batch_repeated;
// bool stick_to_shard;
// int32_t shard_size;

// // Constructor with default values
// RocalShardingInfo()
// : last_batch_policy(RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL),
// pad_last_batch_repeated(true),
// stick_to_shard(true),
// shard_size(-1)
// {}
// };

#endif // MIVISIONX_ROCAL_API_TYPES_H
1 change: 0 additions & 1 deletion rocAL/include/loaders/audio/audio_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ class AudioLoader : public LoaderModule {
void feed_external_input(const std::vector<std::string>& input_images_names, const std::vector<unsigned char*>& input_buffer,
const std::vector<ROIxywh>& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels,
ExternalSourceFileMode mode, bool eos) override { THROW("external source feed is not supported in audio loader") }
size_t last_batch_padded_size() override;

private:
bool is_out_of_data();
Expand Down
1 change: 0 additions & 1 deletion rocAL/include/loaders/audio/audio_loader_sharded.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ class AudioLoaderSharded : public LoaderModule {
void feed_external_input(const std::vector<std::string>& input_images_names, const std::vector<unsigned char*>& input_buffer,
const std::vector<ROIxywh>& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels,
ExternalSourceFileMode mode, bool eos) override { THROW("external source feed is not supported in audio loader") }
size_t last_batch_padded_size() override;

private:
void increment_loader_idx();
Expand Down
1 change: 0 additions & 1 deletion rocAL/include/loaders/audio/audio_read_and_decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ class AudioReadAndDecode {
const size_t max_decoded_channels);
// returns timing info or other status information
Timing GetTiming();
size_t last_batch_padded_size(); // The number of padded samples in the last batch

private:
std::vector<std::shared_ptr<AudioDecoder>> _decoder;
Expand Down
4 changes: 1 addition & 3 deletions rocAL/include/loaders/audio/node_audio_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,11 @@ class AudioLoaderNode : public Node {
/// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size.
/// \param mem_type Memory type, host or device
/// \param meta_data_reader Determines the meta-data information
/// \param sharding_info The members of ShardingInfo determines how the data is distributed among the shards and how the last batch is processed by the pipeline.
/// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count,
/// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available.
void Init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path,
const std::string &file_list_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop,
size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader,
const ShardingInfo& sharding_info);
size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader);
std::shared_ptr<LoaderModule> GetLoaderModule();

protected:
Expand Down
4 changes: 1 addition & 3 deletions rocAL/include/loaders/audio/node_audio_loader_single_shard.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,11 @@ class AudioLoaderSingleShardNode : public Node {
/// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size.
/// \param mem_type Memory type, host or device
/// \param meta_data_reader Determines the meta-data information
/// \param sharding_info The members of ShardingInfo determines how the data is distributed among the shards and how the last batch is processed by the pipeline.
/// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count,
/// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available.
void Init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path,
const std::string &file_list_path, StorageType storage_type, DecoderType decoder_type, bool shuffle,
bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader,
const ShardingInfo& sharding_info);
bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader);
std::shared_ptr<LoaderModule> GetLoaderModule();

protected:
Expand Down
2 changes: 1 addition & 1 deletion rocAL/include/loaders/image/node_fused_jpeg_crop.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class FusedJpegCropNode : public Node {
/// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available.
void init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type,
DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader,
unsigned num_attempts, std::vector<float> &random_area, std::vector<float> &random_aspect_ratio, const ShardingInfo& sharding_info = ShardingInfo(), const std::string &index_path = "");
unsigned num_attempts, std::vector<float> &random_area, std::vector<float> &random_aspect_ratio, std::pair<RocalBatchPolicy, bool> last_batch_info = {RocalBatchPolicy::FILL, true}, const std::string &index_path = "");

std::shared_ptr<LoaderModule> get_loader_module();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class FusedJpegCropSingleShardNode : public Node {
/// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available.
void init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type,
DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader,
unsigned num_attempts, std::vector<float> &random_area, std::vector<float> &random_aspect_ratio, const ShardingInfo& sharding_info = ShardingInfo(), const std::string &index_path = "");
unsigned num_attempts, std::vector<float> &random_area, std::vector<float> &random_aspect_ratio, std::pair<RocalBatchPolicy, bool> last_batch_info = {RocalBatchPolicy::FILL, true}, const std::string &index_path = "");

std::shared_ptr<LoaderModule> get_loader_module();

Expand Down
2 changes: 1 addition & 1 deletion rocAL/include/loaders/image/node_image_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ImageLoaderNode : public Node {
/// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count,
/// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available.
void init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, const std::map<std::string, std::string> feature_key_map, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop,
size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader, bool decoder_keep_orig = false, const ShardingInfo& sharding_info = ShardingInfo(), const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0, ExternalSourceFileMode external_file_mode = ExternalSourceFileMode::NONE, const std::string &index_path = "");
size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader, bool decoder_keep_orig = false, std::pair<RocalBatchPolicy, bool> last_batch_info = {RocalBatchPolicy::FILL, true}, const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0, ExternalSourceFileMode external_file_mode = ExternalSourceFileMode::NONE, const std::string &index_path = "");

std::shared_ptr<LoaderModule> get_loader_module();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ImageLoaderSingleShardNode : public Node {
/// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count,
/// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available.
void init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type,
bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader, bool decoder_keep_orig = false, const ShardingInfo& sharding_info = ShardingInfo(),
bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr<MetaDataReader> meta_data_reader, bool decoder_keep_orig = false, std::pair<RocalBatchPolicy, bool> last_batch_info = {RocalBatchPolicy::FILL, true},
const std::map<std::string, std::string> feature_key_map = std::map<std::string, std::string>(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0, ExternalSourceFileMode external_file_mode = ExternalSourceFileMode::NONE, const std::string &index_path = "");

std::shared_ptr<LoaderModule> get_loader_module();
Expand Down
1 change: 0 additions & 1 deletion rocAL/include/meta_data/meta_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,4 @@ class MetaDataReader {
virtual ImgSize lookup_image_size(const std::string& image_name) { return {}; }
virtual void set_aspect_ratio_grouping(bool aspect_ratio_grouping) { return; }
virtual bool get_aspect_ratio_grouping() const { return {}; }
virtual std::vector<std::string> get_relative_file_path() { return {}; } // Returns the relative file_path's of the reader
};
2 changes: 0 additions & 2 deletions rocAL/include/meta_data/text_file_meta_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class TextFileMetaDataReader : public MetaDataReader {
bool set_timestamp_mode() override { return false; }

const std::map<std::string, std::shared_ptr<MetaData>>& get_map_content() override { return _map_content; }
std::vector<std::string> get_relative_file_path() override { return _relative_file_path; }
TextFileMetaDataReader();

private:
Expand All @@ -46,5 +45,4 @@ class TextFileMetaDataReader : public MetaDataReader {
void add(std::string image_name, int label);
std::map<std::string, std::shared_ptr<MetaData>> _map_content;
std::string _path;
std::vector<std::string> _relative_file_path {};
};
4 changes: 2 additions & 2 deletions rocAL/include/pipeline/commons.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ struct Timing {
long long unsigned video_process_time= 0;
};

/*! \brief Tensor Last Batch Policy Type enum
/*! \brief Tensor Last Batch Policies
The last batch policies determine the behavior when there are not enough samples in the epoch to fill the last batch.
FILL - The last batch is filled by either repeating the last sample or by wrapping up the data set.
DROP - The last batch is dropped if it cannot be fully filled with data from the current epoch.
PARTIAL - The last batch is partially filled with the remaining data from the current epoch, keeping the rest of the samples empty. (currently this policy works similar to FILL in rocAL, PARTIAL policy needs to be handled in the pytorch iterator)
PARTIAL - The last batch is partially filled with the remaining data from the current epoch, and the remaining samples are padded either by repeating the last image or by wrapping around the dataset; the padded images are removed on the Python side.
*/
enum RocalBatchPolicy {
FILL = 0,
Expand Down
2 changes: 1 addition & 1 deletion rocAL/include/pipeline/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ class Tensor : public rocalTensor {
#endif
unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type) override;
//! Copying the output buffer with specified max_cols and max_rows values for the 2D buffer of size batch_size
unsigned copy_data(void* user_buffer, uint x_offset, uint y_offset, uint max_rows, uint max_cols);
unsigned copy_data(void* user_buffer, uint max_rows, uint max_cols);
//! Default destructor
/*! Releases the OpenVX Tensor object */
~Tensor();
Expand Down
41 changes: 16 additions & 25 deletions rocAL/include/readers/file_source_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ THE SOFTWARE.
#include <vector>

#include "pipeline/commons.h"
#include "pipeline/timing_debug.h"
#include "readers/image/image_reader.h"
#include "pipeline/timing_debug.h"

class FileSourceReader : public Reader {
public:
Expand Down Expand Up @@ -67,11 +67,9 @@ class FileSourceReader : public Reader {

FileSourceReader();

size_t last_batch_padded_size() override; // The size of the number of samples padded in the last batch

std::string get_root_folder_path() override; // Returns the root folder path
//! Returns the number of images in the last batch
size_t last_batch_padded_size() override;

std::vector<std::string> get_file_paths_from_meta_data_reader() override; // Returns the relative file path from the meta-data reader
private:
//! opens the folder containing the images
Reader::Status open_folder();
Expand All @@ -85,37 +83,30 @@ class FileSourceReader : public Reader {
unsigned _curr_file_idx;
FILE *_current_fPtr;
unsigned _current_file_size;
unsigned _shard_start_idx;
std::vector<unsigned> _shard_start_idx_vector, _shard_end_idx_vector;
std::string _last_id;
std::string _last_file_name, _last_file_path, _absolute_file_path;
std::string _last_file_name, _last_file_path;
size_t _shard_id = 0;
size_t _shard_count = 1; // equivalent of batch size
int32_t _shard_size = -1;
size_t _batch_size = 1;
size_t _padded_samples = 0;
//!< _batch_count Defines the quantum count of the images to be read. It's usually equal to the user's batch size.
/// The loader will repeat images if necessary to be able to have images available in multiples of the load_batch_count,
/// for instance if there are 10 images in the dataset and _batch_count is 3, the loader repeats 2 images as if there are 12 images available.
size_t _batch_count = 1;
size_t _file_id = 0;
size_t _in_batch_read_count = 0;
bool _loop;
bool _shuffle;
int _read_counter = 0;
//!< _file_count_all_shards total number of files, used to figure out the max_batch_size (usually needed for distributed training).
size_t _file_count_all_shards;
void incremenet_read_ptr();
void increment_curr_file_idx();
int release();
size_t get_file_shard_id();
void incremenet_file_id() { _file_id++; }
void fill_last_batch();
void replicate_last_batch_to_pad_partial_shard();
std::shared_ptr<MetaDataReader> _meta_data_reader = nullptr;
//! Pair containing the last batch policy and pad_last_batch_repeated values for deciding what to do with last batch
ShardingInfo _last_batch_info = ShardingInfo(); // The members of ShardingInfo determines how the data is distributed among the shards and how the last batch is processed by the pipeline.
size_t _last_batch_padded_size = 0; // The size of number of padded samples in the last batch
size_t _num_padded_samples = 0; //! Number of samples that are padded in the last batch which would differ for each shard.
bool _stick_to_shard = false;
bool _pad_last_batch_repeated = false;
Reader::Status generate_file_names(); // Function that would generate _file_names containing all the samples in the dataset
void compute_start_and_end_idx_of_all_shards(); // Start Idx of all the Shards
size_t get_dataset_size(); // DataSet Size
size_t actual_shard_size_without_padding(); // Actual Number of Files present in the shard (without padding)
size_t largest_shard_size_without_padding(); // The size of the shard having largest files (without padding)
//!< Used to advance to the next shard's data to increase the entropy of the data seen by the pipeline>
void increment_shard_id();
//! Pair containing the last batch policy and last_batch_padded values for deciding what to do with last batch
std::pair<RocalBatchPolicy, bool> _last_batch_info;
size_t _last_batch_padded_size = 0;
Reader::Status generate_file_names();
};
Loading

0 comments on commit b494d4e

Please sign in to comment.