diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index 19ddb0d15a..01b7e8cecf 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -401,27 +401,6 @@ class PointerVector : public GenericVector { GenericVector::truncate(size); } - // Compact the vector by deleting elements for which delete_cb returns - // true. delete_cb is a permanent callback and will be deleted. - void compact(std::function delete_cb) { - int new_size = 0; - int old_index = 0; - // Until the callback returns true, the elements stay the same. - while (old_index < GenericVector::size_used_ && - !delete_cb(GenericVector::data_[old_index++])) { - ++new_size; - } - // Now just copy anything else that gets false from delete_cb. - for (; old_index < GenericVector::size_used_; ++old_index) { - if (!delete_cb(GenericVector::data_[old_index])) { - GenericVector::data_[new_size++] = GenericVector::data_[old_index]; - } else { - delete GenericVector::data_[old_index]; - } - } - GenericVector::size_used_ = new_size; - } - // Clear the array, calling the clear callback function if any. // All the owned callbacks are also deleted. // If you don't want the callbacks to be deleted, before calling clear, set diff --git a/src/training/common/trainingsampleset.cpp b/src/training/common/trainingsampleset.cpp index cfc3c5fe05..1dc74e128c 100644 --- a/src/training/common/trainingsampleset.cpp +++ b/src/training/common/trainingsampleset.cpp @@ -82,12 +82,15 @@ TrainingSampleSet::TrainingSampleSet(const FontInfoTable &font_table) , fontinfo_table_(font_table) {} TrainingSampleSet::~TrainingSampleSet() { + for (auto sample : samples_) { + delete sample; + } delete font_class_array_; } // Writes to the given file. Returns false in case of error. 
bool TrainingSampleSet::Serialize(FILE *fp) const { - if (!samples_.Serialize(fp)) + if (!tesseract::Serialize(fp, samples_)) return false; if (!unicharset_.save_to_file(fp)) return false; @@ -106,7 +109,7 @@ bool TrainingSampleSet::Serialize(FILE *fp) const { // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool TrainingSampleSet::DeSerialize(bool swap, FILE *fp) { - if (!samples_.DeSerialize(swap, fp)) + if (!tesseract::DeSerialize(swap, fp, samples_)) return false; num_raw_samples_ = samples_.size(); if (!unicharset_.load_from_file(fp)) @@ -498,17 +501,26 @@ void TrainingSampleSet::KillSample(TrainingSample *sample) { // Deletes all samples with zero features marked by KillSample. void TrainingSampleSet::DeleteDeadSamples() { using namespace std::placeholders; // for _1 - samples_.compact(std::bind(&TrainingSampleSet::DeleteableSample, this, _1)); + auto old_it = samples_.begin(); + for (; old_it < samples_.end(); ++old_it) { + if (*old_it == nullptr || (*old_it)->class_id() < 0) { + break; + } + } + auto new_it = old_it; + for (; old_it < samples_.end(); ++old_it) { + if (*old_it == nullptr || (*old_it)->class_id() < 0) { + delete *old_it; + } else { + *new_it = *old_it; + ++new_it; + } + } + samples_.resize(new_it - samples_.begin()); /* new_it is one past the last kept sample, so this difference is the exact survivor count */ num_raw_samples_ = samples_.size(); // Samples must be re-organized now we have deleted a few. } -// Callback function returns true if the given sample is to be deleted, due -// to having a negative classid. -bool TrainingSampleSet::DeleteableSample(const TrainingSample *sample) { - return sample == nullptr || sample->class_id() < 0; -} - // Construct an array to access the samples by font,class pair. 
void TrainingSampleSet::OrganizeByFontAndClass() { // Font indexes are sparse, so we used a map to compact them, so we can diff --git a/src/training/common/trainingsampleset.h b/src/training/common/trainingsampleset.h index da5def5fe5..0329f68e83 100644 --- a/src/training/common/trainingsampleset.h +++ b/src/training/common/trainingsampleset.h @@ -172,10 +172,6 @@ class TrainingSampleSet { // must be called after as the samples have been renumbered. void DeleteDeadSamples(); - // Callback function returns true if the given sample is to be deleted, due - // to having a negative classid. - bool DeleteableSample(const TrainingSample *sample); - // Construct an array to access the samples by font,class pair. void OrganizeByFontAndClass(); @@ -254,7 +250,7 @@ class TrainingSampleSet { std::vector distance_cache; }; - PointerVector samples_; + std::vector samples_; // Number of samples before replication/randomization. int num_raw_samples_; // Character set we are training for.