Skip to content

Commit

Permalink
Support training without lstmf files
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Apr 4, 2024
1 parent 912deb3 commit a4a22c0
Showing 1 changed file with 26 additions and 1 deletion.
27 changes: 26 additions & 1 deletion src/ccstruct/imagedata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

#include <allheaders.h> // for pixDestroy, pixGetHeight, pixGetWidth, lept_...

#include <cinttypes> // for PRId64
#include <cinttypes> // for PRId64
#include <fstream> // for std::ifstream

namespace tesseract {

Expand Down Expand Up @@ -546,6 +547,30 @@ bool DocumentData::ReCachePages() {
delete page;
}
pages_.clear();
#if !defined(TESSERACT_IMAGEDATA_AS_PIX)
  // Training without LSTMF files: when the document name ends in ".png",
  // load that image directly and take its transcription from the first line
  // of the sibling "<basename>.gt.txt" file. The document then consists of
  // exactly one page.
  // The suffix is tested with compare() rather than ends_with() so the code
  // does not require C++20, and the dot is included so that only real ".png"
  // extensions take this path.
  if (document_name_.size() >= 4 &&
      document_name_.compare(document_name_.size() - 4, 4, ".png") == 0) {
    // Replace the trailing "png" (keeping the dot) with "gt.txt".
    const std::string gt_name =
        document_name_.substr(0, document_name_.length() - 3) + "gt.txt";

    // Read the transcription (first line only). The stream closes itself
    // when it goes out of scope.
    std::string line;
    {
      std::ifstream gt_stream(gt_name);
      if (!gt_stream.is_open()) {
        tprintf("Error: cannot open ground truth file %s\n", gt_name.c_str());
        return false;
      }
      std::getline(gt_stream, line);
    }

    ImageData *image_data = ImageData::Build(document_name_.c_str(), 0, "",
                                             nullptr, 0, line.c_str(), nullptr);
    // NOTE(review): Build presumably returns nullptr when it gets no usable
    // text (e.g. an empty ground truth line) - guard before dereferencing.
    if (image_data == nullptr) {
      tprintf("Error: no usable ground truth in %s for image %s\n",
              gt_name.c_str(), document_name_.c_str());
      return false;
    }

    // pixRead yields a null pointer when the image cannot be read or decoded.
    auto *raw_pix = pixRead(document_name_.c_str());
    if (raw_pix == nullptr) {
      tprintf("Error: cannot read image %s\n", document_name_.c_str());
      delete image_data;
      return false;
    }
    Image image = raw_pix;
    image_data->SetPix(image);

    pages_.push_back(image_data);
    loaded_pages = 1;
    // With a single page the offset always wraps to 0.
    pages_offset_ %= loaded_pages;
    set_total_pages(loaded_pages);
    set_memory_used(memory_used() + image_data->MemoryUsed());
    tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(),
            loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(),
            document_name_.c_str());
    return !pages_.empty();
  }
#endif
TFile fp;
if (!fp.Open(document_name_.c_str(), reader_) ||
!fp.DeSerializeSize(&loaded_pages) || loaded_pages <= 0) {
Expand Down

0 comments on commit a4a22c0

Please sign in to comment.