From e38b98468d87d2a57484c8d062c4015c0f076dab Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Mon, 19 Nov 2018 20:21:41 -0500 Subject: [PATCH 1/9] Add support for ALTO output --- src/api/baseapi.cpp | 4537 +++++++++++++++++++------------------ src/api/baseapi.h | 1692 +++++++------- src/api/capi.cpp | 44 +- src/api/capi.h | 96 +- src/api/renderer.cpp | 426 ++-- src/api/renderer.h | 362 +-- src/api/tesseractmain.cpp | 868 +++---- tessdata/configs/alto | 1 + 8 files changed, 4148 insertions(+), 3878 deletions(-) create mode 100644 tessdata/configs/alto diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 4caf4428f8..05f04a443c 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -94,34 +94,34 @@ BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin"); namespace tesseract { /** Minimum sensible image size to be worth running tesseract. */ -const int kMinRectSize = 10; + const int kMinRectSize = 10; /** Character returned when Tesseract couldn't recognize as anything. */ -const char kTesseractReject = '~'; + const char kTesseractReject = '~'; /** Character used by UNLV error counter as a reject. */ -const char kUNLVReject = '~'; + const char kUNLVReject = '~'; /** Character used by UNLV as a suspect marker. */ -const char kUNLVSuspect = '^'; + const char kUNLVSuspect = '^'; /** * Filename used for input image file, from which to derive a name to search * for a possible UNLV zone file, if none is specified by SetInputName. */ -const char* kInputFile = "noname.tif"; + const char* kInputFile = "noname.tif"; /** * Temp file used for storing current parameters before applying retry values. */ -const char* kOldVarsFile = "failed_vars.txt"; + const char* kOldVarsFile = "failed_vars.txt"; /** Max string length of an int. */ -const int kMaxIntSize = 22; + const int kMaxIntSize = 22; /* Add all available languages recursively. 
*/ -static void addAvailableLanguages(const STRING &datadir, const STRING &base, - GenericVector* langs) -{ - const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; - const size_t extlen = sizeof(kTrainedDataSuffix); + static void addAvailableLanguages(const STRING &datadir, const STRING &base, + GenericVector* langs) + { + const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; + const size_t extlen = sizeof(kTrainedDataSuffix); #ifdef _WIN32 - WIN32_FIND_DATA data; + WIN32_FIND_DATA data; HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); if (handle != INVALID_HANDLE_VALUE) { BOOL result = TRUE; @@ -146,83 +146,83 @@ static void addAvailableLanguages(const STRING &datadir, const STRING &base, FindClose(handle); } #else // _WIN32 - DIR* dir = opendir((datadir + base).string()); - if (dir != nullptr) { - dirent *de; - while ((de = readdir(dir))) { - char *name = de->d_name; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - struct stat st; - if (stat((datadir + base2 + name).string(), &st) == 0 && - (st.st_mode & S_IFDIR) == S_IFDIR) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); + DIR* dir = opendir((datadir + base).string()); + if (dir != nullptr) { + dirent *de; + while ((de = readdir(dir))) { + char *name = de->d_name; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + struct stat st; + if (stat((datadir + base2 + name).string(), &st) == 0 && + (st.st_mode & S_IFDIR) == S_IFDIR) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' 
&& + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); + } + } } } + closedir(dir); } - } - closedir(dir); - } #endif -} + } // Compare two STRING values (used for sorting). -static int CompareSTRING(const void* p1, const void* p2) { - const STRING* s1 = static_cast(p1); - const STRING* s2 = static_cast(p2); - return strcmp(s1->c_str(), s2->c_str()); -} - -TessBaseAPI::TessBaseAPI() - : tesseract_(nullptr), - osd_tesseract_(nullptr), - equ_detect_(nullptr), - reader_(nullptr), - // Thresholder is initialized to nullptr here, but will be set before use by: - // A constructor of a derived API, SetThresholder(), or - // created implicitly when used in InternalSetImage. - thresholder_(nullptr), - paragraph_models_(nullptr), - block_list_(nullptr), - page_res_(nullptr), - input_file_(nullptr), - output_file_(nullptr), - datapath_(nullptr), - language_(nullptr), - last_oem_requested_(OEM_DEFAULT), - recognition_done_(false), - truth_cb_(nullptr), - rect_left_(0), - rect_top_(0), - rect_width_(0), - rect_height_(0), - image_width_(0), - image_height_(0) { - const char *locale; - locale = std::setlocale(LC_ALL, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_CTYPE, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_NUMERIC, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); -} - -TessBaseAPI::~TessBaseAPI() { - End(); -} + static int CompareSTRING(const void* p1, const void* p2) { + const STRING* s1 = static_cast(p1); + const STRING* s2 = static_cast(p2); + return strcmp(s1->c_str(), s2->c_str()); + } + + TessBaseAPI::TessBaseAPI() + : tesseract_(nullptr), + osd_tesseract_(nullptr), + equ_detect_(nullptr), + reader_(nullptr), + // Thresholder is initialized to nullptr here, but will be set before use by: + // A constructor of a derived API, SetThresholder(), or + // created implicitly when used in InternalSetImage. 
+ thresholder_(nullptr), + paragraph_models_(nullptr), + block_list_(nullptr), + page_res_(nullptr), + input_file_(nullptr), + output_file_(nullptr), + datapath_(nullptr), + language_(nullptr), + last_oem_requested_(OEM_DEFAULT), + recognition_done_(false), + truth_cb_(nullptr), + rect_left_(0), + rect_top_(0), + rect_width_(0), + rect_height_(0), + image_width_(0), + image_height_(0) { + const char *locale; + locale = std::setlocale(LC_ALL, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_CTYPE, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_NUMERIC, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + } + + TessBaseAPI::~TessBaseAPI() { + End(); + } /** * Returns the version identifier as a static string. Do not delete. */ -const char* TessBaseAPI::Version() { - return PACKAGE_VERSION; -} + const char* TessBaseAPI::Version() { + return PACKAGE_VERSION; + } /** * If compiled with OpenCL AND an available OpenCL @@ -232,13 +232,13 @@ const char* TessBaseAPI::Version() { * otherwise *device=nullptr and returns 0. */ #ifdef USE_OPENCL -#ifdef USE_DEVICE_SELECTION + #ifdef USE_DEVICE_SELECTION #include "opencl_device_selection.h" #endif #endif -size_t TessBaseAPI::getOpenCLDevice(void **data) { + size_t TessBaseAPI::getOpenCLDevice(void **data) { #ifdef USE_OPENCL -#ifdef USE_DEVICE_SELECTION + #ifdef USE_DEVICE_SELECTION ds_device device = OpenclDevice::getDeviceSelection(); if (device.type == DS_DEVICE_OPENCL_DEVICE) { *data = new cl_device_id; @@ -248,17 +248,17 @@ size_t TessBaseAPI::getOpenCLDevice(void **data) { #endif #endif - *data = nullptr; - return 0; -} + *data = nullptr; + return 0; + } /** * Writes the thresholded image to stderr as a PBM file on receipt of a * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). 
*/ -void TessBaseAPI::CatchSignals() { + void TessBaseAPI::CatchSignals() { #ifdef __linux__ - struct sigaction action; + struct sigaction action; memset(&action, 0, sizeof(action)); action.sa_handler = &signal_exit; action.sa_flags = SA_RESETHAND; @@ -266,81 +266,81 @@ void TessBaseAPI::CatchSignals() { sigaction(SIGFPE, &action, nullptr); sigaction(SIGBUS, &action, nullptr); #else - // Warn API users that an implementation is needed. - tprintf("CatchSignals has no non-linux implementation!\n"); + // Warn API users that an implementation is needed. + tprintf("CatchSignals has no non-linux implementation!\n"); #endif -} + } /** * Set the name of the input file. Needed only for training and * loading a UNLV zone file. */ -void TessBaseAPI::SetInputName(const char* name) { - if (input_file_ == nullptr) - input_file_ = new STRING(name); - else - *input_file_ = name; -} + void TessBaseAPI::SetInputName(const char* name) { + if (input_file_ == nullptr) + input_file_ = new STRING(name); + else + *input_file_ = name; + } /** Set the name of the output files. Needed only for debugging. 
*/ -void TessBaseAPI::SetOutputName(const char* name) { - if (output_file_ == nullptr) - output_file_ = new STRING(name); - else - *output_file_ = name; -} - -bool TessBaseAPI::SetVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, - tesseract_->params()); -} - -bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, - tesseract_->params()); -} - -bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - IntParam *p = ParamUtils::FindParam( - name, GlobalParams()->int_params, tesseract_->params()->int_params); - if (p == nullptr) return false; - *value = (int32_t)(*p); - return true; -} - -bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - BoolParam *p = ParamUtils::FindParam( - name, GlobalParams()->bool_params, tesseract_->params()->bool_params); - if (p == nullptr) return false; - *value = (BOOL8)(*p); - return true; -} - -const char *TessBaseAPI::GetStringVariable(const char *name) const { - StringParam *p = ParamUtils::FindParam( - name, GlobalParams()->string_params, tesseract_->params()->string_params); - return (p != nullptr) ? 
p->string() : nullptr; -} - -bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - DoubleParam *p = ParamUtils::FindParam( - name, GlobalParams()->double_params, tesseract_->params()->double_params); - if (p == nullptr) return false; - *value = (double)(*p); - return true; -} + void TessBaseAPI::SetOutputName(const char* name) { + if (output_file_ == nullptr) + output_file_ = new STRING(name); + else + *output_file_ = name; + } + + bool TessBaseAPI::SetVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, + tesseract_->params()); + } + + bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, + tesseract_->params()); + } + + bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + IntParam *p = ParamUtils::FindParam( + name, GlobalParams()->int_params, tesseract_->params()->int_params); + if (p == nullptr) return false; + *value = (int32_t)(*p); + return true; + } + + bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + BoolParam *p = ParamUtils::FindParam( + name, GlobalParams()->bool_params, tesseract_->params()->bool_params); + if (p == nullptr) return false; + *value = (BOOL8)(*p); + return true; + } + + const char *TessBaseAPI::GetStringVariable(const char *name) const { + StringParam *p = ParamUtils::FindParam( + name, GlobalParams()->string_params, tesseract_->params()->string_params); + return (p != nullptr) ? 
p->string() : nullptr; + } + + bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + DoubleParam *p = ParamUtils::FindParam( + name, GlobalParams()->double_params, tesseract_->params()->double_params); + if (p == nullptr) return false; + *value = (double)(*p); + return true; + } /** Get value of named variable as a string, if it exists. */ -bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); -} + bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { + return ParamUtils::GetParamAsString(name, tesseract_->params(), val); + } /** Print Tesseract parameters to the given file. */ -void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params()); -} + void TessBaseAPI::PrintVariables(FILE *fp) const { + ParamUtils::PrintParams(fp, tesseract_->params()); + } /** * The datapath must be the name of the data directory (no ending /) or @@ -350,90 +350,90 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * be returned. * @return: 0 on success and -1 on initialization failure. */ -int TessBaseAPI::Init(const char* datapath, const char* language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params) { - return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, - vars_values, set_only_non_debug_params, nullptr); -} + int TessBaseAPI::Init(const char* datapath, const char* language, + OcrEngineMode oem, char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params) { + return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, + vars_values, set_only_non_debug_params, nullptr); + } // In-memory version reads the traineddata file directly from the given // data[data_size] array. 
Also implements the version with a datapath in data, // flagged by data_size = 0. -int TessBaseAPI::Init(const char* data, int data_size, const char* language, - OcrEngineMode oem, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader) { - PERF_COUNT_START("TessBaseAPI::Init") - // Default language is "eng". - if (language == nullptr) language = "eng"; - STRING datapath = data_size == 0 ? data : language; - // If the datapath, OcrEngineMode or the language have changed - start again. - // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language - // actually used. They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). - if (tesseract_ != nullptr && - (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || - last_oem_requested_ != oem || - (*language_ != language && tesseract_->lang != language))) { - delete tesseract_; - tesseract_ = nullptr; - } - // PERF_COUNT_SUB("delete tesseract_") + int TessBaseAPI::Init(const char* data, int data_size, const char* language, + OcrEngineMode oem, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader) { + PERF_COUNT_START("TessBaseAPI::Init") + // Default language is "eng". + if (language == nullptr) language = "eng"; + STRING datapath = data_size == 0 ? data : language; + // If the datapath, OcrEngineMode or the language have changed - start again. + // Note that the language_ field stores the last requested language that was + // initialized successfully, while tesseract_->lang stores the language + // actually used. 
They differ only if the requested language was nullptr, in + // which case tesseract_->lang is set to the Tesseract default ("eng"). + if (tesseract_ != nullptr && + (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || + last_oem_requested_ != oem || + (*language_ != language && tesseract_->lang != language))) { + delete tesseract_; + tesseract_ = nullptr; + } + // PERF_COUNT_SUB("delete tesseract_") #ifdef USE_OPENCL - OpenclDevice od; + OpenclDevice od; od.InitEnv(); #endif - PERF_COUNT_SUB("OD::InitEnv()") - bool reset_classifier = true; - if (tesseract_ == nullptr) { - reset_classifier = false; - tesseract_ = new Tesseract; - if (reader != nullptr) reader_ = reader; - TessdataManager mgr(reader_); - if (data_size != 0) { - mgr.LoadMemBuffer(language, data, data_size); - } - if (tesseract_->init_tesseract( - datapath.string(), - output_file_ != nullptr ? output_file_->string() : nullptr, - language, oem, configs, configs_size, vars_vec, vars_values, - set_only_non_debug_params, &mgr) != 0) { - return -1; - } - } + PERF_COUNT_SUB("OD::InitEnv()") + bool reset_classifier = true; + if (tesseract_ == nullptr) { + reset_classifier = false; + tesseract_ = new Tesseract; + if (reader != nullptr) reader_ = reader; + TessdataManager mgr(reader_); + if (data_size != 0) { + mgr.LoadMemBuffer(language, data, data_size); + } + if (tesseract_->init_tesseract( + datapath.string(), + output_file_ != nullptr ? output_file_->string() : nullptr, + language, oem, configs, configs_size, vars_vec, vars_values, + set_only_non_debug_params, &mgr) != 0) { + return -1; + } + } - PERF_COUNT_SUB("update tesseract_") - // Update datapath and language requested for the last valid initialization. 
- if (datapath_ == nullptr) - datapath_ = new STRING(datapath); - else - *datapath_ = datapath; - if ((strcmp(datapath_->string(), "") == 0) && - (strcmp(tesseract_->datadir.string(), "") != 0)) - *datapath_ = tesseract_->datadir; - - if (language_ == nullptr) - language_ = new STRING(language); - else - *language_ = language; - last_oem_requested_ = oem; + PERF_COUNT_SUB("update tesseract_") + // Update datapath and language requested for the last valid initialization. + if (datapath_ == nullptr) + datapath_ = new STRING(datapath); + else + *datapath_ = datapath; + if ((strcmp(datapath_->string(), "") == 0) && + (strcmp(tesseract_->datadir.string(), "") != 0)) + *datapath_ = tesseract_->datadir; + + if (language_ == nullptr) + language_ = new STRING(language); + else + *language_ = language; + last_oem_requested_ = oem; #ifndef DISABLED_LEGACY_ENGINE - // PERF_COUNT_SUB("update last_oem_requested_") - // For same language and datapath, just reset the adaptive classifier. - if (reset_classifier) { - tesseract_->ResetAdaptiveClassifier(); - PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") - } + // PERF_COUNT_SUB("update last_oem_requested_") + // For same language and datapath, just reset the adaptive classifier. + if (reset_classifier) { + tesseract_->ResetAdaptiveClassifier(); + PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") + } #endif // ndef DISABLED_LEGACY_ENGINE - PERF_COUNT_END - return 0; -} + PERF_COUNT_END + return 0; + } /** * Returns the languages string used in the last valid initialization. @@ -443,38 +443,38 @@ int TessBaseAPI::Init(const char* data, int data_size, const char* language, * loaded use GetLoadedLanguagesAsVector. * The returned string should NOT be deleted. */ -const char* TessBaseAPI::GetInitLanguagesAsString() const { - return (language_ == nullptr || language_->string() == nullptr) ? 
- "" : language_->string(); -} + const char* TessBaseAPI::GetInitLanguagesAsString() const { + return (language_ == nullptr || language_->string() == nullptr) ? + "" : language_->string(); + } /** * Returns the loaded languages in the vector of STRINGs. * Includes all languages loaded by the last Init, including those loaded * as dependencies of other loaded languages. */ -void TessBaseAPI::GetLoadedLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) - langs->push_back(tesseract_->get_sub_lang(i)->lang); - } -} + void TessBaseAPI::GetLoadedLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + langs->push_back(tesseract_->lang); + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) + langs->push_back(tesseract_->get_sub_lang(i)->lang); + } + } /** * Returns the available languages in the sorted vector of STRINGs. */ -void TessBaseAPI::GetAvailableLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir, "", langs); - langs->sort(CompareSTRING); - } -} + void TessBaseAPI::GetAvailableLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + addAvailableLanguages(tesseract_->datadir, "", langs); + langs->sort(CompareSTRING); + } + } //TODO(amit): Adapt to lstm #ifndef DISABLED_LEGACY_ENGINE @@ -484,61 +484,61 @@ void TessBaseAPI::GetAvailableLanguagesAsVector( * WARNING: temporary! This function will be removed from here and placed * in a separate API at some future time. 
*/ -int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - else - ParamUtils::ResetToDefaults(tesseract_->params()); - TessdataManager mgr; - return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); -} + int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + else + ParamUtils::ResetToDefaults(tesseract_->params()); + TessdataManager mgr; + return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); + } #endif // ndef DISABLED_LEGACY_ENGINE /** * Init only for page layout analysis. Use only for calls to SetImage and * AnalysePage. Calls that attempt recognition will generate an error. */ -void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; - #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); - #endif - } -} + void TessBaseAPI::InitForAnalysePage() { + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; +#ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); +#endif + } + } /** * Read a "config" file containing a set of parameter name, value pairs. * Searches the standard places: tessdata/configs, tessdata/tessconfigs * and also accepts a relative or absolute path name. */ -void TessBaseAPI::ReadConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); -} + void TessBaseAPI::ReadConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); + } /** Same as above, but only set debug params from the given config file. 
*/ -void TessBaseAPI::ReadDebugConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); -} + void TessBaseAPI::ReadDebugConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); + } /** * Set the current page segmentation mode. Defaults to PSM_AUTO. * The mode is stored as an IntParam so it can also be modified by * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ -void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - tesseract_->tessedit_pageseg_mode.set_value(mode); -} + void TessBaseAPI::SetPageSegMode(PageSegMode mode) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + tesseract_->tessedit_pageseg_mode.set_value(mode); + } /** Return the current page segmentation mode. */ -PageSegMode TessBaseAPI::GetPageSegMode() const { - if (tesseract_ == nullptr) - return PSM_SINGLE_BLOCK; - return static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); -} + PageSegMode TessBaseAPI::GetPageSegMode() const { + if (tesseract_ == nullptr) + return PSM_SINGLE_BLOCK; + return static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); + } /** * Recognize a rectangle from an image and return the result as a string. @@ -553,35 +553,35 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. */ -char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, int top, - int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) - return nullptr; // Nothing worth doing. - - // Since this original api didn't give the exact size of the image, - // we have to invent a reasonable value. - int bits_per_pixel = bytes_per_pixel == 0 ? 
1 : bytes_per_pixel * 8; - SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, - bytes_per_pixel, bytes_per_line); - SetRectangle(left, top, width, height); - - return GetUTF8Text(); -} + char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + int left, int top, + int width, int height) { + if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) + return nullptr; // Nothing worth doing. + + // Since this original api didn't give the exact size of the image, + // we have to invent a reasonable value. + int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; + SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, + bytes_per_pixel, bytes_per_line); + SetRectangle(left, top, width, height); + + return GetUTF8Text(); + } #ifndef DISABLED_LEGACY_ENGINE /** * Call between pages or documents etc to free up memory and forget * adaptive data. */ -void TessBaseAPI::ClearAdaptiveClassifier() { - if (tesseract_ == nullptr) - return; - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); -} + void TessBaseAPI::ClearAdaptiveClassifier() { + if (tesseract_ == nullptr) + return; + tesseract_->ResetAdaptiveClassifier(); + tesseract_->ResetDocumentDictionary(); + } #endif // ndef DISABLED_LEGACY_ENGINE /** @@ -591,22 +591,22 @@ void TessBaseAPI::ClearAdaptiveClassifier() { * full image, so it may be followed immediately by a GetUTF8Text, and it * will automatically perform recognition. 
*/ -void TessBaseAPI::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { - if (InternalSetImage()) { - thresholder_->SetImage(imagedata, width, height, - bytes_per_pixel, bytes_per_line); - SetInputImage(thresholder_->GetPixRect()); - } -} + void TessBaseAPI::SetImage(const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, int bytes_per_line) { + if (InternalSetImage()) { + thresholder_->SetImage(imagedata, width, height, + bytes_per_pixel, bytes_per_line); + SetInputImage(thresholder_->GetPixRect()); + } + } -void TessBaseAPI::SetSourceResolution(int ppi) { - if (thresholder_) - thresholder_->SetSourceYResolution(ppi); - else - tprintf("Please call SetImage before SetSourceResolution.\n"); -} + void TessBaseAPI::SetSourceResolution(int ppi) { + if (thresholder_) + thresholder_->SetSourceYResolution(ppi); + else + tprintf("Please call SetImage before SetSourceResolution.\n"); + } /** * Provide an image for Tesseract to recognize. As with SetImage above, @@ -616,53 +616,53 @@ void TessBaseAPI::SetSourceResolution(int ppi) { * Use Pix where possible. Tesseract uses Pix as its internal representation * and it is therefore more efficient to provide a Pix directly. 
*/ -void TessBaseAPI::SetImage(Pix* pix) { - if (InternalSetImage()) { - if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { - // remove alpha channel from png - PIX* p1 = pixRemoveAlpha(pix); - pixSetSpp(p1, 3); - pix = pixCopy(nullptr, p1); - pixDestroy(&p1); - } - thresholder_->SetImage(pix); - SetInputImage(thresholder_->GetPixRect()); - } -} + void TessBaseAPI::SetImage(Pix* pix) { + if (InternalSetImage()) { + if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { + // remove alpha channel from png + PIX* p1 = pixRemoveAlpha(pix); + pixSetSpp(p1, 3); + pix = pixCopy(nullptr, p1); + pixDestroy(&p1); + } + thresholder_->SetImage(pix); + SetInputImage(thresholder_->GetPixRect()); + } + } /** * Restrict recognition to a sub-rectangle of the image. Call after SetImage. * Each SetRectangle clears the recogntion results so multiple rectangles * can be recognized with the same image. */ -void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) - return; - thresholder_->SetRectangle(left, top, width, height); - ClearResults(); -} + void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { + if (thresholder_ == nullptr) + return; + thresholder_->SetRectangle(left, top, width, height); + ClearResults(); + } /** * ONLY available after SetImage if you have Leptonica installed. * Get a copy of the internal thresholded image from Tesseract. 
*/ -Pix* TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return nullptr; - } - return pixClone(tesseract_->pix_binary()); -} + Pix* TessBaseAPI::GetThresholdedImage() { + if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return nullptr; + } + return pixClone(tesseract_->pix_binary()); + } /** * Get the result of page layout analysis as a leptonica-style * Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ -Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { - return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); -} + Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { + return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); + } /** * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. @@ -672,11 +672,11 @@ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { * If paraids is not nullptr, the paragraph-id of each line within its block is * also returned as an array of one element per line. delete [] after use. */ -Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids) { - return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, - pixa, blockids, paraids); -} + Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, + Pixa** pixa, int** blockids, int** paraids) { + return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, + pixa, blockids, paraids); + } /** * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa @@ -686,18 +686,18 @@ Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, * If blockids is not nullptr, the block-id of each line is also returned as an * array of one element per line. 
delete [] after use. */ -Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { - return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); -} + Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { + return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); + } /** * Get the words as a leptonica-style * Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ -Boxa* TessBaseAPI::GetWords(Pixa** pixa) { - return GetComponentImages(RIL_WORD, true, pixa, nullptr); -} + Boxa* TessBaseAPI::GetWords(Pixa** pixa) { + return GetComponentImages(RIL_WORD, true, pixa, nullptr); + } /** * Gets the individual connected (text) components (created @@ -705,9 +705,9 @@ Boxa* TessBaseAPI::GetWords(Pixa** pixa) { * as a leptonica-style Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ -Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { - return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); -} + Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { + return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); + } /** * Get the given level kind of components (block, textline, word etc.) as a @@ -717,94 +717,94 @@ Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { * as an array of one element per component. delete [] after use. * If text_only is true, then only text components are returned. */ -Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, - bool text_only, bool raw_image, - const int raw_padding, - Pixa** pixa, int** blockids, - int** paraids) { - PageIterator* page_it = GetIterator(); - if (page_it == nullptr) - page_it = AnalyseLayout(); - if (page_it == nullptr) - return nullptr; // Failed. - - // Count the components to get a size for the arrays. - int component_count = 0; - int left, top, right, bottom; - - TessResultCallback* get_bbox = nullptr; - if (raw_image) { - // Get bounding box in original raw image with padding. 
- get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, - level, raw_padding, - &left, &top, &right, &bottom); - } else { - // Get bounding box from binarized imaged. Note that this could be - // differently scaled from the original image. - get_bbox = NewPermanentTessCallback(page_it, - &PageIterator::BoundingBoxInternal, - level, &left, &top, &right, &bottom); - } - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) - ++component_count; - } while (page_it->Next(level)); - - Boxa* boxa = boxaCreate(component_count); - if (pixa != nullptr) - *pixa = pixaCreate(component_count); - if (blockids != nullptr) - *blockids = new int[component_count]; - if (paraids != nullptr) - *paraids = new int[component_count]; - - int blockid = 0; - int paraid = 0; - int component_index = 0; - page_it->Begin(); - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) { - Box* lbox = boxCreate(left, top, right - left, bottom - top); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != nullptr) { - Pix* pix = nullptr; - if (raw_image) { - pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, - &top); - } else { - pix = page_it->GetBinaryImage(level); - } - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, L_CLONE); - } - if (paraids != nullptr) { - (*paraids)[component_index] = paraid; - if (page_it->IsAtFinalElement(RIL_PARA, level)) - ++paraid; - } - if (blockids != nullptr) { - (*blockids)[component_index] = blockid; - if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { - ++blockid; - paraid = 0; - } + Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, + bool text_only, bool raw_image, + const int raw_padding, + Pixa** pixa, int** blockids, + int** paraids) { + PageIterator* page_it = GetIterator(); + if (page_it == nullptr) + page_it = AnalyseLayout(); + if (page_it == nullptr) + return nullptr; // Failed. + + // Count the components to get a size for the arrays. 
+ int component_count = 0; + int left, top, right, bottom; + + TessResultCallback* get_bbox = nullptr; + if (raw_image) { + // Get bounding box in original raw image with padding. + get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, + level, raw_padding, + &left, &top, &right, &bottom); + } else { + // Get bounding box from binarized imaged. Note that this could be + // differently scaled from the original image. + get_bbox = NewPermanentTessCallback(page_it, + &PageIterator::BoundingBoxInternal, + level, &left, &top, &right, &bottom); } - ++component_index; + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) + ++component_count; + } while (page_it->Next(level)); + + Boxa* boxa = boxaCreate(component_count); + if (pixa != nullptr) + *pixa = pixaCreate(component_count); + if (blockids != nullptr) + *blockids = new int[component_count]; + if (paraids != nullptr) + *paraids = new int[component_count]; + + int blockid = 0; + int paraid = 0; + int component_index = 0; + page_it->Begin(); + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) { + Box* lbox = boxCreate(left, top, right - left, bottom - top); + boxaAddBox(boxa, lbox, L_INSERT); + if (pixa != nullptr) { + Pix* pix = nullptr; + if (raw_image) { + pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, + &top); + } else { + pix = page_it->GetBinaryImage(level); + } + pixaAddPix(*pixa, pix, L_INSERT); + pixaAddBox(*pixa, lbox, L_CLONE); + } + if (paraids != nullptr) { + (*paraids)[component_index] = paraid; + if (page_it->IsAtFinalElement(RIL_PARA, level)) + ++paraid; + } + if (blockids != nullptr) { + (*blockids)[component_index] = blockid; + if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { + ++blockid; + paraid = 0; + } + } + ++component_index; + } + } while (page_it->Next(level)); + delete page_it; + delete get_bbox; + return boxa; } - } while (page_it->Next(level)); - delete page_it; - delete get_bbox; - 
return boxa; -} -int TessBaseAPI::GetThresholdedImageScaleFactor() const { - if (thresholder_ == nullptr) { - return 0; - } - return thresholder_->GetScaleFactor(); -} + int TessBaseAPI::GetThresholdedImageScaleFactor() const { + if (thresholder_ == nullptr) { + return 0; + } + return thresholder_->GetScaleFactor(); + } /** * Runs page layout analysis in the mode set by SetPageSegMode. @@ -821,282 +821,282 @@ int TessBaseAPI::GetThresholdedImageScaleFactor() const { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ -PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } - -PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { - if (FindLines() == 0) { - if (block_list_->empty()) - return nullptr; // The page was empty. - page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); - DetectParagraphs(false); - return new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } - return nullptr; -} + PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } + + PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { + if (FindLines() == 0) { + if (block_list_->empty()) + return nullptr; // The page was empty. + page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); + DetectParagraphs(false); + return new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } + return nullptr; + } /** * Recognize the tesseract global image and return the result as Tesseract * internal structures. 
*/ -int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (FindLines() != 0) - return -1; - delete page_res_; - if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, - &tesseract_->prev_word_best_choice_); - return 0; // Empty page. - } + int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (FindLines() != 0) + return -1; + delete page_res_; + if (block_list_->empty()) { + page_res_ = new PAGE_RES(false, block_list_, + &tesseract_->prev_word_best_choice_); + return 0; // Empty page. + } - tesseract_->SetBlackAndWhitelist(); - recognition_done_ = true; + tesseract_->SetBlackAndWhitelist(); + recognition_done_ = true; #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); - } else + if (tesseract_->tessedit_resegment_from_line_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); + } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); + } else #endif // ndef DISABLED_LEGACY_ENGINE - { - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), - block_list_, &tesseract_->prev_word_best_choice_); - } + { + page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), + block_list_, &tesseract_->prev_word_best_choice_); + } - if (page_res_ == nullptr) { - return -1; - } + if (page_res_ == nullptr) { + return -1; + } - if (tesseract_->tessedit_train_line_recognizer) { - tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_train_line_recognizer) { + tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); + 
tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_make_boxes_from_boxes) { - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_make_boxes_from_boxes) { + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #endif // ndef DISABLED_LEGACY_ENGINE - if (truth_cb_ != nullptr) { - tesseract_->wordrec_run_blamer.set_value(true); - PageIterator *page_it = new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - truth_cb_->Run(tesseract_->getDict().getUnicharset(), - image_height_, page_it, this->tesseract()->pix_grey()); - delete page_it; - } + if (truth_cb_ != nullptr) { + tesseract_->wordrec_run_blamer.set_value(true); + PageIterator *page_it = new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + truth_cb_->Run(tesseract_->getDict().getUnicharset(), + image_height_, page_it, this->tesseract()->pix_grey()); + delete page_it; + } - int result = 0; - if (tesseract_->interactive_display_mode) { - #ifndef GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); - #endif // GRAPHICS_DISABLED - // The page_res is invalid after an interactive session, so cleanup - // in a way that lets us continue to the next page without crashing. - delete page_res_; - page_res_ = nullptr; - return -1; - #ifndef DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - STRING fontname; - ExtractFontName(*output_file_, &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - FILE *training_output_file = tesseract_->init_recog_training(*input_file_); - // OCR the page segmented into words by tesseract. 
- tesseract_->recog_training_segmented( - *input_file_, page_res_, monitor, training_output_file); - fclose(training_output_file); - #endif // ndef DISABLED_LEGACY_ENGINE - } else { - // Now run the main recognition. - bool wait_for_text = true; - GetBoolVariable("paragraph_text_based", &wait_for_text); - if (!wait_for_text) DetectParagraphs(false); - if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { - if (wait_for_text) DetectParagraphs(true); - } else { - result = -1; + int result = 0; + if (tesseract_->interactive_display_mode) { +#ifndef GRAPHICS_DISABLED + tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); +#endif // GRAPHICS_DISABLED + // The page_res is invalid after an interactive session, so cleanup + // in a way that lets us continue to the next page without crashing. + delete page_res_; + page_res_ = nullptr; + return -1; +#ifndef DISABLED_LEGACY_ENGINE + } else if (tesseract_->tessedit_train_from_boxes) { + STRING fontname; + ExtractFontName(*output_file_, &fontname); + tesseract_->ApplyBoxTraining(fontname, page_res_); + } else if (tesseract_->tessedit_ambigs_training) { + FILE *training_output_file = tesseract_->init_recog_training(*input_file_); + // OCR the page segmented into words by tesseract. + tesseract_->recog_training_segmented( + *input_file_, page_res_, monitor, training_output_file); + fclose(training_output_file); +#endif // ndef DISABLED_LEGACY_ENGINE + } else { + // Now run the main recognition. + bool wait_for_text = true; + GetBoolVariable("paragraph_text_based", &wait_for_text); + if (!wait_for_text) DetectParagraphs(false); + if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { + if (wait_for_text) DetectParagraphs(true); + } else { + result = -1; + } + } + return result; } - } - return result; -} #ifndef DISABLED_LEGACY_ENGINE /** Tests the chopper by exhaustively running chop_one_blob. 
*/ -int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (page_res_ != nullptr) - ClearResults(); - if (FindLines() != 0) - return -1; - // Additional conditions under which chopper test cannot be run - if (tesseract_->interactive_display_mode) return -1; - - recognition_done_ = true; - - page_res_ = new PAGE_RES(false, block_list_, - &(tesseract_->prev_word_best_choice_)); - - PAGE_RES_IT page_res_it(page_res_); - - while (page_res_it.word() != nullptr) { - WERD_RES *word_res = page_res_it.word(); - GenericVector boxes; - tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, - page_res_it.row()->row, word_res); - page_res_it.forward(); - } - return 0; -} + int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (page_res_ != nullptr) + ClearResults(); + if (FindLines() != 0) + return -1; + // Additional conditions under which chopper test cannot be run + if (tesseract_->interactive_display_mode) return -1; + + recognition_done_ = true; + + page_res_ = new PAGE_RES(false, block_list_, + &(tesseract_->prev_word_best_choice_)); + + PAGE_RES_IT page_res_it(page_res_); + + while (page_res_it.word() != nullptr) { + WERD_RES *word_res = page_res_it.word(); + GenericVector boxes; + tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, + page_res_it.row()->row, word_res); + page_res_it.forward(); + } + return 0; + } #endif // ndef DISABLED_LEGACY_ENGINE // Takes ownership of the input pix. 
-void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } + void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } -Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } + Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } -const char * TessBaseAPI::GetInputName() { - if (input_file_) - return input_file_->c_str(); - return nullptr; -} + const char * TessBaseAPI::GetInputName() { + if (input_file_) + return input_file_->c_str(); + return nullptr; + } -const char * TessBaseAPI::GetDatapath() { - return tesseract_->datadir.c_str(); -} + const char * TessBaseAPI::GetDatapath() { + return tesseract_->datadir.c_str(); + } -int TessBaseAPI::GetSourceYResolution() { - return thresholder_->GetSourceYResolution(); -} + int TessBaseAPI::GetSourceYResolution() { + return thresholder_->GetSourceYResolution(); + } // If flist exists, get data from there. Otherwise get data from buf. // Seems convoluted, but is the easiest way I know of to meet multiple // goals. Support streaming from stdin, and also work on platforms // lacking fmemopen. -bool TessBaseAPI::ProcessPagesFileList(FILE *flist, - STRING *buf, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { - if (!flist && !buf) return false; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - char pagename[MAX_PATH]; - - GenericVector lines; - if (!flist) { - buf->split('\n', &lines); - if (lines.empty()) return false; - } + bool TessBaseAPI::ProcessPagesFileList(FILE *flist, + STRING *buf, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { + if (!flist && !buf) return false; + int page = (tessedit_page_number >= 0) ? 
tessedit_page_number : 0; + char pagename[MAX_PATH]; + + GenericVector lines; + if (!flist) { + buf->split('\n', &lines); + if (lines.empty()) return false; + } - // Skip to the requested page number. - for (int i = 0; i < page; i++) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } - } + // Skip to the requested page number. + for (int i = 0; i < page; i++) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } + } - // Begin producing output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - return false; - } + // Begin producing output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + return false; + } - // Loop over all pages - or just the requested one - while (true) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } else { - if (page >= lines.size()) break; - snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); - } - chomp_string(pagename); - Pix *pix = pixRead(pagename); - if (pix == nullptr) { - tprintf("Image file %s cannot be read!\n", pagename); - return false; + // Loop over all pages - or just the requested one + while (true) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } else { + if (page >= lines.size()) break; + snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); + } + chomp_string(pagename); + Pix *pix = pixRead(pagename); + if (pix == nullptr) { + tprintf("Image file %s cannot be read!\n", pagename); + return false; + } + tprintf("Page %d : %s\n", page, pagename); + bool r = ProcessPage(pix, page, pagename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + ++page; + } + + // Finish producing output + if (renderer && !renderer->EndDocument()) { + return false; + } + return true; } - tprintf("Page %d : %s\n", page, pagename); - bool r = ProcessPage(pix, page, pagename, 
retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - ++page; - } - // Finish producing output - if (renderer && !renderer->EndDocument()) { - return false; - } - return true; -} - -bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { + bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { #ifndef ANDROID_BUILD - Pix *pix = nullptr; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - size_t offset = 0; - for (; ; ++page) { - if (tessedit_page_number >= 0) - page = tessedit_page_number; - pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) - : pixReadFromMultipageTiff(filename, &offset); - if (pix == nullptr) break; - tprintf("Page %d\n", page + 1); - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page); - SetVariable("applybox_page", page_str); - bool r = ProcessPage(pix, page, filename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - if (!offset) break; - } - return true; + Pix *pix = nullptr; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + size_t offset = 0; + for (; ; ++page) { + if (tessedit_page_number >= 0) + page = tessedit_page_number; + pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) + : pixReadFromMultipageTiff(filename, &offset); + if (pix == nullptr) break; + tprintf("Page %d\n", page + 1); + char page_str[kMaxIntSize]; + snprintf(page_str, kMaxIntSize - 1, "%d", page); + SetVariable("applybox_page", page_str); + bool r = ProcessPage(pix, page, filename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + if (!offset) break; + } + return true; #else - return false; + return false; #endif -} + } // Master ProcessPages calls ProcessPagesInternal and then does any post- // processing required due to being in a training mode. -bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - bool result = - ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); - #ifndef DISABLED_LEGACY_ENGINE - if (result) { - if (tesseract_->tessedit_train_from_boxes && - !tesseract_->WriteTRFile(*output_file_)) { - tprintf("Write of TR file failed: %s\n", output_file_->string()); - return false; + bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + bool result = + ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); +#ifndef DISABLED_LEGACY_ENGINE + if (result) { + if (tesseract_->tessedit_train_from_boxes && + !tesseract_->WriteTRFile(*output_file_)) { + tprintf("Write of TR file failed: %s\n", output_file_->string()); + return false; + } + } +#endif // ndef DISABLED_LEGACY_ENGINE + return result; } - } - #endif // ndef DISABLED_LEGACY_ENGINE - return result; -} // In the ideal scenario, Tesseract will start working on data as soon // as it can. For example, if you stream a filelist through stdin, we @@ -1109,184 +1109,184 @@ bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, // impractical. 
So we support a command line flag to explicitly // identify the scenario that really matters: filelists on // stdin. We'll still do our best if the user likes pipes. -bool TessBaseAPI::ProcessPagesInternal(const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPages") - bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); - if (stdInput) { + bool TessBaseAPI::ProcessPagesInternal(const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + PERF_COUNT_START("ProcessPages") + bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); + if (stdInput) { #ifdef WIN32 - if (_setmode(_fileno(stdin), _O_BINARY) == -1) + if (_setmode(_fileno(stdin), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); #endif // WIN32 - } + } - if (stream_filelist) { - return ProcessPagesFileList(stdin, nullptr, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } + if (stream_filelist) { + return ProcessPagesFileList(stdin, nullptr, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // At this point we are officially in autodection territory. - // That means any data in stdin must be buffered, to make it - // seekable. - std::string buf; - const l_uint8 *data = nullptr; - if (stdInput) { - buf.assign((std::istreambuf_iterator(std::cin)), - (std::istreambuf_iterator())); - data = reinterpret_cast(buf.data()); - } else { - // Check whether the input file can be read. - if (FILE* file = fopen(filename, "rb")) { - fclose(file); - } else { - fprintf(stderr, "Error, cannot read input file %s: %s\n", - filename, strerror(errno)); - return false; - } - } + // At this point we are officially in autodection territory. + // That means any data in stdin must be buffered, to make it + // seekable. 
+ std::string buf; + const l_uint8 *data = nullptr; + if (stdInput) { + buf.assign((std::istreambuf_iterator(std::cin)), + (std::istreambuf_iterator())); + data = reinterpret_cast(buf.data()); + } else { + // Check whether the input file can be read. + if (FILE* file = fopen(filename, "rb")) { + fclose(file); + } else { + fprintf(stderr, "Error, cannot read input file %s: %s\n", + filename, strerror(errno)); + return false; + } + } - // Here is our autodetection - int format; - int r = (stdInput) ? - findFileFormatBuffer(data, &format) : - findFileFormat(filename, &format); - - // Maybe we have a filelist - if (r != 0 || format == IFF_UNKNOWN) { - STRING s; - if (stdInput) { - s = buf.c_str(); - } else { - std::ifstream t(filename); - std::string u((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - s = u.c_str(); - } - return ProcessPagesFileList(nullptr, &s, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } + // Here is our autodetection + int format; + int r = (stdInput) ? + findFileFormatBuffer(data, &format) : + findFileFormat(filename, &format); + + // Maybe we have a filelist + if (r != 0 || format == IFF_UNKNOWN) { + STRING s; + if (stdInput) { + s = buf.c_str(); + } else { + std::ifstream t(filename); + std::string u((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + s = u.c_str(); + } + return ProcessPagesFileList(nullptr, &s, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // Maybe we have a TIFF which is potentially multipage - bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || - format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || - format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || - format == IFF_TIFF_ZIP); - - // Fail early if we can, before producing any output - Pix *pix = nullptr; - if (!tiff) { - pix = (stdInput) ? 
pixReadMem(data, buf.size()) : pixRead(filename); - if (pix == nullptr) { - return false; - } - } + // Maybe we have a TIFF which is potentially multipage + bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || + format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || + format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || + format == IFF_TIFF_ZIP); + + // Fail early if we can, before producing any output + Pix *pix = nullptr; + if (!tiff) { + pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); + if (pix == nullptr) { + return false; + } + } - // Begin the output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - pixDestroy(&pix); - return false; - } + // Begin the output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + pixDestroy(&pix); + return false; + } - // Produce output - r = (tiff) ? - ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number) : - ProcessPage(pix, 0, filename, retry_config, - timeout_millisec, renderer); + // Produce output + r = (tiff) ? 
+ ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number) : + ProcessPage(pix, 0, filename, retry_config, + timeout_millisec, renderer); - // Clean up memory as needed - pixDestroy(&pix); + // Clean up memory as needed + pixDestroy(&pix); - // End the output - if (!r || (renderer && !renderer->EndDocument())) { - return false; - } - PERF_COUNT_END - return true; -} - -bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPage") - SetInputName(filename); - SetImage(pix); - bool failed = false; - - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { - // Disabled character recognition - PageIterator* it = AnalyseLayout(); - - if (it == nullptr) { - failed = true; - } else { - delete it; + // End the output + if (!r || (renderer && !renderer->EndDocument())) { + return false; + } + PERF_COUNT_END + return true; } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { - failed = FindLines() != 0; - } else if (timeout_millisec > 0) { - // Running with a timeout. - ETEXT_DESC monitor; - monitor.cancel = nullptr; - monitor.cancel_this = nullptr; - monitor.set_deadline_msecs(timeout_millisec); - - // Now run the main recognition. - failed = Recognize(&monitor) < 0; - } else { - // Normal layout and character recognition with no timeout. 
- failed = Recognize(nullptr) < 0; - } - if (tesseract_->tessedit_write_images) { + bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer) { + PERF_COUNT_START("ProcessPage") + SetInputName(filename); + SetImage(pix); + bool failed = false; + + if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + // Disabled character recognition + PageIterator* it = AnalyseLayout(); + + if (it == nullptr) { + failed = true; + } else { + delete it; + } + } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + failed = FindLines() != 0; + } else if (timeout_millisec > 0) { + // Running with a timeout. + ETEXT_DESC monitor; + monitor.cancel = nullptr; + monitor.cancel_this = nullptr; + monitor.set_deadline_msecs(timeout_millisec); + + // Now run the main recognition. + failed = Recognize(&monitor) < 0; + } else { + // Normal layout and character recognition with no timeout. + failed = Recognize(nullptr) < 0; + } + + if (tesseract_->tessedit_write_images) { #ifndef ANDROID_BUILD - Pix* page_pix = GetThresholdedImage(); - pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); + Pix* page_pix = GetThresholdedImage(); + pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); #endif // ANDROID_BUILD - } + } - if (failed && retry_config != nullptr && retry_config[0] != '\0') { - // Save current config variables before switching modes. - FILE* fp = fopen(kOldVarsFile, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); - } else { - PrintVariables(fp); - fclose(fp); - } - // Switch to alternate mode for retry. - ReadConfigFile(retry_config); - SetImage(pix); - Recognize(nullptr); - // Restore saved config variables. - ReadConfigFile(kOldVarsFile); - } + if (failed && retry_config != nullptr && retry_config[0] != '\0') { + // Save current config variables before switching modes. 
+ FILE* fp = fopen(kOldVarsFile, "wb"); + if (fp == nullptr) { + tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); + } else { + PrintVariables(fp); + fclose(fp); + } + // Switch to alternate mode for retry. + ReadConfigFile(retry_config); + SetImage(pix); + Recognize(nullptr); + // Restore saved config variables. + ReadConfigFile(kOldVarsFile); + } - if (renderer && !failed) { - failed = !renderer->AddImage(this); - } + if (renderer && !failed) { + failed = !renderer->AddImage(this); + } - PERF_COUNT_END - return !failed; -} + PERF_COUNT_END + return !failed; + } /** * Get a left-to-right iterator to the results of LayoutAnalysis and/or * Recognize. The returned iterator must be deleted after use. */ -LTRResultIterator* TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); -} + LTRResultIterator* TessBaseAPI::GetLTRIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } /** * Get a reading-order iterator to the results of LayoutAnalysis and/or @@ -1296,14 +1296,14 @@ LTRResultIterator* TessBaseAPI::GetLTRIterator() { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. 
*/ -ResultIterator* TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return ResultIterator::StartOfParagraph(LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_)); -} + ResultIterator* TessBaseAPI::GetIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return ResultIterator::StartOfParagraph(LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_)); + } /** * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. @@ -1313,45 +1313,45 @@ ResultIterator* TessBaseAPI::GetIterator() { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ -MutableIterator* TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new MutableIterator(page_res_, tesseract_, - thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); -} + MutableIterator* TessBaseAPI::GetMutableIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new MutableIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } /** Make a text string from the internal data structures. 
*/ -char* TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - STRING text(""); - ResultIterator *it = GetIterator(); - do { - if (it->Empty(RIL_PARA)) continue; - const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); - text += para_text.get(); - } while (it->Next(RIL_PARA)); - char* result = new char[text.length() + 1]; - strncpy(result, text.string(), text.length() + 1); - delete it; - return result; -} + char* TessBaseAPI::GetUTF8Text() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + STRING text(""); + ResultIterator *it = GetIterator(); + do { + if (it->Empty(RIL_PARA)) continue; + const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); + text += para_text.get(); + } while (it->Next(RIL_PARA)); + char* result = new char[text.length() + 1]; + strncpy(result, text.string(), text.length() + 1); + delete it; + return result; + } /** * Gets the block orientation at the current iterator position. 
*/ -static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { - tesseract::Orientation orientation; - tesseract::WritingDirection writing_direction; - tesseract::TextlineOrder textline_order; - float deskew_angle; - it->Orientation(&orientation, &writing_direction, &textline_order, - &deskew_angle); - return orientation; -} + static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { + tesseract::Orientation orientation; + tesseract::WritingDirection writing_direction; + tesseract::TextlineOrder textline_order; + float deskew_angle; + it->Orientation(&orientation, &writing_direction, &textline_order, + &deskew_angle); + return orientation; + } /** * Fits a line to the baseline at the given level, and appends its coefficients @@ -1361,104 +1361,149 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { * method currently only inserts a 'textangle' property to indicate the rotation * direction and does not add any baseline information to the hocr string. */ -static void AddBaselineCoordsTohOCR(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { - tesseract::Orientation orientation = GetBlockTextOrientation(it); - if (orientation != ORIENTATION_PAGE_UP) { - hocr_str->add_str_int("; textangle ", 360 - orientation * 90); - return; - } + static void AddBaselineCoordsTohOCR(const PageIterator *it, + PageIteratorLevel level, + STRING* hocr_str) { + tesseract::Orientation orientation = GetBlockTextOrientation(it); + if (orientation != ORIENTATION_PAGE_UP) { + hocr_str->add_str_int("; textangle ", 360 - orientation * 90); + return; + } - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - // Try to get the baseline coordinates at this level. 
- int x1, y1, x2, y2; - if (!it->Baseline(level, &x1, &y1, &x2, &y2)) - return; - // Following the description of this field of the hOCR spec, we convert the - // baseline coordinates so that "the bottom left of the bounding box is the - // origin". - x1 -= left; - x2 -= left; - y1 -= bottom; - y2 -= bottom; - - // Now fit a line through the points so we can extract coefficients for the - // equation: y = p1 x + p0 - double p1 = 0; - double p0 = 0; - if (x1 == x2) { - // Problem computing the polynomial coefficients. - return; - } - p1 = (y2 - y1) / static_cast(x2 - x1); - p0 = y1 - static_cast(p1 * x1); - - hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); - hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); -} - -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - if (num2 >= 0) { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); - } else { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - } - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; -} - -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2, int num3) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; -} - -static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - // This is the only place we use double quotes instead of single quotes, - // but it may too late to change for consistency - hocr_str->add_str_int(" title=\"bbox ", left); - hocr_str->add_str_int(" ", top); - hocr_str->add_str_int(" ", right); - hocr_str->add_str_int(" ", bottom); 
- // Add baseline coordinates & heights for textlines only. - if (level == RIL_TEXTLINE) { - AddBaselineCoordsTohOCR(it, level, hocr_str); - // add custom height measures - float row_height, descenders, ascenders; // row attributes - it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO(rays): Do we want to limit these to a single decimal place? - hocr_str->add_str_double("; x_size ", row_height); - hocr_str->add_str_double("; x_descenders ", descenders * -1); - hocr_str->add_str_double("; x_ascenders ", ascenders); - } - *hocr_str += "\">"; -} - -static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - hocr_str->add_str_int("\t", left); - hocr_str->add_str_int("\t", top); - hocr_str->add_str_int("\t", right - left); - hocr_str->add_str_int("\t", bottom - top); -} + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + + // Try to get the baseline coordinates at this level. + int x1, y1, x2, y2; + if (!it->Baseline(level, &x1, &y1, &x2, &y2)) + return; + // Following the description of this field of the hOCR spec, we convert the + // baseline coordinates so that "the bottom left of the bounding box is the + // origin". + x1 -= left; + x2 -= left; + y1 -= bottom; + y2 -= bottom; + + // Now fit a line through the points so we can extract coefficients for the + // equation: y = p1 x + p0 + double p1 = 0; + double p0 = 0; + if (x1 == x2) { + // Problem computing the polynomial coefficients. 
+ return; + } + p1 = (y2 - y1) / static_cast(x2 - x1); + p0 = y1 - static_cast(p1 * x1); + + hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); + hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); + } + + static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + if (num2 >= 0) { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); + } else { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + } + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; + } + + static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2, int num3) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; + } + + static void AddIdToAlto(STRING* alto_str, const std::string base, int num1) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + id_buffer[BUFSIZE - 1] = '\0'; + *alto_str += " ID=\""; + *alto_str += id_buffer; + *alto_str += "\""; + } + + static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, + STRING* hocr_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + // This is the only place we use double quotes instead of single quotes, + // but it may too late to change for consistency + hocr_str->add_str_int(" title=\"bbox ", left); + hocr_str->add_str_int(" ", top); + hocr_str->add_str_int(" ", right); + hocr_str->add_str_int(" ", bottom); + // Add baseline coordinates & heights for textlines only. 
+ if (level == RIL_TEXTLINE) { + AddBaselineCoordsTohOCR(it, level, hocr_str); + // add custom height measures + float row_height, descenders, ascenders; // row attributes + it->RowAttributes(&row_height, &descenders, &ascenders); + // TODO(rays): Do we want to limit these to a single decimal place? + hocr_str->add_str_double("; x_size ", row_height); + hocr_str->add_str_double("; x_descenders ", descenders * -1); + hocr_str->add_str_double("; x_ascenders ", ascenders); + } + *hocr_str += "\">"; + } + + static void AddBoxToAlto(const ResultIterator* it, PageIteratorLevel level, + STRING* alto_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + + int hpos = left; + int vpos = top; + int height = bottom - top; + int width = right - left; + + *alto_str += " HPOS=\""; + alto_str->add_str_int("", hpos); + *alto_str += "\""; + *alto_str += " VPOS=\""; + alto_str->add_str_int("", vpos); + *alto_str += "\""; + *alto_str += " WIDTH=\""; + alto_str->add_str_int("", width); + *alto_str += "\""; + *alto_str += " HEIGHT=\""; + alto_str->add_str_int("", height); + *alto_str += "\""; + + if (level == RIL_WORD) { + int wc = it->Confidence(RIL_WORD); + *alto_str += " WC=\"0."; + alto_str->add_str_int("", wc); + *alto_str += "\""; + } + if (level != RIL_WORD) { + + *alto_str += ">"; + } + } + + static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, + STRING* hocr_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + hocr_str->add_str_int("\t", left); + hocr_str->add_str_int("\t", top); + hocr_str->add_str_int("\t", right - left); + hocr_str->add_str_int("\t", bottom - top); + } /** * Make a HTML-formatted string with hOCR markup from the internal @@ -1469,9 +1514,17 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, * STL removed from original patch submission and refactored by rays. * Returned string must be freed with the delete [] operator. 
*/ -char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(nullptr, page_number); -} + char* TessBaseAPI::GetHOCRText(int page_number) { + return GetHOCRText(nullptr, page_number); + } + +/** + * Make an XML-formatted string with ALTO markup from the internal + * data structures. + */ + char* TessBaseAPI::GetAltoText(int page_number) { + return GetAltoText(nullptr, page_number); + } /** * Make a HTML-formatted string with hOCR markup from the internal @@ -1482,24 +1535,24 @@ char* TessBaseAPI::GetHOCRText(int page_number) { * STL removed from original patch submission and refactored by rays. * Returned string must be freed with the delete [] operator. */ -char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; + char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) + return nullptr; - int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; - int page_id = page_number + 1; // hOCR uses 1-based page numbers. - bool para_is_ltr = true; // Default direction is LTR - const char* paragraph_lang = nullptr; - bool font_info = false; - GetBoolVariable("hocr_font_info", &font_info); + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; + int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
+ bool para_is_ltr = true; // Default direction is LTR + const char* paragraph_lang = nullptr; + bool font_info = false; + GetBoolVariable("hocr_font_info", &font_info); - STRING hocr_str(""); + STRING hocr_str(""); - if (input_file_ == nullptr) - SetInputName(nullptr); + if (input_file_ == nullptr) + SetInputName(nullptr); #ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 + // convert input name from ANSI encoding to utf-8 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); wchar_t *uni16_str = new WCHAR[str16_len]; @@ -1515,325 +1568,439 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { delete[] utf8_str; #endif - hocr_str += "
string()); - } else { - hocr_str += "unknown"; - } - hocr_str.add_str_int("\"; bbox ", rect_left_); - hocr_str.add_str_int(" ", rect_top_); - hocr_str.add_str_int(" ", rect_width_); - hocr_str.add_str_int(" ", rect_height_); - hocr_str.add_str_int("; ppageno ", page_number); - hocr_str += "'>\n"; - - ResultIterator *res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Open any new block/paragraph/textline. - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction - hocr_str += "
IsAtBeginningOf(RIL_PARA)) { - hocr_str += "\n

ParagraphIsLtr(); - if (!para_is_ltr) { - hocr_str += " dir='rtl'"; - } - AddIdTohOCR(&hocr_str, "par", page_id, pcnt); - paragraph_lang = res_it->WordRecognitionLanguage(); - if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; + hocr_str += "

string()); + } else { + hocr_str += "unknown"; } - AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - hocr_str += "\n >>* confidencemap = nullptr; - if (tesseract_->lstm_choice_mode) { - confidencemap = res_it->GetBestLSTMSymbolChoices(); - } - hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); - hocr_str.add_str_int(" title='bbox ", left); - hocr_str.add_str_int(" ", top); - hocr_str.add_str_int(" ", right); - hocr_str.add_str_int(" ", bottom); - hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); - if (font_info) { - if (font_name) { - hocr_str += "; x_font "; - hocr_str += HOcrEscape(font_name); - } - hocr_str.add_str_int("; x_fsize ", pointsize); - } - hocr_str += "'"; - const char* lang = res_it->WordRecognitionLanguage(); - if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { - hocr_str += " lang='"; - hocr_str += lang; - hocr_str += "'"; - } - switch (res_it->WordDirection()) { - // Only emit direction if different from current paragraph direction - case DIR_LEFT_TO_RIGHT: - if (!para_is_ltr) hocr_str += " dir='ltr'"; - break; - case DIR_RIGHT_TO_LEFT: - if (para_is_ltr) hocr_str += " dir='rtl'"; - break; - case DIR_MIX: - case DIR_NEUTRAL: - default: // Do nothing. 
- break; - } - hocr_str += ">"; - bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - if (bold) hocr_str += ""; - if (italic) hocr_str += ""; - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme.get()); - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - if (italic) hocr_str += ""; - if (bold) hocr_str += ""; - // If the lstm choice mode is required it is added here - if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - hocr_str += "\n > timestep = (*confidencemap)[i]; - for (std::pair conf : timestep) { - hocr_str += "Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; } - hocr_str += ""; - tcnt++; - } - } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - std::vector> timestep = (*confidencemap)[i]; - if (timestep.size() > 0) { - hocr_str += "\n IsAtBeginningOf(RIL_PARA)) { + hocr_str += "\n

ParagraphIsLtr(); + if (!para_is_ltr) { + hocr_str += " dir='rtl'"; + } + AddIdTohOCR(&hocr_str, "par", page_id, pcnt); + paragraph_lang = res_it->WordRecognitionLanguage(); + if (paragraph_lang) { + hocr_str += " lang='"; + hocr_str += paragraph_lang; hocr_str += "'"; - hocr_str += ">"; - hocr_str += timestep[j].first; - hocr_str += ""; - gcnt++; } - hocr_str += ""; - tcnt++; + AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + hocr_str += "\n >>* confidencemap = nullptr; + if (tesseract_->lstm_choice_mode) { + confidencemap = res_it->GetBestLSTMSymbolChoices(); + } + hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, + &monospace, &serif, &smallcaps, + &pointsize, &font_id); + hocr_str.add_str_int(" title='bbox ", left); + hocr_str.add_str_int(" ", top); + hocr_str.add_str_int(" ", right); + hocr_str.add_str_int(" ", bottom); + hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); + if (font_info) { + if (font_name) { + hocr_str += "; x_font "; + hocr_str += HOcrEscape(font_name); + } + hocr_str.add_str_int("; x_fsize ", pointsize); + } + hocr_str += "'"; + const char* lang = res_it->WordRecognitionLanguage(); + if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { + hocr_str += " lang='"; + hocr_str += lang; + hocr_str += "'"; + } + switch (res_it->WordDirection()) { + // Only emit direction if different from current paragraph direction + case DIR_LEFT_TO_RIGHT: + if (!para_is_ltr) hocr_str += " dir='ltr'"; + break; + case DIR_RIGHT_TO_LEFT: + if (para_is_ltr) hocr_str += " dir='rtl'"; + break; + case DIR_MIX: + case DIR_NEUTRAL: + default: // Do nothing. 
+ break; + } + hocr_str += ">"; + bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); + bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + if (bold) hocr_str += ""; + if (italic) hocr_str += ""; + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + hocr_str += HOcrEscape(grapheme.get()); + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + if (italic) hocr_str += ""; + if (bold) hocr_str += ""; + // If the lstm choice mode is required it is added here + if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + hocr_str += "\n > timestep = (*confidencemap)[i]; + for (std::pair conf : timestep) { + hocr_str += "lstm_choice_mode == 2 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + std::vector> timestep = (*confidencemap)[i]; + if (timestep.size() > 0) { + hocr_str += "\n string(), -1, nullptr, 0); + wchar_t *uni16_str = new WCHAR[str16_len]; + str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, + uni16_str, str16_len); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, + nullptr, nullptr); + char *utf8_str = new char[utf8_len]; + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, + utf8_len, nullptr, nullptr); + *input_file_ = utf8_str; + delete[] uni16_str; + delete[] utf8_str; +#endif + + alto_str += "\t\t\n"; + + ResultIterator *res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + alto_str += "\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { + + alto_str += "\t\t\t\t\tIsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_block = 
res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + alto_str += HOcrEscape(grapheme.get()); + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + + alto_str += "\"/>\n"; + + wcnt++; + + if (last_word_in_line) { + alto_str += "\t\t\t\t\t\n"; + lcnt++; + } + + if (last_word_in_block) { + alto_str += "\t\t\t\t\n"; + bcnt++; + } + } + + alto_str += "\t\t\t\n"; + alto_str += "\t\t\n"; + + char *ret = new char[alto_str.length() + 1]; + strcpy(ret, alto_str.string()); + delete res_it; + return ret; + } /** * Make a TSV-formatted string from the internal data structures. * page_number is 0-based but will appear in the output as 1-based. * Returned string must be freed with the delete [] operator. */ -char* TessBaseAPI::GetTSVText(int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) - return nullptr; - - int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; - int page_id = page_number + 1; // we use 1-based page numbers. - - STRING tsv_str(""); - - int page_num = page_id; - int block_num = 0; - int par_num = 0; - int line_num = 0; - int word_num = 0; - - tsv_str.add_str_int("1\t", page_num); // level 1 - page - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - tsv_str.add_str_int("\t", rect_left_); - tsv_str.add_str_int("\t", rect_top_); - tsv_str.add_str_int("\t", rect_width_); - tsv_str.add_str_int("\t", rect_height_); - tsv_str += "\t-1\t\n"; - - ResultIterator* res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Add rows for any new block/paragraph/textline. 
- if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - block_num++; - par_num = 0; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block - } - if (res_it->IsAtBeginningOf(RIL_PARA)) { - par_num++; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - line_num++; - word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line + char* TessBaseAPI::GetTSVText(int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) + return nullptr; + + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; + int page_id = page_number + 1; // we use 1-based page numbers. + + STRING tsv_str(""); + + int page_num = page_id; + int block_num = 0; + int par_num = 0; + int line_num = 0; + int word_num = 0; + + tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line - } - - // Now, process the word... 
- int left, top, right, bottom; - res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - tsv_str.add_str_int("\t", left); - tsv_str.add_str_int("\t", top); - tsv_str.add_str_int("\t", right - left); - tsv_str.add_str_int("\t", bottom - top); - tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); - tsv_str += "\t"; - - // Increment counts if at end of block/paragraph/textline. - if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; - if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; - if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; - - do { - tsv_str += - std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row - wcnt++; - } + tsv_str.add_str_int("\t", rect_left_); + tsv_str.add_str_int("\t", rect_top_); + tsv_str.add_str_int("\t", rect_width_); + tsv_str.add_str_int("\t", rect_height_); + tsv_str += "\t-1\t\n"; + + ResultIterator* res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + // Add rows for any new block/paragraph/textline. 
+ if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + block_num++; + par_num = 0; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for block + } + if (res_it->IsAtBeginningOf(RIL_PARA)) { + par_num++; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_PARA, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for para + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + line_num++; + word_num = 0; + tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for line + } - char* ret = new char[tsv_str.length() + 1]; - strcpy(ret, tsv_str.string()); - delete res_it; - return ret; -} + // Now, process the word... + int left, top, right, bottom; + res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + word_num++; + tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", left); + tsv_str.add_str_int("\t", top); + tsv_str.add_str_int("\t", right - left); + tsv_str.add_str_int("\t", bottom - top); + tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); + tsv_str += "\t"; + + // Increment counts if at end of block/paragraph/textline. 
+ if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; + if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; + if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; + + do { + tsv_str += + std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + tsv_str += "\n"; // end of row + wcnt++; + } + + char* ret = new char[tsv_str.length() + 1]; + strcpy(ret, tsv_str.string()); + delete res_it; + return ret; + } /** The 5 numbers output for each box (the usual 4 and a page number.) */ -const int kNumbersPerBlob = 5; + const int kNumbersPerBlob = 5; /** * The number of bytes taken by each number. Since we use int16_t for ICOORD, * assume only 5 digits max. */ -const int kBytesPerNumber = 5; + const int kBytesPerNumber = 5; /** * Multiplier for max expected textlength assumes (kBytesPerNumber + space) * * kNumbersPerBlob plus the newline. Add to this the * original UTF8 characters, and one kMaxBytesPerLine for safety. */ -const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; + const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; /** Max bytes in the decimal representation of int64_t. */ -const int kBytesPer64BitNumber = 20; + const int kBytesPer64BitNumber = 20; /** * A maximal single box could occupy kNumbersPerBlob numbers at * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a * space plus the newline and the maximum length of a UNICHAR. * Test against this on each iteration for safety. 
*/ -const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + - UNICHAR_LEN; + const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + + UNICHAR_LEN; /** * The recognized text is returned as a char* which is coded @@ -1841,160 +2008,160 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + * page_number is a 0-base page index that will appear in the box file. * Returned string must be freed with the delete [] operator. */ -char* TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int blob_count; - int utf8_length = TextLength(&blob_count); - int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + - kMaxBytesPerLine; - char* result = new char[total_length]; - result[0] = '\0'; - int output_length = 0; - LTRResultIterator* it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text( - it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. - for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') - text[i] = kTesseractReject; - } - snprintf(result + output_length, total_length - output_length, - "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, - right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... 
- if (output_length + kMaxBytesPerLine > total_length) - break; - } - } while (it->Next(RIL_SYMBOL)); - delete it; - return result; -} + char* TessBaseAPI::GetBoxText(int page_number) { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int blob_count; + int utf8_length = TextLength(&blob_count); + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + + kMaxBytesPerLine; + char* result = new char[total_length]; + result[0] = '\0'; + int output_length = 0; + LTRResultIterator* it = GetLTRIterator(); + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text( + it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') + text[i] = kTesseractReject; + } + snprintf(result + output_length, total_length - output_length, + "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, + right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... + if (output_length + kMaxBytesPerLine > total_length) + break; + } + } while (it->Next(RIL_SYMBOL)); + delete it; + return result; + } /** * Conversion table for non-latin characters. * Maps characters out of the latin set into the latin set. * TODO(rays) incorporate this translation into unicharset. */ -const int kUniChs[] = { - 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 -}; + const int kUniChs[] = { + 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 + }; /** Latin chars corresponding to the unicode chars above. 
*/ -const int kLatinChs[] = { - 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 -}; + const int kLatinChs[] = { + 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 + }; /** * The recognized text is returned as a char* which is coded * as UNLV format Latin-1 with specific reject and suspect codes. * Returned string must be freed with the delete [] operator. */ -char* TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - bool tilde_crunch_written = false; - bool last_char_was_newline = true; - bool last_char_was_tilde = false; - - int total_length = TextLength(nullptr); - PAGE_RES_IT page_res_it(page_res_); - char* result = new char[total_length]; - char* ptr = result; - for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - // Process the current word. - if (word->unlv_crunch_mode != CR_NONE) { - if (word->unlv_crunch_mode != CR_DELETE && - (!tilde_crunch_written || - (word->unlv_crunch_mode == CR_KEEP_SPACE && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)))) { - if (!word->word->flag(W_BOL) && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)) { - /* Write a space to separate from preceding good text */ - *ptr++ = ' '; - last_char_was_tilde = false; - } - if (!last_char_was_tilde) { - // Write a reject char. - last_char_was_tilde = true; - *ptr++ = kUNLVReject; - tilde_crunch_written = true; - last_char_was_newline = false; - } - } - } else { - // NORMAL PROCESSING of non tilde crunched words. 
- tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); - const char* wordstr = word->best_choice->unichar_string().string(); - const STRING& lengths = word->best_choice->unichar_lengths(); - int length = lengths.length(); - int i = 0; - int offset = 0; - - if (last_char_was_tilde && - word->word->space() == 0 && wordstr[offset] == ' ') { - // Prevent adjacent tilde across words - we know that adjacent tildes - // within words have been removed. - // Skip the first character. - offset = lengths[i++]; - } - if (i < length && wordstr[offset] != 0) { - if (!last_char_was_newline) - *ptr++ = ' '; - else - last_char_was_newline = false; - for (; i < length; offset += lengths[i++]) { - if (wordstr[offset] == ' ' || - wordstr[offset] == kTesseractReject) { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } else { - if (word->reject_map[i].rejected()) - *ptr++ = kUNLVSuspect; - UNICHAR ch(wordstr + offset, lengths[i]); - int uni_ch = ch.first_uni(); - for (int j = 0; kUniChs[j] != 0; ++j) { - if (kUniChs[j] == uni_ch) { - uni_ch = kLatinChs[j]; - break; - } - } - if (uni_ch <= 0xff) { - *ptr++ = static_cast(uni_ch); + char* TessBaseAPI::GetUNLVText() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + bool tilde_crunch_written = false; + bool last_char_was_newline = true; + bool last_char_was_tilde = false; + + int total_length = TextLength(nullptr); + PAGE_RES_IT page_res_it(page_res_); + char* result = new char[total_length]; + char* ptr = result; + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + // Process the current word. 
+ if (word->unlv_crunch_mode != CR_NONE) { + if (word->unlv_crunch_mode != CR_DELETE && + (!tilde_crunch_written || + (word->unlv_crunch_mode == CR_KEEP_SPACE && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)))) { + if (!word->word->flag(W_BOL) && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)) { + /* Write a space to separate from preceding good text */ + *ptr++ = ' '; last_char_was_tilde = false; - } else { - *ptr++ = kUNLVReject; + } + if (!last_char_was_tilde) { + // Write a reject char. last_char_was_tilde = true; + *ptr++ = kUNLVReject; + tilde_crunch_written = true; + last_char_was_newline = false; + } + } + } else { + // NORMAL PROCESSING of non tilde crunched words. + tilde_crunch_written = false; + tesseract_->set_unlv_suspects(word); + const char* wordstr = word->best_choice->unichar_string().string(); + const STRING& lengths = word->best_choice->unichar_lengths(); + int length = lengths.length(); + int i = 0; + int offset = 0; + + if (last_char_was_tilde && + word->word->space() == 0 && wordstr[offset] == ' ') { + // Prevent adjacent tilde across words - we know that adjacent tildes + // within words have been removed. + // Skip the first character. 
+ offset = lengths[i++]; + } + if (i < length && wordstr[offset] != 0) { + if (!last_char_was_newline) + *ptr++ = ' '; + else + last_char_was_newline = false; + for (; i < length; offset += lengths[i++]) { + if (wordstr[offset] == ' ' || + wordstr[offset] == kTesseractReject) { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } else { + if (word->reject_map[i].rejected()) + *ptr++ = kUNLVSuspect; + UNICHAR ch(wordstr + offset, lengths[i]); + int uni_ch = ch.first_uni(); + for (int j = 0; kUniChs[j] != 0; ++j) { + if (kUniChs[j] == uni_ch) { + uni_ch = kLatinChs[j]; + break; + } + } + if (uni_ch <= 0xff) { + *ptr++ = static_cast(uni_ch); + last_char_was_tilde = false; + } else { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } + } } } } + if (word->word->flag(W_EOL) && !last_char_was_newline) { + /* Add a new line output */ + *ptr++ = '\n'; + tilde_crunch_written = false; + last_char_was_newline = true; + last_char_was_tilde = false; + } } - } - if (word->word->flag(W_EOL) && !last_char_was_newline) { - /* Add a new line output */ *ptr++ = '\n'; - tilde_crunch_written = false; - last_char_was_newline = true; - last_char_was_tilde = false; + *ptr = '\0'; + return result; } - } - *ptr++ = '\n'; - *ptr = '\0'; - return result; -} #ifndef DISABLED_LEGACY_ENGINE @@ -2007,103 +2174,103 @@ char* TessBaseAPI::GetUNLVText() { * script_conf is confidence level in the script * Returns true on success and writes values to each parameter as an output */ -bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, - float* script_conf) { - OSResults osr; - - bool osd = DetectOS(&osr); - if (!osd) { - return false; - } + bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, + float* script_conf) { + OSResults osr; + + bool osd = DetectOS(&osr); + if (!osd) { + return false; + } - int orient_id = osr.best_result.orientation_id; - int script_id = 
osr.get_best_script(orient_id); - if (orient_conf) *orient_conf = osr.best_result.oconfidence; - if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees + int orient_id = osr.best_result.orientation_id; + int script_id = osr.get_best_script(orient_id); + if (orient_conf) *orient_conf = osr.best_result.oconfidence; + if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees - if (script_name) { - const char* script = osr.unicharset->get_script_from_script_id(script_id); + if (script_name) { + const char* script = osr.unicharset->get_script_from_script_id(script_id); - *script_name = script; - } + *script_name = script; + } - if (script_conf) *script_conf = osr.best_result.sconfidence; + if (script_conf) *script_conf = osr.best_result.sconfidence; - return true; -} + return true; + } /** * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. * page_number is a 0-based page index that will appear in the osd file. 
*/ -char* TessBaseAPI::GetOsdText(int page_number) { - int orient_deg; - float orient_conf; - const char* script_name; - float script_conf; - - if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, - &script_conf)) - return nullptr; - - // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_deg / 90); - - const int kOsdBufsize = 255; - char* osd_buf = new char[kOsdBufsize]; - snprintf(osd_buf, kOsdBufsize, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, - script_conf); - - return osd_buf; -} + char* TessBaseAPI::GetOsdText(int page_number) { + int orient_deg; + float orient_conf; + const char* script_name; + float script_conf; + + if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, + &script_conf)) + return nullptr; + + // clockwise rotation needed to make the page upright + int rotate = OrientationIdToValue(orient_deg / 90); + + const int kOsdBufsize = 255; + char* osd_buf = new char[kOsdBufsize]; + snprintf(osd_buf, kOsdBufsize, + "Page number: %d\n" + "Orientation in degrees: %d\n" + "Rotate: %d\n" + "Orientation confidence: %.2f\n" + "Script: %s\n" + "Script confidence: %.2f\n", + page_number, orient_deg, rotate, orient_conf, script_name, + script_conf); + + return osd_buf; + } #endif // ndef DISABLED_LEGACY_ENGINE /** Returns the average word confidence for Tesseract page result. 
*/ -int TessBaseAPI::MeanTextConf() { - int* conf = AllWordConfidences(); - if (!conf) return 0; - int sum = 0; - int *pt = conf; - while (*pt >= 0) sum += *pt++; - if (pt != conf) sum /= pt - conf; - delete [] conf; - return sum; -} + int TessBaseAPI::MeanTextConf() { + int* conf = AllWordConfidences(); + if (!conf) return 0; + int sum = 0; + int *pt = conf; + while (*pt >= 0) sum += *pt++; + if (pt != conf) sum /= pt - conf; + delete [] conf; + return sum; + } /** Returns an array of all word confidences, terminated by -1. */ -int* TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int n_word = 0; - PAGE_RES_IT res_it(page_res_); - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) - n_word++; - - int* conf = new int[n_word+1]; - n_word = 0; - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { - WERD_RES *word = res_it.word(); - WERD_CHOICE* choice = word->best_choice; - int w_conf = static_cast(100 + 5 * choice->certainty()); - // This is the eq for converting Tesseract confidence to 1..100 - if (w_conf < 0) w_conf = 0; - if (w_conf > 100) w_conf = 100; - conf[n_word++] = w_conf; - } - conf[n_word] = -1; - return conf; -} + int* TessBaseAPI::AllWordConfidences() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int n_word = 0; + PAGE_RES_IT res_it(page_res_); + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) + n_word++; + + int* conf = new int[n_word+1]; + n_word = 0; + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { + WERD_RES *word = res_it.word(); + WERD_CHOICE* choice = word->best_choice; + int w_conf = static_cast(100 + 5 * choice->certainty()); + // This is the eq for converting Tesseract confidence to 1..100 + if (w_conf < 0) w_conf = 0; + if (w_conf > 100) w_conf = 100; + conf[n_word++] = w_conf; + } + conf[n_word] = -1; + 
return conf; + } #ifndef DISABLED_LEGACY_ENGINE /** @@ -2116,61 +2283,61 @@ int* TessBaseAPI::AllWordConfidences() { * The currently set PageSegMode is preserved. * Returns false if adaption was not possible for some reason. */ -bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { - int debug = 0; - GetIntVariable("applybox_debug", &debug); - bool success = true; - PageSegMode current_psm = GetPageSegMode(); - SetPageSegMode(mode); - SetVariable("classify_enable_learning", "0"); - const std::unique_ptr text(GetUTF8Text()); - if (debug) { - tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); - } - if (text != nullptr) { - PAGE_RES_IT it(page_res_); - WERD_RES* word_res = it.word(); - if (word_res != nullptr) { - word_res->word->set_text(wordstr); - // Check to see if text matches wordstr. - int w = 0; - int t; - for (t = 0; text[t] != '\0'; ++t) { - if (text[t] == '\n' || text[t] == ' ') - continue; - while (wordstr[w] == ' ') ++w; - if (text[t] != wordstr[w]) - break; - ++w; + bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { + int debug = 0; + GetIntVariable("applybox_debug", &debug); + bool success = true; + PageSegMode current_psm = GetPageSegMode(); + SetPageSegMode(mode); + SetVariable("classify_enable_learning", "0"); + const std::unique_ptr text(GetUTF8Text()); + if (debug) { + tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); } - if (text[t] != '\0' || wordstr[w] != '\0') { - // No match. - delete page_res_; - GenericVector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); - PAGE_RES_IT pr_it(page_res_); - if (pr_it.word() == nullptr) + if (text != nullptr) { + PAGE_RES_IT it(page_res_); + WERD_RES* word_res = it.word(); + if (word_res != nullptr) { + word_res->word->set_text(wordstr); + // Check to see if text matches wordstr. 
+ int w = 0; + int t; + for (t = 0; text[t] != '\0'; ++t) { + if (text[t] == '\n' || text[t] == ' ') + continue; + while (wordstr[w] == ' ') ++w; + if (text[t] != wordstr[w]) + break; + ++w; + } + if (text[t] != '\0' || wordstr[w] != '\0') { + // No match. + delete page_res_; + GenericVector boxes; + page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); + tesseract_->ReSegmentByClassification(page_res_); + tesseract_->TidyUp(page_res_); + PAGE_RES_IT pr_it(page_res_); + if (pr_it.word() == nullptr) + success = false; + else + word_res = pr_it.word(); + } else { + word_res->BestChoiceToCorrectText(); + } + if (success) { + tesseract_->EnableLearning = true; + tesseract_->LearnWord(nullptr, word_res); + } + } else { success = false; - else - word_res = pr_it.word(); + } } else { - word_res->BestChoiceToCorrectText(); - } - if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); + success = false; } - } else { - success = false; + SetPageSegMode(current_psm); + return success; } - } else { - success = false; - } - SetPageSegMode(current_psm); - return success; -} #endif // ndef DISABLED_LEGACY_ENGINE /** @@ -2179,12 +2346,12 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { * Afterwards, you must call SetImage or TesseractRect before doing * any Recognize or Get* operation. */ -void TessBaseAPI::Clear() { - if (thresholder_ != nullptr) - thresholder_->Clear(); - ClearResults(); - if (tesseract_ != nullptr) SetInputImage(nullptr); -} + void TessBaseAPI::Clear() { + if (thresholder_ != nullptr) + thresholder_->Clear(); + ClearResults(); + if (tesseract_ != nullptr) SetInputImage(nullptr); + } /** * Close down tesseract and free up all memory. End() is equivalent to @@ -2192,100 +2359,100 @@ void TessBaseAPI::Clear() { * Once End() has been used, none of the other API functions may be used * other than Init and anything declared above it in the class definition. 
*/ -void TessBaseAPI::End() { - Clear(); - delete thresholder_; - thresholder_ = nullptr; - delete page_res_; - page_res_ = nullptr; - delete block_list_; - block_list_ = nullptr; - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; - delete tesseract_; - tesseract_ = nullptr; - delete osd_tesseract_; - osd_tesseract_ = nullptr; - delete equ_detect_; - equ_detect_ = nullptr; - delete input_file_; - input_file_ = nullptr; - delete output_file_; - output_file_ = nullptr; - delete datapath_; - datapath_ = nullptr; - delete language_; - language_ = nullptr; -} + void TessBaseAPI::End() { + Clear(); + delete thresholder_; + thresholder_ = nullptr; + delete page_res_; + page_res_ = nullptr; + delete block_list_; + block_list_ = nullptr; + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; + delete tesseract_; + tesseract_ = nullptr; + delete osd_tesseract_; + osd_tesseract_ = nullptr; + delete equ_detect_; + equ_detect_ = nullptr; + delete input_file_; + input_file_ = nullptr; + delete output_file_; + output_file_ = nullptr; + delete datapath_; + datapath_ = nullptr; + delete language_; + language_ = nullptr; + } // Clear any library-level memory caches. // There are a variety of expensive-to-load constant data structures (mostly // language dictionaries) that are cached globally -- surviving the Init() // and End() of individual TessBaseAPI's. This function allows the clearing // of these caches. 
-void TessBaseAPI::ClearPersistentCache() { - Dict::GlobalDawgCache()->DeleteUnusedDawgs(); -} + void TessBaseAPI::ClearPersistentCache() { + Dict::GlobalDawgCache()->DeleteUnusedDawgs(); + } /** * Check whether a word is valid according to Tesseract's language model * returns 0 if the word is invalid, non-zero if valid */ -int TessBaseAPI::IsValidWord(const char *word) { - return tesseract_->getDict().valid_word(word); -} + int TessBaseAPI::IsValidWord(const char *word) { + return tesseract_->getDict().valid_word(word); + } // Returns true if utf8_character is defined in the UniCharset. -bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { - return tesseract_->unicharset.contains_unichar(utf8_character); -} + bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { + return tesseract_->unicharset.contains_unichar(utf8_character); + } // TODO(rays) Obsolete this function and replace with a more aptly named // function that returns image coordinates rather than tesseract coordinates. -bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { - PageIterator* it = AnalyseLayout(); - if (it == nullptr) { - return false; - } - int x1, x2, y1, y2; - it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); - // Calculate offset and slope (NOTE: Kind of ugly) - if (x2 <= x1) x2 = x1 + 1; - // Convert the point pair to slope/offset of the baseline (in image coords.) - *out_slope = static_cast(y2 - y1) / (x2 - x1); - *out_offset = static_cast(y1 - *out_slope * x1); - // Get the y-coord of the baseline at the left and right edges of the - // textline's bounding box. - int left, top, right, bottom; - if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { - delete it; - return false; - } - int left_y = IntCastRounded(*out_slope * left + *out_offset); - int right_y = IntCastRounded(*out_slope * right + *out_offset); - // Shift the baseline down so it passes through the nearest bottom-corner - // of the textline's bounding box. 
This is the difference between the y - // at the lowest (max) edge of the box and the actual box bottom. - *out_offset += bottom - std::max(left_y, right_y); - // Switch back to bottom-up tesseract coordinates. Requires negation of - // the slope and height - offset for the offset. - *out_slope = -*out_slope; - *out_offset = rect_height_ - *out_offset; - delete it; - - return true; -} + bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { + PageIterator* it = AnalyseLayout(); + if (it == nullptr) { + return false; + } + int x1, x2, y1, y2; + it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); + // Calculate offset and slope (NOTE: Kind of ugly) + if (x2 <= x1) x2 = x1 + 1; + // Convert the point pair to slope/offset of the baseline (in image coords.) + *out_slope = static_cast(y2 - y1) / (x2 - x1); + *out_offset = static_cast(y1 - *out_slope * x1); + // Get the y-coord of the baseline at the left and right edges of the + // textline's bounding box. + int left, top, right, bottom; + if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { + delete it; + return false; + } + int left_y = IntCastRounded(*out_slope * left + *out_offset); + int right_y = IntCastRounded(*out_slope * right + *out_offset); + // Shift the baseline down so it passes through the nearest bottom-corner + // of the textline's bounding box. This is the difference between the y + // at the lowest (max) edge of the box and the actual box bottom. + *out_offset += bottom - std::max(left_y, right_y); + // Switch back to bottom-up tesseract coordinates. Requires negation of + // the slope and height - offset for the offset. + *out_slope = -*out_slope; + *out_offset = rect_height_ - *out_offset; + delete it; + + return true; + } /** Sets Dict::letter_is_okay_ function to point to the given function. 
*/ -void TessBaseAPI::SetDictFunc(DictFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; - } -} + void TessBaseAPI::SetDictFunc(DictFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().letter_is_okay_ = f; + } + } /** * Sets Dict::probability_in_context_ function to point to the given @@ -2295,35 +2462,35 @@ void TessBaseAPI::SetDictFunc(DictFunc f) { * "character" (in general a utf-8 string), given the context of a previous * utf-8 string. */ -void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; - // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().probability_in_context_ = f; + // Set it for the sublangs too. + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + } + } } - } -} #ifndef DISABLED_LEGACY_ENGINE /** Sets Wordrec::fill_lattice_ function to point to the given function. */ -void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { - if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; -} + void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { + if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; + } #endif // ndef DISABLED_LEGACY_ENGINE /** Common code for setting the image. 
*/ -bool TessBaseAPI::InternalSetImage() { - if (tesseract_ == nullptr) { - tprintf("Please call Init before attempting to set an image.\n"); - return false; - } - if (thresholder_ == nullptr) - thresholder_ = new ImageThresholder; - ClearResults(); - return true; -} + bool TessBaseAPI::InternalSetImage() { + if (tesseract_ == nullptr) { + tprintf("Please call Init before attempting to set an image.\n"); + return false; + } + if (thresholder_ == nullptr) + thresholder_ = new ImageThresholder; + ClearResults(); + return true; + } /** * Run the thresholder to make the thresholded image, returned in pix, @@ -2331,155 +2498,155 @@ bool TessBaseAPI::InternalSetImage() { * to an existing pixDestroyable Pix. * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ -bool TessBaseAPI::Threshold(Pix** pix) { - ASSERT_HOST(pix != nullptr); - if (*pix != nullptr) - pixDestroy(pix); - // Zero resolution messes up the algorithms, so make sure it is credible. - int user_dpi = 0; - bool a = GetIntVariable("user_defined_dpi", &user_dpi); - int y_res = thresholder_->GetScaledYResolution(); - if (user_dpi && (user_dpi < kMinCredibleResolution || - user_dpi > kMaxCredibleResolution)) { - tprintf("Warning: User defined image dpi is outside of expected range " - "(%d - %d)!\n", - kMinCredibleResolution, kMaxCredibleResolution); - } - // Always use user defined dpi - if (user_dpi) { - thresholder_->SetSourceYResolution(user_dpi); - } else if (y_res < kMinCredibleResolution || - y_res > kMaxCredibleResolution) { - tprintf("Warning: Invalid resolution %d dpi. 
Using %d instead.\n", - y_res, kMinCredibleResolution); - thresholder_->SetSourceYResolution(kMinCredibleResolution); - } - PageSegMode pageseg_mode = - static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); - if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; - thresholder_->GetImageSizes(&rect_left_, &rect_top_, - &rect_width_, &rect_height_, - &image_width_, &image_height_); - if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); - } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); - } - // Set the internal resolution that is used for layout parameters from the - // estimated resolution, rather than the image resolution, which may be - // fabricated, but we will use the image resolution, if there is one, to - // report output point sizes. - int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), - kMinCredibleResolution, - kMaxCredibleResolution); - if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { - tprintf("Estimated internal resolution %d out of range! " - "Corrected to %d.\n", - thresholder_->GetScaledEstimatedResolution(), estimated_res); - } - tesseract_->set_source_resolution(estimated_res); - SavePixForCrash(estimated_res, *pix); - return true; -} + bool TessBaseAPI::Threshold(Pix** pix) { + ASSERT_HOST(pix != nullptr); + if (*pix != nullptr) + pixDestroy(pix); + // Zero resolution messes up the algorithms, so make sure it is credible. 
+ int user_dpi = 0; + bool a = GetIntVariable("user_defined_dpi", &user_dpi); + int y_res = thresholder_->GetScaledYResolution(); + if (user_dpi && (user_dpi < kMinCredibleResolution || + user_dpi > kMaxCredibleResolution)) { + tprintf("Warning: User defined image dpi is outside of expected range " + "(%d - %d)!\n", + kMinCredibleResolution, kMaxCredibleResolution); + } + // Always use user defined dpi + if (user_dpi) { + thresholder_->SetSourceYResolution(user_dpi); + } else if (y_res < kMinCredibleResolution || + y_res > kMaxCredibleResolution) { + tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", + y_res, kMinCredibleResolution); + thresholder_->SetSourceYResolution(kMinCredibleResolution); + } + PageSegMode pageseg_mode = + static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); + if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; + thresholder_->GetImageSizes(&rect_left_, &rect_top_, + &rect_width_, &rect_height_, + &image_width_, &image_height_); + if (!thresholder_->IsBinary()) { + tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + } else { + tesseract_->set_pix_thresholds(nullptr); + tesseract_->set_pix_grey(nullptr); + } + // Set the internal resolution that is used for layout parameters from the + // estimated resolution, rather than the image resolution, which may be + // fabricated, but we will use the image resolution, if there is one, to + // report output point sizes. + int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), + kMinCredibleResolution, + kMaxCredibleResolution); + if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { + tprintf("Estimated internal resolution %d out of range! 
" + "Corrected to %d.\n", + thresholder_->GetScaledEstimatedResolution(), estimated_res); + } + tesseract_->set_source_resolution(estimated_res); + SavePixForCrash(estimated_res, *pix); + return true; + } /** Find lines from the image making the BLOCK_LIST. */ -int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (recognition_done_) - ClearResults(); - if (!block_list_->empty()) { - return 0; - } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; - #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); - #endif - } - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return -1; - } + int TessBaseAPI::FindLines() { + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (recognition_done_) + ClearResults(); + if (!block_list_->empty()) { + return 0; + } + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; +#ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); +#endif + } + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return -1; + } - tesseract_->PrepareForPageseg(); + tesseract_->PrepareForPageseg(); #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { - if (equ_detect_ == nullptr && datapath_ != nullptr) { - equ_detect_ = new EquationDetect(datapath_->string(), nullptr); - } - if (equ_detect_ == nullptr) { - tprintf("Warning: Could not set equation detector\n"); - } else { - tesseract_->SetEquationDetect(equ_detect_); - } - } + if (tesseract_->textord_equation_detect) { + if (equ_detect_ == nullptr && datapath_ != nullptr) { + equ_detect_ = new EquationDetect(datapath_->string(), nullptr); + } + if (equ_detect_ == nullptr) { + tprintf("Warning: Could not set equation 
detector\n"); + } else { + tesseract_->SetEquationDetect(equ_detect_); + } + } #endif // ndef DISABLED_LEGACY_ENGINE - Tesseract* osd_tess = osd_tesseract_; - OSResults osr; - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && - osd_tess == nullptr) { - if (strcmp(language_->string(), "osd") == 0) { - osd_tess = tesseract_; - } else { - osd_tesseract_ = new Tesseract; - TessdataManager mgr(reader_); - if (datapath_ == nullptr) { - tprintf("Warning: Auto orientation and script detection requested," - " but data path is undefined\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, - "osd", OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, - false, &mgr) == 0) { - osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution( - thresholder_->GetSourceYResolution()); - } else { - tprintf("Warning: Auto orientation and script detection requested," - " but osd language failed to load\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; + Tesseract* osd_tess = osd_tesseract_; + OSResults osr; + if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && + osd_tess == nullptr) { + if (strcmp(language_->string(), "osd") == 0) { + osd_tess = tesseract_; + } else { + osd_tesseract_ = new Tesseract; + TessdataManager mgr(reader_); + if (datapath_ == nullptr) { + tprintf("Warning: Auto orientation and script detection requested," + " but data path is undefined\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, + "osd", OEM_TESSERACT_ONLY, + nullptr, 0, nullptr, nullptr, + false, &mgr) == 0) { + osd_tess = osd_tesseract_; + osd_tesseract_->set_source_resolution( + thresholder_->GetSourceYResolution()); + } else { + tprintf("Warning: Auto orientation and script detection requested," + " but osd language failed to load\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } + } } - } - } - if 
(tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) - return -1; + if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) + return -1; - // If Devanagari is being recognized, we use different images for page seg - // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); - return 0; -} + // If Devanagari is being recognized, we use different images for page seg + // and for OCR. + tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); + return 0; + } /** Delete the pageres and clear the block list ready for a new page. */ -void TessBaseAPI::ClearResults() { - if (tesseract_ != nullptr) { - tesseract_->Clear(); - } - delete page_res_; - page_res_ = nullptr; - recognition_done_ = false; - if (block_list_ == nullptr) - block_list_ = new BLOCK_LIST; - else - block_list_->clear(); - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - SavePixForCrash(0, nullptr); -} + void TessBaseAPI::ClearResults() { + if (tesseract_ != nullptr) { + tesseract_->Clear(); + } + delete page_res_; + page_res_ = nullptr; + recognition_done_ = false; + if (block_list_ == nullptr) + block_list_ = new BLOCK_LIST; + else + block_list_->clear(); + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + SavePixForCrash(0, nullptr); + } /** * Return the length of the output text string, as UTF8, assuming @@ -2488,55 +2655,55 @@ void TessBaseAPI::ClearResults() { * character. * Also return the number of recognized blobs in blob_count. */ -int TessBaseAPI::TextLength(int* blob_count) { - if (tesseract_ == nullptr || page_res_ == nullptr) - return 0; - - PAGE_RES_IT page_res_it(page_res_); - int total_length = 2; - int total_blobs = 0; - // Iterate over the data structures to extract the recognition result. 
- for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - WERD_CHOICE* choice = word->best_choice; - if (choice != nullptr) { - total_blobs += choice->length() + 2; - total_length += choice->unichar_string().length() + 2; - for (int i = 0; i < word->reject_map.length(); ++i) { - if (word->reject_map[i].rejected()) - ++total_length; + int TessBaseAPI::TextLength(int* blob_count) { + if (tesseract_ == nullptr || page_res_ == nullptr) + return 0; + + PAGE_RES_IT page_res_it(page_res_); + int total_length = 2; + int total_blobs = 0; + // Iterate over the data structures to extract the recognition result. + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + WERD_CHOICE* choice = word->best_choice; + if (choice != nullptr) { + total_blobs += choice->length() + 2; + total_length += choice->unichar_string().length() + 2; + for (int i = 0; i < word->reject_map.length(); ++i) { + if (word->reject_map[i].rejected()) + ++total_length; + } + } } + if (blob_count != nullptr) + *blob_count = total_blobs; + return total_length; } - } - if (blob_count != nullptr) - *blob_count = total_blobs; - return total_length; -} #ifndef DISABLED_LEGACY_ENGINE /** * Estimates the Orientation And Script of the image. * Returns true if the image was processed successfully. 
*/ -bool TessBaseAPI::DetectOS(OSResults* osr) { - if (tesseract_ == nullptr) - return false; - ClearResults(); - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return false; - } + bool TessBaseAPI::DetectOS(OSResults* osr) { + if (tesseract_ == nullptr) + return false; + ClearResults(); + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return false; + } - if (input_file_ == nullptr) - input_file_ = new STRING(kInputFile); - return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; -} + if (input_file_ == nullptr) + input_file_ = new STRING(kInputFile); + return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; + } #endif // ndef DISABLED_LEGACY_ENGINE -void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); -} + void TessBaseAPI::set_min_orientation_margin(double margin) { + tesseract_->min_orientation_margin.set_value(margin); + } /** * Return text orientation of each block as determined in an earlier page layout @@ -2552,98 +2719,98 @@ void TessBaseAPI::set_min_orientation_margin(double margin) { * be less than the total number of blocks. The ordering is intended to be * consistent with GetTextLines(). 
*/ -void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing) { - delete[] *block_orientation; - *block_orientation = nullptr; - delete[] *vertical_writing; - *vertical_writing = nullptr; - BLOCK_IT block_it(block_list_); - - block_it.move_to_first(); - int num_blocks = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - ++num_blocks; - } - if (!num_blocks) { - tprintf("WARNING: Found no blocks\n"); - return; - } - *block_orientation = new int[num_blocks]; - *vertical_writing = new bool[num_blocks]; - block_it.move_to_first(); - int i = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - FCOORD re_rotation = block_it.data()->re_rotation(); - float re_theta = re_rotation.angle(); - FCOORD classify_rotation = block_it.data()->classify_rotation(); - float classify_theta = classify_rotation.angle(); - double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; - if (rot_theta < 0) rot_theta += 4; - int num_rotations = static_cast(rot_theta + 0.5); - (*block_orientation)[i] = num_rotations; - // The classify_rotation is non-zero only if the text has vertical - // writing direction. 
- (*vertical_writing)[i] = classify_rotation.y() != 0.0f; - ++i; - } -} - - -void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { - int debug_level = 0; - GetIntVariable("paragraph_debug_level", &debug_level); - if (paragraph_models_ == nullptr) - paragraph_models_ = new GenericVector; - MutableIterator *result_it = GetMutableIterator(); - do { // Detect paragraphs for this block - GenericVector models; - ::tesseract::DetectParagraphs(debug_level, after_text_recognition, - result_it, &models); - *paragraph_models_ += models; - } while (result_it->Next(RIL_BLOCK)); - delete result_it; -} + void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing) { + delete[] *block_orientation; + *block_orientation = nullptr; + delete[] *vertical_writing; + *vertical_writing = nullptr; + BLOCK_IT block_it(block_list_); + + block_it.move_to_first(); + int num_blocks = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + ++num_blocks; + } + if (!num_blocks) { + tprintf("WARNING: Found no blocks\n"); + return; + } + *block_orientation = new int[num_blocks]; + *vertical_writing = new bool[num_blocks]; + block_it.move_to_first(); + int i = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + FCOORD re_rotation = block_it.data()->re_rotation(); + float re_theta = re_rotation.angle(); + FCOORD classify_rotation = block_it.data()->classify_rotation(); + float classify_theta = classify_rotation.angle(); + double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; + if (rot_theta < 0) rot_theta += 4; + int num_rotations = static_cast(rot_theta + 0.5); + (*block_orientation)[i] = num_rotations; + // The classify_rotation is non-zero only if the text has vertical + // writing direction. 
+ (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + ++i; + } + } + + + void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { + int debug_level = 0; + GetIntVariable("paragraph_debug_level", &debug_level); + if (paragraph_models_ == nullptr) + paragraph_models_ = new GenericVector; + MutableIterator *result_it = GetMutableIterator(); + do { // Detect paragraphs for this block + GenericVector models; + ::tesseract::DetectParagraphs(debug_level, after_text_recognition, + result_it, &models); + *paragraph_models_ += models; + } while (result_it->Next(RIL_BLOCK)); + delete result_it; + } /** This method returns the string form of the specified unichar. */ -const char* TessBaseAPI::GetUnichar(int unichar_id) { - return tesseract_->unicharset.id_to_unichar(unichar_id); -} + const char* TessBaseAPI::GetUnichar(int unichar_id) { + return tesseract_->unicharset.id_to_unichar(unichar_id); + } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ -const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; - return tesseract_->getDict().GetDawg(i); -} + const Dawg *TessBaseAPI::GetDawg(int i) const { + if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; + return tesseract_->getDict().GetDawg(i); + } /** Return the number of dawgs loaded into tesseract_ object. */ -int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); -} + int TessBaseAPI::NumDawgs() const { + return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); + } /** Escape a char string - remove <>&"' with HTML codes. 
*/ -STRING HOcrEscape(const char* text) { - STRING ret; - const char *ptr; - for (ptr = text; *ptr; ptr++) { - switch (*ptr) { - case '<': ret += "<"; break; - case '>': ret += ">"; break; - case '&': ret += "&"; break; - case '"': ret += """; break; - case '\'': ret += "'"; break; - default: ret += *ptr; + STRING HOcrEscape(const char* text) { + STRING ret; + const char *ptr; + for (ptr = text; *ptr; ptr++) { + switch (*ptr) { + case '<': ret += "<"; break; + case '>': ret += ">"; break; + case '&': ret += "&"; break; + case '"': ret += """; break; + case '\'': ret += "'"; break; + default: ret += *ptr; + } + } + return ret; } - } - return ret; -} #ifndef DISABLED_LEGACY_ENGINE @@ -2653,271 +2820,271 @@ STRING HOcrEscape(const char* text) { // Ocropus add-ons. /** Find lines from the image making the BLOCK_LIST. */ -BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { - ASSERT_HOST(FindLines() == 0); - BLOCK_LIST* result = block_list_; - block_list_ = nullptr; - return result; -} + BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { + ASSERT_HOST(FindLines() == 0); + BLOCK_LIST* result = block_list_; + block_list_ = nullptr; + return result; + } /** * Delete a block list. * This is to keep BLOCK_LIST pointer opaque * and let go of including the other headers. 
*/ -void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; -} - - -ROW *TessBaseAPI::MakeTessOCRRow(float baseline, - float xheight, - float descender, - float ascender) { - int32_t xstarts[] = {-32000}; - double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); -} + void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { + delete block_list; + } + + + ROW *TessBaseAPI::MakeTessOCRRow(float baseline, + float xheight, + float descender, + float ascender) { + int32_t xstarts[] = {-32000}; + double quad_coeffs[] = {0, 0, baseline}; + return new ROW(1, + xstarts, + quad_coeffs, + xheight, + ascender - (baseline + xheight), + descender - baseline, + 0, + 0); + } /** Creates a TBLOB* from the whole pix. */ -TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); - - // Create C_BLOBs from the page - extract_edges(pix, &block); - - // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); - C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; - // Move all the outlines to the first blob. - C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); - } - // Convert the first blob to the output TBLOB. 
- return TBLOB::PolygonalCopy(false, c_blob_it.data()); -} + TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); + + // Create C_BLOBs from the page + extract_edges(pix, &block); + + // Merge all C_BLOBs + C_BLOB_LIST *list = block.blob_list(); + C_BLOB_IT c_blob_it(list); + if (c_blob_it.empty()) + return nullptr; + // Move all the outlines to the first blob. + C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); + for (c_blob_it.forward(); + !c_blob_it.at_first(); + c_blob_it.forward()) { + C_BLOB *c_blob = c_blob_it.data(); + ol_it.add_list_after(c_blob->out_list()); + } + // Convert the first blob to the output TBLOB. + return TBLOB::PolygonalCopy(false, c_blob_it.data()); + } /** * This method baseline normalizes a TBLOB in-place. The input row is used * for normalization. The denorm is an optional parameter in which the * normalization-antidote is returned. */ -void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { - TBOX box = tblob->bounding_box(); - float x_center = (box.left() + box.right()) / 2.0f; - float baseline = row->base_line(x_center); - float scale = kBlnXHeight / row->x_height(); - tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); -} + void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { + TBOX box = tblob->bounding_box(); + float x_center = (box.left() + box.right()) / 2.0f; + float baseline = row->base_line(x_center); + float scale = kBlnXHeight / row->x_height(); + tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, + 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); + } /** * Return a TBLOB * from the whole pix. * To be freed later with delete. 
*/ -static TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender, - bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); - - // Normalize TBLOB - ROW *row = - TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); - delete row; - return tblob; -} + static TBLOB *make_tesseract_blob(float baseline, float xheight, + float descender, float ascender, + bool numeric_mode, Pix* pix) { + TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); + + // Normalize TBLOB + ROW *row = + TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); + delete row; + return tblob; + } /** * Adapt to recognize the current image as the given character. * The image must be preloaded into pix_binary_ and be just an image * of a single character. */ -void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender) { - UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, - tesseract_->classify_bln_numeric_mode, - tesseract_->pix_binary()); - float threshold; - float best_rating = -100; - - - // Classify to get a raw choice. 
- BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, &choices); - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(&choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->rating() > best_rating) { - best_rating = choice_it.data()->rating(); - } - } + void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender) { + UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); + TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + tesseract_->classify_bln_numeric_mode, + tesseract_->pix_binary()); + float threshold; + float best_rating = -100; + + + // Classify to get a raw choice. + BLOB_CHOICE_LIST choices; + tesseract_->AdaptiveClassifier(blob, &choices); + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(&choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + if (choice_it.data()->rating() > best_rating) { + best_rating = choice_it.data()->rating(); + } + } - threshold = tesseract_->matcher_good_threshold; + threshold = tesseract_->matcher_good_threshold; + + if (blob->outlines) + tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, + tesseract_->AdaptedTemplates); + delete blob; + } - if (blob->outlines) - tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, - tesseract_->AdaptedTemplates); - delete blob; -} + PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { + PAGE_RES *page_res = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); + return page_res; + } -PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(false, block_list, + PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result) { + if (!pass1_result) + 
pass1_result = new PAGE_RES(false, block_list, &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); - return page_res; -} - -PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result) { - if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); - return pass1_result; -} - -struct TESS_CHAR : ELIST_LINK { - char *unicode_repr; - int length; // of unicode_repr - float cost; - TBOX box; - - TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { - length = (len == -1 ? strlen(repr) : len); - unicode_repr = new char[length + 1]; - strncpy(unicode_repr, repr, length); - } + tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); + return pass1_result; + } - TESS_CHAR() - : unicode_repr(nullptr), - length(0), - cost(0.0f) - { // Satisfies ELISTIZE. - } - ~TESS_CHAR() { - delete [] unicode_repr; - } -}; + struct TESS_CHAR : ELIST_LINK { + char *unicode_repr; + int length; // of unicode_repr + float cost; + TBOX box; + + TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { + length = (len == -1 ? strlen(repr) : len); + unicode_repr = new char[length + 1]; + strncpy(unicode_repr, repr, length); + } -ELISTIZEH(TESS_CHAR) -ELISTIZE(TESS_CHAR) + TESS_CHAR() + : unicode_repr(nullptr), + length(0), + cost(0.0f) + { // Satisfies ELISTIZE. 
+ } + ~TESS_CHAR() { + delete [] unicode_repr; + } + }; -static void add_space(TESS_CHAR_IT* it) { - TESS_CHAR *t = new TESS_CHAR(0, " "); - it->add_after_then_move(t); -} + ELISTIZEH(TESS_CHAR) + ELISTIZE(TESS_CHAR) + static void add_space(TESS_CHAR_IT* it) { + TESS_CHAR *t = new TESS_CHAR(0, " "); + it->add_after_then_move(t); + } -static float rating_to_cost(float rating) { - rating = 100 + rating; - // cuddled that to save from coverage profiler - // (I have never seen ratings worse than -100, - // but the check won't hurt) - if (rating < 0) rating = 0; - return rating; -} + + static float rating_to_cost(float rating) { + rating = 100 + rating; + // cuddled that to save from coverage profiler + // (I have never seen ratings worse than -100, + // but the check won't hurt) + if (rating < 0) rating = 0; + return rating; + } /** * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ -static void extract_result(TESS_CHAR_IT* out, - PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - int word_count = 0; - while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().string(); - const char *len = word->best_choice->unichar_lengths().string(); - TBOX real_rect = word->word->bounding_box(); - - if (word_count) - add_space(out); - int n = strlen(len); - for (int i = 0; i < n; i++) { - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); - tc->box = real_rect.intersection(word->box_word->BlobBox(i)); - out->add_after_then_move(tc); - str += *len; - len++; - } - page_res_it.forward(); - word_count++; - } -} + static void extract_result(TESS_CHAR_IT* out, + PAGE_RES* page_res) { + PAGE_RES_IT page_res_it(page_res); + int word_count = 0; + while (page_res_it.word() != nullptr) { + WERD_RES *word = page_res_it.word(); + const char *str = word->best_choice->unichar_string().string(); + const char *len = 
word->best_choice->unichar_lengths().string(); + TBOX real_rect = word->word->bounding_box(); + + if (word_count) + add_space(out); + int n = strlen(len); + for (int i = 0; i < n; i++) { + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); + tc->box = real_rect.intersection(word->box_word->BlobBox(i)); + out->add_after_then_move(tc); + str += *len; + len++; + } + page_res_it.forward(); + word_count++; + } + } /** * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ -int TessBaseAPI::TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res) { - TESS_CHAR_LIST tess_chars; - TESS_CHAR_IT tess_chars_it(&tess_chars); - extract_result(&tess_chars_it, page_res); - tess_chars_it.move_to_first(); - int n = tess_chars.length(); - int text_len = 0; - *lengths = new int[n]; - *costs = new float[n]; - *x0 = new int[n]; - *y0 = new int[n]; - *x1 = new int[n]; - *y1 = new int[n]; - int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); - text_len += (*lengths)[i] = tc->length; - (*costs)[i] = tc->cost; - (*x0)[i] = tc->box.left(); - (*y0)[i] = tc->box.bottom(); - (*x1)[i] = tc->box.right(); - (*y1)[i] = tc->box.top(); - } - char *p = *text = new char[text_len]; - - tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward()) { - TESS_CHAR *tc = tess_chars_it.data(); - strncpy(p, tc->unicode_repr, tc->length); - p += tc->length; - } - return n; -} + int TessBaseAPI::TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res) { + TESS_CHAR_LIST tess_chars; + TESS_CHAR_IT tess_chars_it(&tess_chars); + extract_result(&tess_chars_it, page_res); + 
tess_chars_it.move_to_first(); + int n = tess_chars.length(); + int text_len = 0; + *lengths = new int[n]; + *costs = new float[n]; + *x0 = new int[n]; + *y0 = new int[n]; + *x1 = new int[n]; + *y1 = new int[n]; + int i = 0; + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward(), i++) { + TESS_CHAR *tc = tess_chars_it.data(); + text_len += (*lengths)[i] = tc->length; + (*costs)[i] = tc->cost; + (*x0)[i] = tc->box.left(); + (*y0)[i] = tc->box.bottom(); + (*x1)[i] = tc->box.right(); + (*y1)[i] = tc->box.top(); + } + char *p = *text = new char[text_len]; + + tess_chars_it.move_to_first(); + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward()) { + TESS_CHAR *tc = tess_chars_it.data(); + strncpy(p, tc->unicode_repr, tc->length); + p += tc->length; + } + return n; + } /** This method returns the features associated with the input blob. */ // The resulting features are returned in int_features, which must be @@ -2926,80 +3093,80 @@ int TessBaseAPI::TesseractExtractResult(char** text, // On return feature_outline_index is filled with an index of the outline // corresponding to each feature in int_features. // TODO(rays) Fix the caller to out outline_counts instead. -void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, - INT_FEATURE_STRUCT* int_features, - int* num_features, - int* feature_outline_index) { - GenericVector outline_counts; - GenericVector bl_features; - GenericVector cn_features; - INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); - if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { - *num_features = 0; - return; // Feature extraction failed. - } - *num_features = cn_features.size(); - memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); - // TODO(rays) Pass outline_counts back and simplify the calling code. 
- if (feature_outline_index != nullptr) { - int f = 0; - for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; + void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, + INT_FEATURE_STRUCT* int_features, + int* num_features, + int* feature_outline_index) { + GenericVector outline_counts; + GenericVector bl_features; + GenericVector cn_features; + INT_FX_RESULT_STRUCT fx_info; + tesseract_->ExtractFeatures(*blob, false, &bl_features, + &cn_features, &fx_info, &outline_counts); + if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { + *num_features = 0; + return; // Feature extraction failed. + } + *num_features = cn_features.size(); + memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); + // TODO(rays) Pass outline_counts back and simplify the calling code. + if (feature_outline_index != nullptr) { + int f = 0; + for (int i = 0; i < outline_counts.size(); ++i) { + while (f < outline_counts[i]) + feature_outline_index[f++] = i; + } + } } - } -} // This method returns the row to which a box of specified dimensions would // belong. If no good match is found, it returns nullptr. 
-ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, - int left, int top, int right, int bottom) { - TBOX box(left, bottom, right, top); - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; + ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, + int left, int top, int right, int bottom) { + TBOX box(left, bottom, right, top); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->pdblk.bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (box.major_overlap(word->bounding_box())) + return row; + } + } } + return nullptr; } - } - return nullptr; -} /** Method to run adaptive classifier on a blob. 
*/ -void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned) { - BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; - tesseract_->AdaptiveClassifier(blob, choices); - BLOB_CHOICE_IT choices_it(choices); - int& index = *num_matches_returned; - index = 0; - for (choices_it.mark_cycle_pt(); - !choices_it.cycled_list() && index < num_max_matches; - choices_it.forward()) { - BLOB_CHOICE* choice = choices_it.data(); - unichar_ids[index] = choice->unichar_id(); - ratings[index] = choice->rating(); - ++index; - } - *num_matches_returned = index; - delete choices; -} + void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned) { + BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; + tesseract_->AdaptiveClassifier(blob, choices); + BLOB_CHOICE_IT choices_it(choices); + int& index = *num_matches_returned; + index = 0; + for (choices_it.mark_cycle_pt(); + !choices_it.cycled_list() && index < num_max_matches; + choices_it.forward()) { + BLOB_CHOICE* choice = choices_it.data(); + unichar_ids[index] = choice->unichar_id(); + ratings[index] = choice->rating(); + ++index; + } + *num_matches_returned = index; + delete choices; + } #endif // ndef DISABLED_LEGACY_ENGINE } // namespace tesseract. 
diff --git a/src/api/baseapi.h b/src/api/baseapi.h index da12d647b8..6da486011a 100644 --- a/src/api/baseapi.h +++ b/src/api/baseapi.h @@ -61,34 +61,34 @@ struct TBLOB; namespace tesseract { -class Dawg; -class Dict; -class EquationDetect; -class PageIterator; -class LTRResultIterator; -class ResultIterator; -class MutableIterator; -class TessResultRenderer; -class Tesseract; -class Trie; -class Wordrec; - -typedef int (Dict::*DictFunc)(void* void_dawg_args, - const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const; -typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, - const char* context, - int context_bytes, - const char* character, - int character_bytes); -typedef float (Dict::*ParamsModelClassifyFunc)( - const char *lang, void *path); -typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); -typedef TessCallback4 - TruthCallback; + class Dawg; + class Dict; + class EquationDetect; + class PageIterator; + class LTRResultIterator; + class ResultIterator; + class MutableIterator; + class TessResultRenderer; + class Tesseract; + class Trie; + class Wordrec; + + typedef int (Dict::*DictFunc)(void* void_dawg_args, + const UNICHARSET& unicharset, + UNICHAR_ID unichar_id, bool word_end) const; + typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, + const char* context, + int context_bytes, + const char* character, + int character_bytes); + typedef float (Dict::*ParamsModelClassifyFunc)( + const char *lang, void *path); + typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, + const WERD_CHOICE_LIST &best_choices, + const UNICHARSET &unicharset, + BlamerBundle *blamer_bundle); + typedef TessCallback4 + TruthCallback; /** * Base class for all tesseract APIs. 
@@ -98,829 +98,841 @@ typedef TessCallback4 * class to hide the data types so that users of this class don't have to * include any other Tesseract headers. */ -class TESS_API TessBaseAPI { - public: - TessBaseAPI(); - virtual ~TessBaseAPI(); - - /** - * Returns the version identifier as a static string. Do not delete. - */ - static const char* Version(); - - /** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. - */ - static size_t getOpenCLDevice(void **device); - - /** - * Writes the thresholded image to stderr as a PBM file on receipt of a - * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). - */ - static void CatchSignals(); - - /** - * Set the name of the input file. Needed for training and - * reading a UNLV zone file, and for searchable PDF output. - */ - void SetInputName(const char* name); - /** - * These functions are required for searchable PDF output. - * We need our hands on the input file so that we can include - * it in the PDF without transcoding. If that is not possible, - * we need the original image. Finally, resolution metadata - * is stored in the PDF so we need that as well. - */ - const char* GetInputName(); - // Takes ownership of the input pix. - void SetInputImage(Pix *pix); - Pix* GetInputImage(); - int GetSourceYResolution(); - const char* GetDatapath(); - - /** Set the name of the bonus output files. Needed only for debugging. */ - void SetOutputName(const char* name); - - /** - * Set the value of an internal "parameter." - * Supply the name of the parameter and the value as a string, just as - * you would in a config file. - * Returns false if the name lookup failed. - * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. - * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. 
- * SetVariable may be used before Init, but settings will revert to - * defaults on End(). - * - * Note: Must be called after Init(). Only works for non-init variables - * (init variables should be passed to Init()). - */ - bool SetVariable(const char* name, const char* value); - bool SetDebugVariable(const char* name, const char* value); - - /** - * Returns true if the parameter was found among Tesseract parameters. - * Fills in value with the value of the parameter. - */ - bool GetIntVariable(const char *name, int *value) const; - bool GetBoolVariable(const char *name, bool *value) const; - bool GetDoubleVariable(const char *name, double *value) const; - - /** - * Returns the pointer to the string that represents the value of the - * parameter if it was found among Tesseract parameters. - */ - const char *GetStringVariable(const char *name) const; - - /** - * Print Tesseract parameters to the given file. - */ - void PrintVariables(FILE *fp) const; - - /** - * Get value of named variable as a string, if it exists. - */ - bool GetVariableAsString(const char *name, STRING *val); - - /** - * Instances are now mostly thread-safe and totally independent, - * but some global parameters remain. Basically it is safe to use multiple - * TessBaseAPIs in different threads in parallel, UNLESS: - * you use SetVariable on some of the Params in classify and textord. - * If you do, then the effect will be to change it for all your instances. - * - * Start tesseract. Returns zero on success and -1 on failure. - * NOTE that the only members that may be called before Init are those - * listed above here in the class definition. - * - * The datapath must be the name of the parent directory of tessdata and - * must end in / . Any name after the last / will be stripped. - * The language is (usually) an ISO 639-3 string or nullptr will default to eng. 
- * It is entirely safe (and eventually will be efficient too) to call - * Init multiple times on the same instance to change language, or just - * to reset the classifier. - * The language may be a string of the form [~][+[~]]* indicating - * that multiple languages are to be loaded. Eg hin+eng will load Hindi and - * English. Languages may specify internally that they want to be loaded - * with one or more other languages, so the ~ sign is available to override - * that. Eg if hin were set to load eng by default, then hin+~eng would force - * loading only hin. The number of loaded languages is limited only by - * memory, with the caveat that loading additional languages will impact - * both speed and accuracy, as there is more work to do to decide on the - * applicable language, and there is more chance of hallucinating incorrect - * words. - * WARNING: On changing languages, all Tesseract parameters are reset - * back to their default values. (Which may vary between languages.) - * If you have a rare need to set a Variable that controls - * initialization for a second call to Init you should explicitly - * call End() and then use SetVariable before Init. This is only a very - * rare use case, since there are very few uses that require any parameters - * to be set before Init. - * - * If set_only_non_debug_params is true, only params that do not contain - * "debug" in the name will be set. 
- */ - int Init(const char* datapath, const char* language, OcrEngineMode mode, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params); - int Init(const char* datapath, const char* language, OcrEngineMode oem) { - return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); - } - int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); - } - // In-memory version reads the traineddata file directly from the given - // data[data_size] array, and/or reads data via a FileReader. - int Init(const char* data, int data_size, const char* language, - OcrEngineMode mode, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader); - - /** - * Returns the languages string used in the last valid initialization. - * If the last initialization specified "deu+hin" then that will be - * returned. If hin loaded eng automatically as well, then that will - * not be included in this list. To find the languages actually - * loaded use GetLoadedLanguagesAsVector. - * The returned string should NOT be deleted. - */ - const char* GetInitLanguagesAsString() const; - - /** - * Returns the loaded languages in the vector of STRINGs. - * Includes all languages loaded by the last Init, including those loaded - * as dependencies of other loaded languages. - */ - void GetLoadedLanguagesAsVector(GenericVector* langs) const; - - /** - * Returns the available languages in the sorted vector of STRINGs. - */ - void GetAvailableLanguagesAsVector(GenericVector* langs) const; - - /** - * Init only the lang model component of Tesseract. The only functions - * that work after this init are SetVariable and IsValidWord. - * WARNING: temporary! This function will be removed from here and placed - * in a separate API at some future time. 
- */ - int InitLangMod(const char* datapath, const char* language); - - /** - * Init only for page layout analysis. Use only for calls to SetImage and - * AnalysePage. Calls that attempt recognition will generate an error. - */ - void InitForAnalysePage(); - - /** - * Read a "config" file containing a set of param, value pairs. - * Searches the standard places: tessdata/configs, tessdata/tessconfigs - * and also accepts a relative or absolute path name. - * Note: only non-init params will be set (init params are set by Init()). - */ - void ReadConfigFile(const char* filename); - /** Same as above, but only set debug params from the given config file. */ - void ReadDebugConfigFile(const char* filename); - - /** - * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. - * The mode is stored as an IntParam so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). - */ - void SetPageSegMode(PageSegMode mode); - - /** Return the current page segmentation mode. */ - PageSegMode GetPageSegMode() const; - - /** - * Recognize a rectangle from an image and return the result as a string. - * May be called many times for a single Init. - * Currently has no error checking. - * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. - * Palette color images will not work properly and must be converted to - * 24 bit. - * Binary images of 1 bit per pixel may also be given but they must be - * byte packed with the MSB of the first byte being the first pixel, and a - * 1 represents WHITE. For binary images set bytes_per_pixel=0. - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * - * Note that TesseractRect is the simplified convenience interface. - * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, - * and one or more of the Get*Text functions below. 
- */ - char* TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); - - /** - * Call between pages or documents etc to free up memory and forget - * adaptive data. - */ - void ClearAdaptiveClassifier(); - - /** - * @defgroup AdvancedAPI Advanced API - * The following methods break TesseractRect into pieces, so you can - * get hold of the thresholded image, get the text in different formats, - * get bounding boxes, confidences etc. - */ - /* @{ */ - - /** - * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Copies the image buffer and converts to Pix. - * SetImage clears all recognition results, and sets the rectangle to the - * full image, so it may be followed immediately by a GetUTF8Text, and it - * will automatically perform recognition. - */ - void SetImage(const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); - - /** - * Provide an image for Tesseract to recognize. As with SetImage above, - * Tesseract takes its own copy of the image, so it need not persist until - * after Recognize. - * Pix vs raw, which to use? - * Use Pix where possible. Tesseract uses Pix as its internal representation - * and it is therefore more efficient to provide a Pix directly. - */ - void SetImage(Pix* pix); - - /** - * Set the resolution of the source image in pixels per inch so font size - * information can be calculated in results. Call this after SetImage(). - */ - void SetSourceResolution(int ppi); - - /** - * Restrict recognition to a sub-rectangle of the image. Call after SetImage. - * Each SetRectangle clears the recogntion results so multiple rectangles - * can be recognized with the same image. - */ - void SetRectangle(int left, int top, int width, int height); - - /** - * In extreme cases only, usually with a subclass of Thresholder, it - * is possible to provide a different Thresholder. 
The Thresholder may - * be preloaded with an image, settings etc, or they may be set after. - * Note that Tesseract takes ownership of the Thresholder and will - * delete it when it it is replaced or the API is destructed. - */ - void SetThresholder(ImageThresholder* thresholder) { - delete thresholder_; - thresholder_ = thresholder; - ClearResults(); - } - - /** - * Get a copy of the internal thresholded image from Tesseract. - * Caller takes ownership of the Pix and must pixDestroy it. - * May be called any time after SetImage, or after TesseractRect. - */ - Pix* GetThresholdedImage(); - - /** - * Get the result of page layout analysis as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetRegions(Pixa** pixa); - - /** - * Get the textlines as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If raw_image is true, then extract from the original image instead of the - * thresholded image and pad by raw_padding pixels. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. - */ - Boxa* GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids); - /* - Helper method to extract from the thresholded image. (most common usage) - */ - Boxa* GetTextlines(Pixa** pixa, int** blockids) { - return GetTextlines(false, 0, pixa, blockids, nullptr); - } - - /** - * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa - * pair, in reading order. Enables downstream handling of non-rectangular - * regions. - * Can be called before or after Recognize. 
- * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - */ - Boxa* GetStrips(Pixa** pixa, int** blockids); - - /** - * Get the words as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetWords(Pixa** pixa); - - /** - * Gets the individual connected (text) components (created - * after pages segmentation step, but before recognition) - * as a leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * Note: the caller is responsible for calling boxaDestroy() - * on the returned Boxa array and pixaDestroy() on cc array. - */ - Boxa* GetConnectedComponents(Pixa** cc); - - /** - * Get the given level kind of components (block, textline, word etc.) as a - * leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each component is also returned - * as an array of one element per component. delete [] after use. - * If blockids is not nullptr, the paragraph-id of each component with its block - * is also returned as an array of one element per component. delete [] after - * use. - * If raw_image is true, then portions of the original image are extracted - * instead of the thresholded image and padded with raw_padding. - * If text_only is true, then only text components are returned. - */ - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, const bool raw_image, - const int raw_padding, + class TESS_API TessBaseAPI { + public: + TessBaseAPI(); + virtual ~TessBaseAPI(); + + /** + * Returns the version identifier as a static string. Do not delete. 
+ */ + static const char* Version(); + + /** + * If compiled with OpenCL AND an available OpenCL + * device is deemed faster than serial code, then + * "device" is populated with the cl_device_id + * and returns sizeof(cl_device_id) + * otherwise *device=nullptr and returns 0. + */ + static size_t getOpenCLDevice(void **device); + + /** + * Writes the thresholded image to stderr as a PBM file on receipt of a + * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). + */ + static void CatchSignals(); + + /** + * Set the name of the input file. Needed for training and + * reading a UNLV zone file, and for searchable PDF output. + */ + void SetInputName(const char* name); + /** + * These functions are required for searchable PDF output. + * We need our hands on the input file so that we can include + * it in the PDF without transcoding. If that is not possible, + * we need the original image. Finally, resolution metadata + * is stored in the PDF so we need that as well. + */ + const char* GetInputName(); + // Takes ownership of the input pix. + void SetInputImage(Pix *pix); + Pix* GetInputImage(); + int GetSourceYResolution(); + const char* GetDatapath(); + + /** Set the name of the bonus output files. Needed only for debugging. */ + void SetOutputName(const char* name); + + /** + * Set the value of an internal "parameter." + * Supply the name of the parameter and the value as a string, just as + * you would in a config file. + * Returns false if the name lookup failed. + * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. + * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. + * SetVariable may be used before Init, but settings will revert to + * defaults on End(). + * + * Note: Must be called after Init(). Only works for non-init variables + * (init variables should be passed to Init()). 
+ */ + bool SetVariable(const char* name, const char* value); + bool SetDebugVariable(const char* name, const char* value); + + /** + * Returns true if the parameter was found among Tesseract parameters. + * Fills in value with the value of the parameter. + */ + bool GetIntVariable(const char *name, int *value) const; + bool GetBoolVariable(const char *name, bool *value) const; + bool GetDoubleVariable(const char *name, double *value) const; + + /** + * Returns the pointer to the string that represents the value of the + * parameter if it was found among Tesseract parameters. + */ + const char *GetStringVariable(const char *name) const; + + /** + * Print Tesseract parameters to the given file. + */ + void PrintVariables(FILE *fp) const; + + /** + * Get value of named variable as a string, if it exists. + */ + bool GetVariableAsString(const char *name, STRING *val); + + /** + * Instances are now mostly thread-safe and totally independent, + * but some global parameters remain. Basically it is safe to use multiple + * TessBaseAPIs in different threads in parallel, UNLESS: + * you use SetVariable on some of the Params in classify and textord. + * If you do, then the effect will be to change it for all your instances. + * + * Start tesseract. Returns zero on success and -1 on failure. + * NOTE that the only members that may be called before Init are those + * listed above here in the class definition. + * + * The datapath must be the name of the parent directory of tessdata and + * must end in / . Any name after the last / will be stripped. + * The language is (usually) an ISO 639-3 string or nullptr will default to eng. + * It is entirely safe (and eventually will be efficient too) to call + * Init multiple times on the same instance to change language, or just + * to reset the classifier. + * The language may be a string of the form [~][+[~]]* indicating + * that multiple languages are to be loaded. Eg hin+eng will load Hindi and + * English. 
Languages may specify internally that they want to be loaded + * with one or more other languages, so the ~ sign is available to override + * that. Eg if hin were set to load eng by default, then hin+~eng would force + * loading only hin. The number of loaded languages is limited only by + * memory, with the caveat that loading additional languages will impact + * both speed and accuracy, as there is more work to do to decide on the + * applicable language, and there is more chance of hallucinating incorrect + * words. + * WARNING: On changing languages, all Tesseract parameters are reset + * back to their default values. (Which may vary between languages.) + * If you have a rare need to set a Variable that controls + * initialization for a second call to Init you should explicitly + * call End() and then use SetVariable before Init. This is only a very + * rare use case, since there are very few uses that require any parameters + * to be set before Init. + * + * If set_only_non_debug_params is true, only params that do not contain + * "debug" in the name will be set. + */ + int Init(const char* datapath, const char* language, OcrEngineMode mode, + char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params); + int Init(const char* datapath, const char* language, OcrEngineMode oem) { + return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); + } + int Init(const char* datapath, const char* language) { + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + } + // In-memory version reads the traineddata file directly from the given + // data[data_size] array, and/or reads data via a FileReader. 
+ int Init(const char* data, int data_size, const char* language, + OcrEngineMode mode, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader); + + /** + * Returns the languages string used in the last valid initialization. + * If the last initialization specified "deu+hin" then that will be + * returned. If hin loaded eng automatically as well, then that will + * not be included in this list. To find the languages actually + * loaded use GetLoadedLanguagesAsVector. + * The returned string should NOT be deleted. + */ + const char* GetInitLanguagesAsString() const; + + /** + * Returns the loaded languages in the vector of STRINGs. + * Includes all languages loaded by the last Init, including those loaded + * as dependencies of other loaded languages. + */ + void GetLoadedLanguagesAsVector(GenericVector* langs) const; + + /** + * Returns the available languages in the sorted vector of STRINGs. + */ + void GetAvailableLanguagesAsVector(GenericVector* langs) const; + + /** + * Init only the lang model component of Tesseract. The only functions + * that work after this init are SetVariable and IsValidWord. + * WARNING: temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int InitLangMod(const char* datapath, const char* language); + + /** + * Init only for page layout analysis. Use only for calls to SetImage and + * AnalysePage. Calls that attempt recognition will generate an error. + */ + void InitForAnalysePage(); + + /** + * Read a "config" file containing a set of param, value pairs. + * Searches the standard places: tessdata/configs, tessdata/tessconfigs + * and also accepts a relative or absolute path name. + * Note: only non-init params will be set (init params are set by Init()). + */ + void ReadConfigFile(const char* filename); + /** Same as above, but only set debug params from the given config file. 
*/ + void ReadDebugConfigFile(const char* filename); + + /** + * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. + * The mode is stored as an IntParam so it can also be modified by + * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). + */ + void SetPageSegMode(PageSegMode mode); + + /** Return the current page segmentation mode. */ + PageSegMode GetPageSegMode() const; + + /** + * Recognize a rectangle from an image and return the result as a string. + * May be called many times for a single Init. + * Currently has no error checking. + * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + * Palette color images will not work properly and must be converted to + * 24 bit. + * Binary images of 1 bit per pixel may also be given but they must be + * byte packed with the MSB of the first byte being the first pixel, and a + * 1 represents WHITE. For binary images set bytes_per_pixel=0. + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * + * Note that TesseractRect is the simplified convenience interface. + * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, + * and one or more of the Get*Text functions below. + */ + char* TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height); + + /** + * Call between pages or documents etc to free up memory and forget + * adaptive data. + */ + void ClearAdaptiveClassifier(); + + /** + * @defgroup AdvancedAPI Advanced API + * The following methods break TesseractRect into pieces, so you can + * get hold of the thresholded image, get the text in different formats, + * get bounding boxes, confidences etc. + */ + /* @{ */ + + /** + * Provide an image for Tesseract to recognize. Format is as + * TesseractRect above. Copies the image buffer and converts to Pix. 
+ * SetImage clears all recognition results, and sets the rectangle to the + * full image, so it may be followed immediately by a GetUTF8Text, and it + * will automatically perform recognition. + */ + void SetImage(const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line); + + /** + * Provide an image for Tesseract to recognize. As with SetImage above, + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. + * Pix vs raw, which to use? + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. + */ + void SetImage(Pix* pix); + + /** + * Set the resolution of the source image in pixels per inch so font size + * information can be calculated in results. Call this after SetImage(). + */ + void SetSourceResolution(int ppi); + + /** + * Restrict recognition to a sub-rectangle of the image. Call after SetImage. + * Each SetRectangle clears the recogntion results so multiple rectangles + * can be recognized with the same image. + */ + void SetRectangle(int left, int top, int width, int height); + + /** + * In extreme cases only, usually with a subclass of Thresholder, it + * is possible to provide a different Thresholder. The Thresholder may + * be preloaded with an image, settings etc, or they may be set after. + * Note that Tesseract takes ownership of the Thresholder and will + * delete it when it it is replaced or the API is destructed. + */ + void SetThresholder(ImageThresholder* thresholder) { + delete thresholder_; + thresholder_ = thresholder; + ClearResults(); + } + + /** + * Get a copy of the internal thresholded image from Tesseract. + * Caller takes ownership of the Pix and must pixDestroy it. + * May be called any time after SetImage, or after TesseractRect. 
+ */ + Pix* GetThresholdedImage(); + + /** + * Get the result of page layout analysis as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetRegions(Pixa** pixa); + + /** + * Get the textlines as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If raw_image is true, then extract from the original image instead of the + * thresholded image and pad by raw_padding pixels. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + * If paraids is not nullptr, the paragraph-id of each line within its block is + * also returned as an array of one element per line. delete [] after use. + */ + Boxa* GetTextlines(const bool raw_image, const int raw_padding, Pixa** pixa, int** blockids, int** paraids); - // Helper function to get binary images with no padding (most common usage). - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, - Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); - } - - /** - * Returns the scale factor of the thresholded image that would be returned by - * GetThresholdedImage() and the various GetX() methods that call - * GetComponentImages(). - * Returns 0 if no thresholder has been set. - */ - int GetThresholdedImageScaleFactor() const; - - /** - * Runs page layout analysis in the mode set by SetPageSegMode. - * May optionally be called prior to Recognize to get access to just - * the page layout results. Returns an iterator to the results. - * If merge_similar_words is true, words are combined where suitable for use - * with a line recognizer. Use if you want to use AnalyseLayout to find the - * textlines, and then want to process textline fragments with an external - * line recognizer. - * Returns nullptr on error or an empty page. 
- * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - PageIterator* AnalyseLayout(); - PageIterator* AnalyseLayout(bool merge_similar_words); - - /** - * Recognize the image from SetAndThresholdImage, generating Tesseract - * internal structures. Returns 0 on success. - * Optional. The Get*Text functions below will call Recognize if needed. - * After Recognize, the output is kept internally until the next SetImage. - */ - int Recognize(ETEXT_DESC* monitor); - - /** - * Methods to retrieve information after SetAndThresholdImage(), - * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) - */ - - #ifndef DISABLED_LEGACY_ENGINE - /** Variant on Recognize used for testing chopper. */ - int RecognizeForChopTest(ETEXT_DESC* monitor); - #endif - - /** - * Turns images into symbolic text. - * - * filename can point to a single image, a multi-page TIFF, - * or a plain text list of image filenames. - * - * retry_config is useful for debugging. If not nullptr, you can fall - * back to an alternate configuration if a page fails for some - * reason. - * - * timeout_millisec terminates processing if any single page - * takes too long. Set to 0 for unlimited time. - * - * renderer is responible for creating the output. For example, - * use the TessTextRenderer if you want plaintext output, or - * the TessPDFRender to produce searchable PDF. - * - * If tessedit_page_number is non-negative, will only process that - * single page. Works for multi-page tiff file, or filelist. - * - * Returns true if successful, false on error. 
- */ - bool ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - // Does the real work of ProcessPages. - bool ProcessPagesInternal(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - - /** - * Turn a single image into symbolic text. - * - * The pix is the image processed. filename and page_index are - * metadata used by side-effect processes, such as reading a box - * file or formatting as hOCR. - * - * See ProcessPages for desciptions of other parameters. - */ - bool ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer); - - /** - * Get a reading-order iterator to the results of LayoutAnalysis and/or - * Recognize. The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - ResultIterator* GetIterator(); - - /** - * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - MutableIterator* GetMutableIterator(); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - */ - char* GetUTF8Text(); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. 
- * page_number is 0-based but will appear in the output as 1-based. - * monitor can be used to - * cancel the recognition - * receive progress callbacks - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(int page_number); - - /** - * Make a TSV-formatted string from the internal data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetTSVText(int page_number); - - /** - * The recognized text is returned as a char* which is coded in the same - * format as a box file used in training. - * Constructs coordinates in the original image - not just the rectangle. - * page_number is a 0-based page index that will appear in the box file. - * Returned string must be freed with the delete [] operator. - */ - char* GetBoxText(int page_number); - - /** - * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes. - * Returned string must be freed with the delete [] operator. - */ - char* GetUNLVText(); - - /** - * Detect the orientation of the input image and apparent script (alphabet). - * orient_deg is the detected clockwise rotation of the input image in degrees - * (0, 90, 180, 270) - * orient_conf is the confidence (15.0 is reasonably confident) - * script_name is an ASCII string, the name of the script, e.g. 
"Latin" - * script_conf is confidence level in the script - * Returns true on success and writes values to each parameter as an output - */ - bool DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, float* script_conf); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ - char* GetOsdText(int page_number); - - /** Returns the (average) confidence value between 0 and 100. */ - int MeanTextConf(); - /** - * Returns all word confidences (between 0 and 100) in an array, terminated - * by -1. The calling function must delete [] after use. - * The number of confidences should correspond to the number of space- - * delimited words in GetUTF8Text. - */ - int* AllWordConfidences(); + /* + Helper method to extract from the thresholded image. (most common usage) + */ + Boxa* GetTextlines(Pixa** pixa, int** blockids) { + return GetTextlines(false, 0, pixa, blockids, nullptr); + } + + /** + * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa + * pair, in reading order. Enables downstream handling of non-rectangular + * regions. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + */ + Boxa* GetStrips(Pixa** pixa, int** blockids); + + /** + * Get the words as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetWords(Pixa** pixa); + + /** + * Gets the individual connected (text) components (created + * after pages segmentation step, but before recognition) + * as a leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. 
+ * Note: the caller is responsible for calling boxaDestroy() + * on the returned Boxa array and pixaDestroy() on cc array. + */ + Boxa* GetConnectedComponents(Pixa** cc); + + /** + * Get the given level kind of components (block, textline, word etc.) as a + * leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each component is also returned + * as an array of one element per component. delete [] after use. + * If paraids is not nullptr, the paragraph-id of each component with its block + * is also returned as an array of one element per component. delete [] after + * use. + * If raw_image is true, then portions of the original image are extracted + * instead of the thresholded image and padded with raw_padding. + * If text_only is true, then only text components are returned. + */ + Boxa* GetComponentImages(const PageIteratorLevel level, + const bool text_only, const bool raw_image, + const int raw_padding, + Pixa** pixa, int** blockids, int** paraids); + // Helper function to get binary images with no padding (most common usage). + Boxa* GetComponentImages(const PageIteratorLevel level, + const bool text_only, + Pixa** pixa, int** blockids) { + return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + } + + /** + * Returns the scale factor of the thresholded image that would be returned by + * GetThresholdedImage() and the various GetX() methods that call + * GetComponentImages(). + * Returns 0 if no thresholder has been set. + */ + int GetThresholdedImageScaleFactor() const; + + /** + * Runs page layout analysis in the mode set by SetPageSegMode. + * May optionally be called prior to Recognize to get access to just + * the page layout results. Returns an iterator to the results. + * If merge_similar_words is true, words are combined where suitable for use + * with a line recognizer.
Use if you want to use AnalyseLayout to find the + * textlines, and then want to process textline fragments with an external + * line recognizer. + * Returns nullptr on error or an empty page. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + PageIterator* AnalyseLayout(); + PageIterator* AnalyseLayout(bool merge_similar_words); + + /** + * Recognize the image from SetAndThresholdImage, generating Tesseract + * internal structures. Returns 0 on success. + * Optional. The Get*Text functions below will call Recognize if needed. + * After Recognize, the output is kept internally until the next SetImage. + */ + int Recognize(ETEXT_DESC* monitor); + + /** + * Methods to retrieve information after SetAndThresholdImage(), + * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) + */ #ifndef DISABLED_LEGACY_ENGINE - /** - * Applies the given word to the adaptive classifier if possible. - * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can - * tell the boundaries of the graphemes. - * Assumes that SetImage/SetRectangle have been used to set the image - * to the given word. The mode arg should be PSM_SINGLE_WORD or - * PSM_CIRCLE_WORD, as that will be used to control layout analysis. - * The currently set PageSegMode is preserved. - * Returns false if adaption was not possible for some reason. - */ - bool AdaptToWordStr(PageSegMode mode, const char* wordstr); + /** Variant on Recognize used for testing chopper. */ + int RecognizeForChopTest(ETEXT_DESC* monitor); +#endif + + /** + * Turns images into symbolic text. + * + * filename can point to a single image, a multi-page TIFF, + * or a plain text list of image filenames. 
+ * + * retry_config is useful for debugging. If not nullptr, you can fall + * back to an alternate configuration if a page fails for some + * reason. + * + * timeout_millisec terminates processing if any single page + * takes too long. Set to 0 for unlimited time. + * + * renderer is responsible for creating the output. For example, + * use the TessTextRenderer if you want plaintext output, or + * the TessPDFRenderer to produce searchable PDF. + * + * If tessedit_page_number is non-negative, will only process that + * single page. Works for multi-page tiff file, or filelist. + * + * Returns true if successful, false on error. + */ + bool ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + // Does the real work of ProcessPages. + bool ProcessPagesInternal(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + + /** + * Turn a single image into symbolic text. + * + * The pix is the image processed. filename and page_index are + * metadata used by side-effect processes, such as reading a box + * file or formatting as hOCR. + * + * See ProcessPages for descriptions of other parameters. + */ + bool ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer); + + /** + * Get a reading-order iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + ResultIterator* GetIterator(); + + /** + * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. + * The returned iterator must be deleted after use.
+ * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + MutableIterator* GetMutableIterator(); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + */ + char* GetUTF8Text(); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. + * monitor can be used to + * cancel the recognition + * receive progress callbacks + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(ETEXT_DESC* monitor, int page_number); + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(ETEXT_DESC* monitor, int page_number); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(int page_number); + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(int page_number); + + /** + * Make a TSV-formatted string from the internal data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetTSVText(int page_number); + + /** + * The recognized text is returned as a char* which is coded in the same + * format as a box file used in training. + * Constructs coordinates in the original image - not just the rectangle. + * page_number is a 0-based page index that will appear in the box file. 
+ * Returned string must be freed with the delete [] operator. + */ + char* GetBoxText(int page_number); + + /** + * The recognized text is returned as a char* which is coded + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. + */ + char* GetUNLVText(); + + /** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image in degrees + * (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. "Latin" + * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output + */ + bool DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ + char* GetOsdText(int page_number); + + /** Returns the (average) confidence value between 0 and 100. */ + int MeanTextConf(); + /** + * Returns all word confidences (between 0 and 100) in an array, terminated + * by -1. The calling function must delete [] after use. + * The number of confidences should correspond to the number of space- + * delimited words in GetUTF8Text. + */ + int* AllWordConfidences(); + +#ifndef DISABLED_LEGACY_ENGINE + /** + * Applies the given word to the adaptive classifier if possible. + * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can + * tell the boundaries of the graphemes. + * Assumes that SetImage/SetRectangle have been used to set the image + * to the given word. The mode arg should be PSM_SINGLE_WORD or + * PSM_CIRCLE_WORD, as that will be used to control layout analysis. 
+ * The currently set PageSegMode is preserved. + * Returns false if adaption was not possible for some reason. + */ + bool AdaptToWordStr(PageSegMode mode, const char* wordstr); #endif // ndef DISABLED_LEGACY_ENGINE - /** - * Free up recognition results and any stored image data, without actually - * freeing any recognition data that would be time-consuming to reload. - * Afterwards, you must call SetImage or TesseractRect before doing - * any Recognize or Get* operation. - */ - void Clear(); - - /** - * Close down tesseract and free up all memory. End() is equivalent to - * destructing and reconstructing your TessBaseAPI. - * Once End() has been used, none of the other API functions may be used - * other than Init and anything declared above it in the class definition. - */ - void End(); - - /** - * Clear any library-level memory caches. - * There are a variety of expensive-to-load constant data structures (mostly - * language dictionaries) that are cached globally -- surviving the Init() - * and End() of individual TessBaseAPI's. This function allows the clearing - * of these caches. - **/ - static void ClearPersistentCache(); - - /** - * Check whether a word is valid according to Tesseract's language model - * @return 0 if the word is invalid, non-zero if valid. - * @warning temporary! This function will be removed from here and placed - * in a separate API at some future time. - */ - int IsValidWord(const char *word); - // Returns true if utf8_character is defined in the UniCharset. - bool IsValidCharacter(const char *utf8_character); - - - bool GetTextDirection(int* out_offset, float* out_slope); - - /** Sets Dict::letter_is_okay_ function to point to the given function. */ - void SetDictFunc(DictFunc f); - - /** Sets Dict::probability_in_context_ function to point to the given - * function. - */ - void SetProbabilityInContextFunc(ProbabilityInContextFunc f); - - /** - * Estimates the Orientation And Script of the image. 
- * @return true if the image was processed successfully. - */ - bool DetectOS(OSResults*); - - /** - * Return text orientation of each block as determined by an earlier run - * of layout analysis. - */ - void GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing); - - - #ifndef DISABLED_LEGACY_ENGINE - - /** Sets Wordrec::fill_lattice_ function to point to the given function. */ - void SetFillLatticeFunc(FillLatticeFunc f); - - /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* FindLinesCreateBlockList(); - - /** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ - static void DeleteBlockList(BLOCK_LIST* block_list); - - /** Returns a ROW object created from the input row specification. */ - static ROW *MakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); - - /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB *MakeTBLOB(Pix *pix); - - /** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. - */ - static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); - - /** This method returns the features associated with the input image. */ - void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* feature_outline_index); - - /** - * This method returns the row to which a box of specified dimensions would - * belong. If no good match is found, it returns nullptr. - */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); - - /** - * Method to run adaptive classifier on a blob. - * It returns at max num_max_matches results. 
- */ - void RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned); + /** + * Free up recognition results and any stored image data, without actually + * freeing any recognition data that would be time-consuming to reload. + * Afterwards, you must call SetImage or TesseractRect before doing + * any Recognize or Get* operation. + */ + void Clear(); + + /** + * Close down tesseract and free up all memory. End() is equivalent to + * destructing and reconstructing your TessBaseAPI. + * Once End() has been used, none of the other API functions may be used + * other than Init and anything declared above it in the class definition. + */ + void End(); + + /** + * Clear any library-level memory caches. + * There are a variety of expensive-to-load constant data structures (mostly + * language dictionaries) that are cached globally -- surviving the Init() + * and End() of individual TessBaseAPI's. This function allows the clearing + * of these caches. + **/ + static void ClearPersistentCache(); + + /** + * Check whether a word is valid according to Tesseract's language model + * @return 0 if the word is invalid, non-zero if valid. + * @warning temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int IsValidWord(const char *word); + // Returns true if utf8_character is defined in the UniCharset. + bool IsValidCharacter(const char *utf8_character); + + + bool GetTextDirection(int* out_offset, float* out_slope); + + /** Sets Dict::letter_is_okay_ function to point to the given function. */ + void SetDictFunc(DictFunc f); + + /** Sets Dict::probability_in_context_ function to point to the given + * function. + */ + void SetProbabilityInContextFunc(ProbabilityInContextFunc f); + + /** + * Estimates the Orientation And Script of the image. + * @return true if the image was processed successfully. 
+ */ + bool DetectOS(OSResults*); + + /** + * Return text orientation of each block as determined by an earlier run + * of layout analysis. + */ + void GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing); + + +#ifndef DISABLED_LEGACY_ENGINE + + /** Sets Wordrec::fill_lattice_ function to point to the given function. */ + void SetFillLatticeFunc(FillLatticeFunc f); + + /** Find lines from the image making the BLOCK_LIST. */ + BLOCK_LIST* FindLinesCreateBlockList(); + + /** + * Delete a block list. + * This is to keep BLOCK_LIST pointer opaque + * and let go of including the other headers. + */ + static void DeleteBlockList(BLOCK_LIST* block_list); + + /** Returns a ROW object created from the input row specification. */ + static ROW *MakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); + + /** Returns a TBLOB corresponding to the entire input image. */ + static TBLOB *MakeTBLOB(Pix *pix); + + /** + * This method baseline normalizes a TBLOB in-place. The input row is used + * for normalization. The denorm is an optional parameter in which the + * normalization-antidote is returned. + */ + static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); + + /** This method returns the features associated with the input image. */ + void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* feature_outline_index); + + /** + * This method returns the row to which a box of specified dimensions would + * belong. If no good match is found, it returns nullptr. + */ + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); + + /** + * Method to run adaptive classifier on a blob. + * It returns at max num_max_matches results. 
+ */ + void RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned); #endif // ndef DISABLED_LEGACY_ENGINE - /** This method returns the string form of the specified unichar. */ - const char* GetUnichar(int unichar_id); - - /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *GetDawg(int i) const; - - /** Return the number of dawgs loaded into tesseract_ object. */ - int NumDawgs() const; - - Tesseract* tesseract() const { return tesseract_; } - - OcrEngineMode oem() const { return last_oem_requested_; } - - void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } - - void set_min_orientation_margin(double margin); - /* @} */ - - protected: - - /** Common code for setting the image. Returns true if Init has been called. */ - TESS_LOCAL bool InternalSetImage(); - - /** - * Run the thresholder to make the thresholded image. If pix is not nullptr, - * the source is thresholded to pix instead of the internal IMAGE. - */ - TESS_LOCAL virtual bool Threshold(Pix** pix); - - /** - * Find lines from the image making the BLOCK_LIST. - * @return 0 on success. - */ - TESS_LOCAL int FindLines(); - - /** Delete the pageres and block list ready for a new page. */ - void ClearResults(); - - /** - * Return an LTR Result Iterator -- used only for training, as we really want - * to ignore all BiDi smarts at that point. - * delete once you're done with it. - */ - TESS_LOCAL LTRResultIterator* GetLTRIterator(); - - /** - * Return the length of the output text string, as UTF8, assuming - * one newline per line and one per block, with a terminator, - * and assuming a single character reject marker for each rejected character. - * Also return the number of recognized blobs in blob_count. 
- */ - TESS_LOCAL int TextLength(int* blob_count); - - //// paragraphs.cpp //////////////////////////////////////////////////// - TESS_LOCAL void DetectParagraphs(bool after_text_recognition); - - #ifndef DISABLED_LEGACY_ENGINE - - /** @defgroup ocropusAddOns ocropus add-ons */ - /* @{ */ - - /** - * Adapt to recognize the current image as the given character. - * The image must be preloaded and be just an image of a single character. - */ - TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender); - - /** Recognize text doing one pass only, using settings for a given pass. */ - TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); - - TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result); - - /** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ - TESS_LOCAL static int TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res); - - TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } - /* @} */ + /** This method returns the string form of the specified unichar. */ + const char* GetUnichar(int unichar_id); + + /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ + const Dawg *GetDawg(int i) const; + + /** Return the number of dawgs loaded into tesseract_ object. */ + int NumDawgs() const; + + Tesseract* tesseract() const { return tesseract_; } + + OcrEngineMode oem() const { return last_oem_requested_; } + + void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } + + void set_min_orientation_margin(double margin); + /* @} */ + + protected: + + /** Common code for setting the image. Returns true if Init has been called. */ + TESS_LOCAL bool InternalSetImage(); + + /** + * Run the thresholder to make the thresholded image. 
If pix is not nullptr, + * the source is thresholded to pix instead of the internal IMAGE. + */ + TESS_LOCAL virtual bool Threshold(Pix** pix); + + /** + * Find lines from the image making the BLOCK_LIST. + * @return 0 on success. + */ + TESS_LOCAL int FindLines(); + + /** Delete the pageres and block list ready for a new page. */ + void ClearResults(); + + /** + * Return an LTR Result Iterator -- used only for training, as we really want + * to ignore all BiDi smarts at that point. + * delete once you're done with it. + */ + TESS_LOCAL LTRResultIterator* GetLTRIterator(); + + /** + * Return the length of the output text string, as UTF8, assuming + * one newline per line and one per block, with a terminator, + * and assuming a single character reject marker for each rejected character. + * Also return the number of recognized blobs in blob_count. + */ + TESS_LOCAL int TextLength(int* blob_count); + + //// paragraphs.cpp //////////////////////////////////////////////////// + TESS_LOCAL void DetectParagraphs(bool after_text_recognition); + +#ifndef DISABLED_LEGACY_ENGINE + + /** @defgroup ocropusAddOns ocropus add-ons */ + /* @{ */ + + /** + * Adapt to recognize the current image as the given character. + * The image must be preloaded and be just an image of a single character. + */ + TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender); + + /** Recognize text doing one pass only, using settings for a given pass. */ + TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); + + TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result); + + /** + * Extract the OCR results, costs (penalty points for uncertainty), + * and the bounding boxes of the characters. 
+ */ + TESS_LOCAL static int TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res); + + TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } + /* @} */ #endif // ndef DISABLED_LEGACY_ENGINE - protected: - Tesseract* tesseract_; ///< The underlying data object. - Tesseract* osd_tesseract_; ///< For orientation & script detection. - EquationDetect* equ_detect_; ///* paragraph_models_; - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. - OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. - TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES - - /** - * @defgroup ThresholderParams Thresholder Parameters - * Parameters saved from the Thresholder. Needed to rebuild coordinates. - */ - /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; - /* @} */ - - private: - // A list of image filenames gets special consideration - bool ProcessPagesFileList(FILE *fp, - STRING *buf, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // TIFF supports multipage so gets special consideration. - bool ProcessPagesMultipageTiff(const unsigned char *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // There's currently no way to pass a document title from the - // Tesseract command line, and we have multiple places that choose - // to set the title to an empty string. 
Using a single named - // variable will hopefully reduce confusion if the situation changes - // in the future. - const char *unknown_title_ = ""; -}; // class TessBaseAPI. + protected: + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + EquationDetect* equ_detect_; ///* paragraph_models_; + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. + STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. + OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. + bool recognition_done_; ///< page_res_ contains recognition data. + TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES + + /** + * @defgroup ThresholderParams Thresholder Parameters + * Parameters saved from the Thresholder. Needed to rebuild coordinates. + */ + /* @{ */ + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; + int image_width_; + int image_height_; + /* @} */ + + private: + // A list of image filenames gets special consideration + bool ProcessPagesFileList(FILE *fp, + STRING *buf, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // TIFF supports multipage so gets special consideration. + bool ProcessPagesMultipageTiff(const unsigned char *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // There's currently no way to pass a document title from the + // Tesseract command line, and we have multiple places that choose + // to set the title to an empty string. Using a single named + // variable will hopefully reduce confusion if the situation changes + // in the future. 
+ const char *unknown_title_ = ""; + }; // class TessBaseAPI. /** Escape a char string - remove &<>"' with HTML codes. */ -STRING HOcrEscape(const char* text); + STRING HOcrEscape(const char* text); } // namespace tesseract. #endif // TESSERACT_API_BASEAPI_H_ diff --git a/src/api/capi.cpp b/src/api/capi.cpp index 2146e8c8f1..c9216b8ede 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -66,6 +66,16 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu return new TessHOcrRenderer(outputbase, font_info); } +TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase) +{ + return new TessAltoRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info) +{ + return new TessHOcrRenderer(outputbase, font_info); +} + TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, BOOL textonly) { @@ -239,9 +249,9 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, cons } TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params) + TessOcrEngineMode mode, char** configs, int configs_size, + char** vars_vec, char** vars_values, size_t vars_vec_size, + BOOL set_only_non_debug_params) { GenericVector varNames; GenericVector varValues; @@ -332,8 +342,8 @@ TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* } TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height) + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height) { return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top,
width, height); } @@ -346,7 +356,7 @@ TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) #endif TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line) + int bytes_per_pixel, int bytes_per_line) { handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); } @@ -387,7 +397,7 @@ TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, str } TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) + struct Pixa** pixa, int** blockids, int** paraids) { return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids); } @@ -545,7 +555,7 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand #ifndef DISABLED_LEGACY_ENGINE TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) + int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) { bool success; success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); @@ -553,7 +563,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, } TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex) + int* num_features, int* FeatureOutlineIndex) { handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex); } @@ -564,7 +574,7 @@ TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, } TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* 
num_matches_returned) + int* unichar_ids, float* ratings, int* num_matches_returned) { handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned); } @@ -656,13 +666,13 @@ TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* } TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element) + TessPageIteratorLevel element) { return handle->IsAtFinalElement(level, element) ? TRUE : FALSE; } TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom) + int* left, int* top, int* right, int* bottom) { return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; } @@ -684,14 +694,14 @@ TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* } TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) + int* x1, int* y1, int* x2, int* y2) { return handle->Baseline(level, x1, y1, x2, y2) ? 
TRUE : FALSE; } TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle) + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle) { handle->Orientation(orientation, writing_direction, textline_order, deskew_angle); } @@ -754,8 +764,8 @@ TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const T } TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id) + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id) { bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps; const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif, diff --git a/src/api/capi.h b/src/api/capi.h index ba4445b547..85908c78b0 100644 --- a/src/api/capi.h +++ b/src/api/capi.h @@ -56,6 +56,7 @@ extern "C" { typedef tesseract::TessResultRenderer TessResultRenderer; typedef tesseract::TessTextRenderer TessTextRenderer; typedef tesseract::TessHOcrRenderer TessHOcrRenderer; +typedef tesseract::TessAltoRenderer TessAltoRenderer; typedef tesseract::TessPDFRenderer TessPDFRenderer; typedef tesseract::TessUnlvRenderer TessUnlvRenderer; typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer; @@ -117,7 +118,7 @@ struct Pixa; /* General free functions */ TESS_API const char* - TESS_CALL TessVersion(); +TESS_CALL TessVersion(); TESS_API void TESS_CALL TessDeleteText(char* text); TESS_API void TESS_CALL TessDeleteTextArray(char** arr); TESS_API void TESS_CALL TessDeleteIntArray(int* arr); @@ -125,7 +126,9 @@ TESS_API void TESS_CALL 
TessDeleteIntArray(int* arr); /* Renderer API */ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); +TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info); TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, BOOL textonly); TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); @@ -134,7 +137,7 @@ TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* out TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer); TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next); TESS_API TessResultRenderer* - TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); +TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title); TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api); TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer); @@ -146,7 +149,7 @@ TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer); /* Base API */ TESS_API TessBaseAPI* - TESS_CALL TessBaseAPICreate(); +TESS_CALL TessBaseAPICreate(); TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device); @@ -169,7 +172,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, co TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* 
handle, const char* name, BOOL* value); TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value); TESS_API const char* - TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); +TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp); TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename); @@ -191,16 +194,16 @@ TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datap TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language); TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode, - char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params); + char** configs, int configs_size, + char** vars_vec, char** vars_values, size_t vars_vec_size, + BOOL set_only_non_debug_params); TESS_API const char* - TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); TESS_API char** - TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); TESS_API char** - TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language); TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); @@ -210,7 +213,7 @@ TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, con TESS_API void TESS_CALL 
TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode); TESS_API TessPageSegMode - TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, int bytes_per_pixel, int bytes_per_line, @@ -231,32 +234,32 @@ TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImag #endif TESS_API struct Pix* - TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa); +TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); +TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); +TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); +TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa); +TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); +TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const 
TessPageIteratorLevel level, const BOOL text_only, - struct Pixa** pixa, int** blockids); +TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, + struct Pixa** pixa, int** blockids); TESS_API struct Boxa* - TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); +TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, + const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); TESS_API TessPageIterator* - TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); +TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); @@ -267,15 +270,16 @@ TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ET TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); + const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); TESS_API TessResultIterator* - TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); TESS_API TessMutableIterator* - TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); +TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); TESS_API char* TESS_CALL 
TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number); TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); @@ -310,19 +314,19 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, #endif // def TESS_CAPI_INCLUDE_BASEAPI TESS_API const char* - TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); +TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); #ifdef TESS_CAPI_INCLUDE_BASEAPI TESS_API const TessDawg* - TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); +TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); TESS_API TessOcrEngineMode - TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); +TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb); @@ -335,7 +339,7 @@ TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, i TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); TESS_API TessPageIterator* - TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); +TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); @@ -350,14 +354,14 @@ TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* han int* left, int* top, int* right, int* bottom); TESS_API TessPolyBlockType - TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); +TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); TESS_API struct Pix* - TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_CALL 
TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); TESS_API struct Pix* - TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top); +TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top); TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, int* x1, int* y1, int* x2, int* y2); @@ -373,23 +377,23 @@ TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); TESS_API TessResultIterator* - TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); +TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); TESS_API TessPageIterator* - TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); +TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); TESS_API const TessPageIterator* - TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); +TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); TESS_API TessChoiceIterator* - TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); +TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level); TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level); TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level); TESS_API const char* - TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); +TESS_CALL 
TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); TESS_API const char* - TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id); +TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id); TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); @@ -430,12 +434,12 @@ TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, T TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender); TESS_API TBLOB* - TESS_CALL TessMakeTBLOB(Pix* pix); +TESS_CALL TessMakeTBLOB(Pix* pix); TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); TESS_API BLOCK_LIST* - TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); +TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp index af31be8e59..277bc47389 100644 --- a/src/api/renderer.cpp +++ b/src/api/renderer.cpp @@ -30,250 +30,298 @@ namespace tesseract { /********************************************************************** * Base Renderer interface implementation **********************************************************************/ -TessResultRenderer::TessResultRenderer(const char *outputbase, - const char* extension) - : file_extension_(extension), - title_(""), imagenum_(-1), - fout_(stdout), - next_(nullptr), - happy_(true) { - if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { - 
STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); - fout_ = fopen(outfile.string(), "wb"); - if (fout_ == nullptr) { - happy_ = false; + TessResultRenderer::TessResultRenderer(const char *outputbase, + const char* extension) + : file_extension_(extension), + title_(""), imagenum_(-1), + fout_(stdout), + next_(nullptr), + happy_(true) { + if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { + STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); + fout_ = fopen(outfile.string(), "wb"); + if (fout_ == nullptr) { + happy_ = false; + } + } } - } -} - -TessResultRenderer::~TessResultRenderer() { - if (fout_ != nullptr) { - if (fout_ != stdout) - fclose(fout_); - else - clearerr(fout_); - } - delete next_; -} - -void TessResultRenderer::insert(TessResultRenderer* next) { - if (next == nullptr) return; - - TessResultRenderer* remainder = next_; - next_ = next; - if (remainder) { - while (next->next_ != nullptr) { - next = next->next_; + + TessResultRenderer::~TessResultRenderer() { + if (fout_ != nullptr) { + if (fout_ != stdout) + fclose(fout_); + else + clearerr(fout_); + } + delete next_; + } + + void TessResultRenderer::insert(TessResultRenderer* next) { + if (next == nullptr) return; + + TessResultRenderer* remainder = next_; + next_ = next; + if (remainder) { + while (next->next_ != nullptr) { + next = next->next_; + } + next->next_ = remainder; + } + } + + bool TessResultRenderer::BeginDocument(const char* title) { + if (!happy_) return false; + title_ = title; + imagenum_ = -1; + bool ok = BeginDocumentHandler(); + if (next_) { + ok = next_->BeginDocument(title) && ok; + } + return ok; + } + + bool TessResultRenderer::AddImage(TessBaseAPI* api) { + if (!happy_) return false; + ++imagenum_; + bool ok = AddImageHandler(api); + if (next_) { + ok = next_->AddImage(api) && ok; + } + return ok; + } + + bool TessResultRenderer::EndDocument() { + if (!happy_) return false; + bool ok = EndDocumentHandler(); + if 
(next_) { + ok = next_->EndDocument() && ok; + } + return ok; + } + + void TessResultRenderer::AppendString(const char* s) { + AppendData(s, strlen(s)); + } + + void TessResultRenderer::AppendData(const char* s, int len) { + if (!tesseract::Serialize(fout_, s, len)) happy_ = false; + } + + bool TessResultRenderer::BeginDocumentHandler() { + return happy_; + } + + bool TessResultRenderer::EndDocumentHandler() { + return happy_; } - next->next_ = remainder; - } -} - -bool TessResultRenderer::BeginDocument(const char* title) { - if (!happy_) return false; - title_ = title; - imagenum_ = -1; - bool ok = BeginDocumentHandler(); - if (next_) { - ok = next_->BeginDocument(title) && ok; - } - return ok; -} - -bool TessResultRenderer::AddImage(TessBaseAPI* api) { - if (!happy_) return false; - ++imagenum_; - bool ok = AddImageHandler(api); - if (next_) { - ok = next_->AddImage(api) && ok; - } - return ok; -} - -bool TessResultRenderer::EndDocument() { - if (!happy_) return false; - bool ok = EndDocumentHandler(); - if (next_) { - ok = next_->EndDocument() && ok; - } - return ok; -} - -void TessResultRenderer::AppendString(const char* s) { - AppendData(s, strlen(s)); -} - -void TessResultRenderer::AppendData(const char* s, int len) { - if (!tesseract::Serialize(fout_, s, len)) happy_ = false; -} - -bool TessResultRenderer::BeginDocumentHandler() { - return happy_; -} - -bool TessResultRenderer::EndDocumentHandler() { - return happy_; -} /********************************************************************** * UTF8 Text Renderer interface implementation **********************************************************************/ -TessTextRenderer::TessTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "txt") { -} + TessTextRenderer::TessTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "txt") { + } -bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr utf8(api->GetUTF8Text()); - if (utf8 == nullptr) { - 
return false; - } + bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr utf8(api->GetUTF8Text()); + if (utf8 == nullptr) { + return false; + } - AppendString(utf8.get()); + AppendString(utf8.get()); - const char* pageSeparator = api->GetStringVariable("page_separator"); - if (pageSeparator != nullptr && *pageSeparator != '\0') { - AppendString(pageSeparator); - } + const char* pageSeparator = api->GetStringVariable("page_separator"); + if (pageSeparator != nullptr && *pageSeparator != '\0') { + AppendString(pageSeparator); + } - return true; -} + return true; + } /********************************************************************** * HOcr Text Renderer interface implementation **********************************************************************/ -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = false; -} - -TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = font_info; -} - -bool TessHOcrRenderer::BeginDocumentHandler() { - AppendString( - "\n" - "\n" - "\n \n "); - AppendString(title()); - AppendString( - "\n" - "\n" - " \n" - " \n" - "\n\n"); + " \n" + "\n\n"); + + return true; + } - return true; -} + bool TessHOcrRenderer::EndDocumentHandler() { + AppendString(" \n\n"); -bool TessHOcrRenderer::EndDocumentHandler() { - AppendString(" \n\n"); + return true; + } + + bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr hocr(api->GetHOCRText(imagenum())); + if (hocr == nullptr) return false; + + AppendString(hocr.get()); + + return true; + } + +/********************************************************************** + * Alto Text Renderer interface implementation + **********************************************************************/ + TessAltoRenderer::TessAltoRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "alto") { + } - return 
true; -} + bool TessAltoRenderer::BeginDocumentHandler() { + AppendString( + "\n" + "\n" + "\t\n" + "\t\tpixel\n" + "\t\t\n" + "\t\t\t"); + + AppendString(title()); + + AppendString("\t\t\t\n" + "\t\t\n" + "\t\t\n" + "\t\t\t\n" + "\t\t\t\t\n" + "\t\t\t\t\ttesseract 4.0.0\n" + "\t\t\t\t\n" + "\t\t\t\n" + "\t\t\n" + "\t\n" + "\t\n"); + + return true; + } + + bool TessAltoRenderer::EndDocumentHandler() { + AppendString("\t\n\n"); -bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr hocr(api->GetHOCRText(imagenum())); - if (hocr == nullptr) return false; + return true; + } - AppendString(hocr.get()); + bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr hocr(api->GetAltoText(imagenum())); + if (hocr == nullptr) return false; - return true; -} + AppendString(hocr.get()); + + return true; + } /********************************************************************** * TSV Text Renderer interface implementation **********************************************************************/ -TessTsvRenderer::TessTsvRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; -} + TessTsvRenderer::TessTsvRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "tsv") { + font_info_ = false; + } -TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; -} + TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) + : TessResultRenderer(outputbase, "tsv") { + font_info_ = font_info; + } -bool TessTsvRenderer::BeginDocumentHandler() { - // Output TSV column headings - AppendString( - "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" - "num\tleft\ttop\twidth\theight\tconf\ttext\n"); - return true; -} + bool TessTsvRenderer::BeginDocumentHandler() { + // Output TSV column headings + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + 
"num\tleft\ttop\twidth\theight\tconf\ttext\n"); + return true; + } -bool TessTsvRenderer::EndDocumentHandler() { return true; } + bool TessTsvRenderer::EndDocumentHandler() { return true; } -bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr tsv(api->GetTSVText(imagenum())); - if (tsv == nullptr) return false; + bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr tsv(api->GetTSVText(imagenum())); + if (tsv == nullptr) return false; - AppendString(tsv.get()); + AppendString(tsv.get()); - return true; -} + return true; + } /********************************************************************** * UNLV Text Renderer interface implementation **********************************************************************/ -TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "unlv") { -} + TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "unlv") { + } -bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr unlv(api->GetUNLVText()); - if (unlv == nullptr) return false; + bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr unlv(api->GetUNLVText()); + if (unlv == nullptr) return false; - AppendString(unlv.get()); + AppendString(unlv.get()); - return true; -} + return true; + } /********************************************************************** * BoxText Renderer interface implementation **********************************************************************/ -TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "box") { -} + TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "box") { + } -bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr text(api->GetBoxText(imagenum())); - if (text == nullptr) return false; + bool 
TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr text(api->GetBoxText(imagenum())); + if (text == nullptr) return false; - AppendString(text.get()); + AppendString(text.get()); - return true; -} + return true; + } #ifndef DISABLED_LEGACY_ENGINE /********************************************************************** * Osd Text Renderer interface implementation **********************************************************************/ -TessOsdRenderer::TessOsdRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "osd") {} + TessOsdRenderer::TessOsdRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "osd") {} -bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { - char* osd = api->GetOsdText(imagenum()); - if (osd == nullptr) return false; + bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { + char* osd = api->GetOsdText(imagenum()); + if (osd == nullptr) return false; - AppendString(osd); - delete[] osd; + AppendString(osd); + delete[] osd; - return true; -} + return true; + } #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/renderer.h b/src/api/renderer.h index 6c753403cc..cb91f3e005 100644 --- a/src/api/renderer.h +++ b/src/api/renderer.h @@ -27,7 +27,7 @@ namespace tesseract { -class TessBaseAPI; + class TessBaseAPI; /** * Interface for rendering tesseract results into a document, such as text, @@ -42,218 +42,232 @@ class TessBaseAPI; * renderers can manage the associated state needed for the specific formats * in addition to the heuristics for producing it. */ -class TESS_API TessResultRenderer { - public: - virtual ~TessResultRenderer(); - - // Takes ownership of pointer so must be new'd instance. - // Renderers aren't ordered, but appends the sequences of next parameter - // and existing next(). The renderers should be unique across both lists. - void insert(TessResultRenderer* next); - - // Returns the next renderer or nullptr. 
- TessResultRenderer* next() { return next_; } - - /** - * Starts a new document with the given title. - * This clears the contents of the output data. - * Title should use UTF-8 encoding. - */ - bool BeginDocument(const char* title); - - /** - * Adds the recognized text from the source image to the current document. - * Invalid if BeginDocument not yet called. - * - * Note that this API is a bit weird but is designed to fit into the - * current TessBaseAPI implementation where the api has lots of state - * information that we might want to add in. - */ - bool AddImage(TessBaseAPI* api); - - /** - * Finishes the document and finalizes the output data - * Invalid if BeginDocument not yet called. - */ - bool EndDocument(); - - const char* file_extension() const { return file_extension_; } - const char* title() const { return title_.c_str(); } - - // Is everything fine? Otherwise something went wrong. - bool happy() { return happy_; } - - /** - * Returns the index of the last image given to AddImage - * (i.e. images are incremented whether the image succeeded or not) - * - * This is always defined. It means either the number of the - * current image, the last image ended, or in the completed document - * depending on when in the document lifecycle you are looking at it. - * Will return -1 if a document was never started. - */ - int imagenum() const { return imagenum_; } - - protected: - /** - * Called by concrete classes. - * - * outputbase is the name of the output file excluding - * extension. For example, "/path/to/chocolate-chip-cookie-recipe" - * - * extension indicates the file extension to be used for output - * files. For example "pdf" will produce a .pdf file, and "hocr" - * will produce .hocr files. 
- */ - TessResultRenderer(const char *outputbase, - const char* extension); - - // Hook for specialized handling in BeginDocument() - virtual bool BeginDocumentHandler(); - - // This must be overridden to render the OCR'd results - virtual bool AddImageHandler(TessBaseAPI* api) = 0; - - // Hook for specialized handling in EndDocument() - virtual bool EndDocumentHandler(); - - // Renderers can call this to append '\0' terminated strings into - // the output string returned by GetOutput. - // This method will grow the output buffer if needed. - void AppendString(const char* s); - - // Renderers can call this to append binary byte sequences into - // the output string returned by GetOutput. Note that s is not necessarily - // '\0' terminated (and can contain '\0' within it). - // This method will grow the output buffer if needed. - void AppendData(const char* s, int len); - - private: - const char* file_extension_; // standard extension for generated output - STRING title_; // title of document being renderered - int imagenum_; // index of last image added - - FILE* fout_; // output file pointer - TessResultRenderer* next_; // Can link multiple renderers together - bool happy_; // I get grumpy when the disk fills up, etc. -}; + class TESS_API TessResultRenderer { + public: + virtual ~TessResultRenderer(); + + // Takes ownership of pointer so must be new'd instance. + // Renderers aren't ordered, but appends the sequences of next parameter + // and existing next(). The renderers should be unique across both lists. + void insert(TessResultRenderer* next); + + // Returns the next renderer or nullptr. + TessResultRenderer* next() { return next_; } + + /** + * Starts a new document with the given title. + * This clears the contents of the output data. + * Title should use UTF-8 encoding. + */ + bool BeginDocument(const char* title); + + /** + * Adds the recognized text from the source image to the current document. + * Invalid if BeginDocument not yet called. 
+ * + * Note that this API is a bit weird but is designed to fit into the + * current TessBaseAPI implementation where the api has lots of state + * information that we might want to add in. + */ + bool AddImage(TessBaseAPI* api); + + /** + * Finishes the document and finalizes the output data + * Invalid if BeginDocument not yet called. + */ + bool EndDocument(); + + const char* file_extension() const { return file_extension_; } + const char* title() const { return title_.c_str(); } + + // Is everything fine? Otherwise something went wrong. + bool happy() { return happy_; } + + /** + * Returns the index of the last image given to AddImage + * (i.e. images are incremented whether the image succeeded or not) + * + * This is always defined. It means either the number of the + * current image, the last image ended, or in the completed document + * depending on when in the document lifecycle you are looking at it. + * Will return -1 if a document was never started. + */ + int imagenum() const { return imagenum_; } + + protected: + /** + * Called by concrete classes. + * + * outputbase is the name of the output file excluding + * extension. For example, "/path/to/chocolate-chip-cookie-recipe" + * + * extension indicates the file extension to be used for output + * files. For example "pdf" will produce a .pdf file, and "hocr" + * will produce .hocr files. + */ + TessResultRenderer(const char *outputbase, + const char* extension); + + // Hook for specialized handling in BeginDocument() + virtual bool BeginDocumentHandler(); + + // This must be overridden to render the OCR'd results + virtual bool AddImageHandler(TessBaseAPI* api) = 0; + + // Hook for specialized handling in EndDocument() + virtual bool EndDocumentHandler(); + + // Renderers can call this to append '\0' terminated strings into + // the output string returned by GetOutput. + // This method will grow the output buffer if needed. 
+ void AppendString(const char* s); + + // Renderers can call this to append binary byte sequences into + // the output string returned by GetOutput. Note that s is not necessarily + // '\0' terminated (and can contain '\0' within it). + // This method will grow the output buffer if needed. + void AppendData(const char* s, int len); + + private: + const char* file_extension_; // standard extension for generated output + STRING title_; // title of document being renderered + int imagenum_; // index of last image added + + FILE* fout_; // output file pointer + TessResultRenderer* next_; // Can link multiple renderers together + bool happy_; // I get grumpy when the disk fills up, etc. + }; /** * Renders tesseract output into a plain UTF-8 text string */ -class TESS_API TessTextRenderer : public TessResultRenderer { - public: - explicit TessTextRenderer(const char *outputbase); + class TESS_API TessTextRenderer : public TessResultRenderer { + public: + explicit TessTextRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); + }; /** * Renders tesseract output into an hocr text string */ -class TESS_API TessHOcrRenderer : public TessResultRenderer { - public: - explicit TessHOcrRenderer(const char *outputbase, bool font_info); - explicit TessHOcrRenderer(const char *outputbase); + class TESS_API TessHOcrRenderer : public TessResultRenderer { + public: + explicit TessHOcrRenderer(const char *outputbase, bool font_info); + explicit TessHOcrRenderer(const char *outputbase); - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); - private: - bool font_info_; // whether to print font information -}; + private: + bool font_info_; // whether to print 
font information + }; + +/** + * Renders tesseract output into an alto text string + */ + class TESS_API TessAltoRenderer : public TessResultRenderer { + public: + explicit TessAltoRenderer(const char *outputbase); + + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); + + }; /** * Renders Tesseract output into a TSV string */ -class TESS_API TessTsvRenderer : public TessResultRenderer { - public: - explicit TessTsvRenderer(const char* outputbase, bool font_info); - explicit TessTsvRenderer(const char* outputbase); + class TESS_API TessTsvRenderer : public TessResultRenderer { + public: + explicit TessTsvRenderer(const char* outputbase, bool font_info); + explicit TessTsvRenderer(const char* outputbase); - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); - private: - bool font_info_; // whether to print font information -}; + private: + bool font_info_; // whether to print font information + }; /** * Renders tesseract output into searchable PDF */ -class TESS_API TessPDFRenderer : public TessResultRenderer { - public: - // datadir is the location of the TESSDATA. We need it because - // we load a custom PDF font from this location. - TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); - - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); - - private: - // We don't want to have every image in memory at once, - // so we store some metadata as we go along producing - // PDFs one page at a time. At the end, that metadata is - // used to make everything that isn't easily handled in a - // streaming fashion. 
- long int obj_; // counter for PDF objects - GenericVector offsets_; // offset of every PDF object in bytes - GenericVector pages_; // object number for every /Page object - std::string datadir_; // where to find the custom font - bool textonly_; // skip images if set - // Bookkeeping only. DIY = Do It Yourself. - void AppendPDFObjectDIY(size_t objectsize); - // Bookkeeping + emit data. - void AppendPDFObject(const char *data); - // Create the /Contents object for an entire page. - char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); - // Turn an image into a PDF object. Only transcode if we have to. - static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum, - char** pdf_object, long int* pdf_object_size, const int jpg_quality); -}; + class TESS_API TessPDFRenderer : public TessResultRenderer { + public: + // datadir is the location of the TESSDATA. We need it because + // we load a custom PDF font from this location. + TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); + + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); + + private: + // We don't want to have every image in memory at once, + // so we store some metadata as we go along producing + // PDFs one page at a time. At the end, that metadata is + // used to make everything that isn't easily handled in a + // streaming fashion. + long int obj_; // counter for PDF objects + GenericVector offsets_; // offset of every PDF object in bytes + GenericVector pages_; // object number for every /Page object + std::string datadir_; // where to find the custom font + bool textonly_; // skip images if set + // Bookkeeping only. DIY = Do It Yourself. + void AppendPDFObjectDIY(size_t objectsize); + // Bookkeeping + emit data. + void AppendPDFObject(const char *data); + // Create the /Contents object for an entire page. 
+ char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); + // Turn an image into a PDF object. Only transcode if we have to. + static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum, + char** pdf_object, long int* pdf_object_size, const int jpg_quality); + }; /** * Renders tesseract output into a plain UTF-8 text string */ -class TESS_API TessUnlvRenderer : public TessResultRenderer { - public: - explicit TessUnlvRenderer(const char *outputbase); + class TESS_API TessUnlvRenderer : public TessResultRenderer { + public: + explicit TessUnlvRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); + }; /** * Renders tesseract output into a plain UTF-8 text string */ -class TESS_API TessBoxTextRenderer : public TessResultRenderer { - public: - explicit TessBoxTextRenderer(const char *outputbase); + class TESS_API TessBoxTextRenderer : public TessResultRenderer { + public: + explicit TessBoxTextRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); + }; #ifndef DISABLED_LEGACY_ENGINE /** * Renders tesseract output into an osd text string */ -class TESS_API TessOsdRenderer : public TessResultRenderer { - public: - explicit TessOsdRenderer(const char* outputbase); + class TESS_API TessOsdRenderer : public TessResultRenderer { + public: + explicit TessOsdRenderer(const char* outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); -}; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); + }; #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp index 5fd2cf6339..a2b1c4c185 100644 --- a/src/api/tesseractmain.cpp +++ b/src/api/tesseractmain.cpp @@ -66,20 +66,20 @@ static void Win32WarningHandler(const char* module, const char* fmt, #endif // _WIN32 
static void PrintVersionInfo() { - char* versionStrP; + char* versionStrP; - printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); + printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); - versionStrP = getLeptonicaVersion(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getLeptonicaVersion(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); - versionStrP = getImagelibVersions(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getImagelibVersions(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); #ifdef USE_OPENCL - cl_platform_id platform[4]; + cl_platform_id platform[4]; cl_uint num_platforms; printf(" OpenCL info:\n"); @@ -118,155 +118,155 @@ static void PrintVersionInfo() { } static void PrintHelpForPSM() { - const char* msg = - "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR.\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - " 11 Sparse text. Find as much text as possible in no" - " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. Treat the image as a single text line,\n" - " bypassing hacks that are Tesseract-specific.\n"; + const char* msg = + "Page segmentation modes:\n" + " 0 Orientation and script detection (OSD) only.\n" + " 1 Automatic page segmentation with OSD.\n" + " 2 Automatic page segmentation, but no OSD, or OCR.\n" + " 3 Fully automatic page segmentation, but no OSD. 
(Default)\n" + " 4 Assume a single column of text of variable sizes.\n" + " 5 Assume a single uniform block of vertically aligned text.\n" + " 6 Assume a single uniform block of text.\n" + " 7 Treat the image as a single text line.\n" + " 8 Treat the image as a single word.\n" + " 9 Treat the image as a single word in a circle.\n" + " 10 Treat the image as a single character.\n" + " 11 Sparse text. Find as much text as possible in no" + " particular order.\n" + " 12 Sparse text with OSD.\n" + " 13 Raw line. Treat the image as a single text line,\n" + " bypassing hacks that are Tesseract-specific.\n"; #ifdef DISABLED_LEGACY_ENGINE - const char* disabled_osd_msg = + const char* disabled_osd_msg = "\nNOTE: The OSD modes are currently disabled.\n"; printf("%s%s", msg, disabled_osd_msg); #else - printf("%s", msg); + printf("%s", msg); #endif } #ifndef DISABLED_LEGACY_ENGINE static void PrintHelpForOEM() { - const char* msg = - "OCR Engine modes:\n" - " 0 Legacy engine only.\n" - " 1 Neural nets LSTM engine only.\n" - " 2 Legacy + LSTM engines.\n" - " 3 Default, based on what is available.\n"; - - printf("%s", msg); + const char* msg = + "OCR Engine modes:\n" + " 0 Legacy engine only.\n" + " 1 Neural nets LSTM engine only.\n" + " 2 Legacy + LSTM engines.\n" + " 3 Default, based on what is available.\n"; + + printf("%s", msg); } #endif // ndef DISABLED_LEGACY_ENGINE static void PrintHelpExtra(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --help-psm | " -#ifndef DISABLED_LEGACY_ENGINE - "--help-oem | " -#endif - "--version\n" - " %s --list-langs [--tessdata-dir PATH]\n" - " %s --print-parameters [options...] [configfile...]\n" - " %s imagename|imagelist|stdin outputbase|stdout [options...] 
[configfile...]\n" - "\n" - "OCR options:\n" - " --tessdata-dir PATH Specify the location of tessdata path.\n" - " --user-words PATH Specify the location of user words file.\n" - " --user-patterns PATH Specify the location of user patterns file.\n" - " --dpi VALUE Specify DPI for input image.\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - " -c VAR=VALUE Set value for config variables.\n" - " Multiple -c arguments are allowed.\n" - " --psm NUM Specify page segmentation mode.\n" -#ifndef DISABLED_LEGACY_ENGINE - " --oem NUM Specify OCR Engine mode.\n" -#endif - "NOTE: These options must occur before any configfile.\n" - "\n", - program, program, program, program - ); - - PrintHelpForPSM(); + printf( + "Usage:\n" + " %s --help | --help-extra | --help-psm | " + #ifndef DISABLED_LEGACY_ENGINE + "--help-oem | " + #endif + "--version\n" + " %s --list-langs [--tessdata-dir PATH]\n" + " %s --print-parameters [options...] [configfile...]\n" + " %s imagename|imagelist|stdin outputbase|stdout [options...] 
[configfile...]\n" + "\n" + "OCR options:\n" + " --tessdata-dir PATH Specify the location of tessdata path.\n" + " --user-words PATH Specify the location of user words file.\n" + " --user-patterns PATH Specify the location of user patterns file.\n" + " --dpi VALUE Specify DPI for input image.\n" + " -l LANG[+LANG] Specify language(s) used for OCR.\n" + " -c VAR=VALUE Set value for config variables.\n" + " Multiple -c arguments are allowed.\n" + " --psm NUM Specify page segmentation mode.\n" + #ifndef DISABLED_LEGACY_ENGINE + " --oem NUM Specify OCR Engine mode.\n" + #endif + "NOTE: These options must occur before any configfile.\n" + "\n", + program, program, program, program + ); + + PrintHelpForPSM(); #ifndef DISABLED_LEGACY_ENGINE - printf("\n"); - PrintHelpForOEM(); + printf("\n"); + PrintHelpForOEM(); #endif - printf( - "\n" - "Single options:\n" - " -h, --help Show minimal help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --help-psm Show page segmentation modes.\n" -#ifndef DISABLED_LEGACY_ENGINE - " --help-oem Show OCR Engine modes.\n" -#endif - " -v, --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters.\n" - ); + printf( + "\n" + "Single options:\n" + " -h, --help Show minimal help message.\n" + " --help-extra Show extra help for advanced users.\n" + " --help-psm Show page segmentation modes.\n" + #ifndef DISABLED_LEGACY_ENGINE + " --help-oem Show OCR Engine modes.\n" + #endif + " -v, --version Show version information.\n" + " --list-langs List available languages for tesseract engine.\n" + " --print-parameters Print tesseract parameters.\n" + ); } static void PrintHelpMessage(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --version\n" - " %s --list-langs\n" - " %s imagename outputbase [options...] 
[configfile...]\n" - "\n" - "OCR options:\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - "NOTE: These options must occur before any configfile.\n" - "\n" - "Single options:\n" - " --help Show this help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n", - program, program, program - ); + printf( + "Usage:\n" + " %s --help | --help-extra | --version\n" + " %s --list-langs\n" + " %s imagename outputbase [options...] [configfile...]\n" + "\n" + "OCR options:\n" + " -l LANG[+LANG] Specify language(s) used for OCR.\n" + "NOTE: These options must occur before any configfile.\n" + "\n" + "Single options:\n" + " --help Show this help message.\n" + " --help-extra Show extra help for advanced users.\n" + " --version Show version information.\n" + " --list-langs List available languages for tesseract engine.\n", + program, program, program + ); } static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { - char opt1[256], opt2[255]; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - strncpy(opt1, argv[i + 1], 255); - opt1[255] = '\0'; - char* p = strchr(opt1, '='); - if (!p) { - fprintf(stderr, "Missing = in configvar assignment\n"); - exit(EXIT_FAILURE); - } - *p = 0; - strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); - opt2[254] = 0; - ++i; - - if (!api->SetVariable(opt1, opt2)) { - fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); - } + char opt1[256], opt2[255]; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { + strncpy(opt1, argv[i + 1], 255); + opt1[255] = '\0'; + char* p = strchr(opt1, '='); + if (!p) { + fprintf(stderr, "Missing = in configvar assignment\n"); + exit(EXIT_FAILURE); + } + *p = 0; + strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); + opt2[254] = 0; + ++i; + + if (!api->SetVariable(opt1, opt2)) { + 
fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); + } + } } - } } static void PrintLangsList(tesseract::TessBaseAPI* api) { - GenericVector languages; - api->GetAvailableLanguagesAsVector(&languages); - printf("List of available languages (%d):\n", languages.size()); - for (int index = 0; index < languages.size(); ++index) { - STRING& string = languages[index]; - printf("%s\n", string.string()); - } - api->End(); + GenericVector languages; + api->GetAvailableLanguagesAsVector(&languages); + printf("List of available languages (%d):\n", languages.size()); + for (int index = 0; index < languages.size(); ++index) { + STRING& string = languages[index]; + printf("%s\n", string.string()); + } + api->End(); } static void PrintBanner() { - tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", - tesseract::TessBaseAPI::Version()); + tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", + tesseract::TessBaseAPI::Version()); } /** @@ -285,15 +285,15 @@ static void PrintBanner() { */ static void FixPageSegMode(tesseract::TessBaseAPI* api, tesseract::PageSegMode pagesegmode) { - if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api->SetPageSegMode(pagesegmode); + if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) + api->SetPageSegMode(pagesegmode); } static void checkArgValues(int arg, const char* mode, int count) { - if (arg >= count || arg < 0) { - printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); - exit(EXIT_SUCCESS); - } + if (arg >= count || arg < 0) { + printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); + exit(EXIT_SUCCESS); + } } // NOTE: arg_i is used here to avoid ugly *i so many times in this function @@ -304,205 +304,219 @@ static void ParseArgs(const int argc, char** argv, const char** lang, GenericVector* vars_values, l_int32* arg_i, tesseract::PageSegMode* pagesegmode, tesseract::OcrEngineMode* enginemode) { - bool noocr = false; - int i; - for (i = 
1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) { - if (*image != nullptr && *outputbase == nullptr) { - // outputbase follows image, don't allow options at that position. - *outputbase = argv[i]; - } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { - PrintHelpMessage(argv[0]); - noocr = true; - } else if (strcmp(argv[i], "--help-extra") == 0) { - PrintHelpExtra(argv[0]); - noocr = true; - } else if ((strcmp(argv[i], "--help-psm") == 0)) { - PrintHelpForPSM(); - noocr = true; + bool noocr = false; + int i; + for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) { + if (*image != nullptr && *outputbase == nullptr) { + // outputbase follows image, don't allow options at that position. + *outputbase = argv[i]; + } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { + PrintHelpMessage(argv[0]); + noocr = true; + } else if (strcmp(argv[i], "--help-extra") == 0) { + PrintHelpExtra(argv[0]); + noocr = true; + } else if ((strcmp(argv[i], "--help-psm") == 0)) { + PrintHelpForPSM(); + noocr = true; #ifndef DISABLED_LEGACY_ENGINE - } else if ((strcmp(argv[i], "--help-oem") == 0)) { - PrintHelpForOEM(); - noocr = true; + } else if ((strcmp(argv[i], "--help-oem") == 0)) { + PrintHelpForOEM(); + noocr = true; #endif - } else if ((strcmp(argv[i], "-v") == 0) || - (strcmp(argv[i], "--version") == 0)) { - PrintVersionInfo(); - noocr = true; - } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { - *lang = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) { - *datapath = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) { - *dpi = atoi(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { - vars_vec->push_back("user_words_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { - 
vars_vec->push_back("user_patterns_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--list-langs") == 0) { - noocr = true; - *list_langs = true; - } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { - checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); - *pagesegmode = static_cast(atoi(argv[i + 1])); - ++i; - } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { + } else if ((strcmp(argv[i], "-v") == 0) || + (strcmp(argv[i], "--version") == 0)) { + PrintVersionInfo(); + noocr = true; + } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { + *lang = argv[i + 1]; + ++i; + } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) { + *datapath = argv[i + 1]; + ++i; + } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) { + *dpi = atoi(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { + vars_vec->push_back("user_words_file"); + vars_values->push_back(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { + vars_vec->push_back("user_patterns_file"); + vars_values->push_back(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--list-langs") == 0) { + noocr = true; + *list_langs = true; + } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { + checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); + *pagesegmode = static_cast(atoi(argv[i + 1])); + ++i; + } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { #ifndef DISABLED_LEGACY_ENGINE - int oem = atoi(argv[i + 1]); - checkArgValues(oem, "OEM", tesseract::OEM_COUNT); - *enginemode = static_cast(oem); + int oem = atoi(argv[i + 1]); + checkArgValues(oem, "OEM", tesseract::OEM_COUNT); + *enginemode = static_cast(oem); #endif - ++i; - } else if (strcmp(argv[i], "--print-parameters") == 0) { - noocr = true; - *print_parameters = true; - } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - // handled properly after api init - ++i; - } else if (*image 
== nullptr) { - *image = argv[i]; - } else { - // Unexpected argument. - fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]); - exit(EXIT_FAILURE); + ++i; + } else if (strcmp(argv[i], "--print-parameters") == 0) { + noocr = true; + *print_parameters = true; + } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { + // handled properly after api init + ++i; + } else if (*image == nullptr) { + *image = argv[i]; + } else { + // Unexpected argument. + fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]); + exit(EXIT_FAILURE); + } } - } - - *arg_i = i; - if (*pagesegmode == tesseract::PSM_OSD_ONLY) { - // OSD = orientation and script detection. - if (*lang != nullptr && strcmp(*lang, "osd")) { - // If the user explicitly specifies a language (other than osd) - // or a script, only orientation can be detected. - fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang); - } else { - // That mode requires osd.traineddata to detect orientation and script. - *lang = "osd"; + *arg_i = i; + + if (*pagesegmode == tesseract::PSM_OSD_ONLY) { + // OSD = orientation and script detection. + if (*lang != nullptr && strcmp(*lang, "osd")) { + // If the user explicitly specifies a language (other than osd) + // or a script, only orientation can be detected. + fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang); + } else { + // That mode requires osd.traineddata to detect orientation and script. 
+ *lang = "osd"; + } } - } - if (*outputbase == nullptr && noocr == false) { - PrintHelpMessage(argv[0]); - exit(EXIT_FAILURE); - } + if (*outputbase == nullptr && noocr == false) { + PrintHelpMessage(argv[0]); + exit(EXIT_FAILURE); + } } static void PreloadRenderers( - tesseract::TessBaseAPI* api, - tesseract::PointerVector* renderers, - tesseract::PageSegMode pagesegmode, const char* outputbase) { - if (pagesegmode == tesseract::PSM_OSD_ONLY) { + tesseract::TessBaseAPI* api, + tesseract::PointerVector* renderers, + tesseract::PageSegMode pagesegmode, const char* outputbase) { + if (pagesegmode == tesseract::PSM_OSD_ONLY) { #ifndef DISABLED_LEGACY_ENGINE - renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); + renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); #endif // ndef DISABLED_LEGACY_ENGINE - } else { - bool b; - api->GetBoolVariable("tessedit_create_hocr", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessHOcrRenderer* renderer = - new tesseract::TessHOcrRenderer(outputbase, font_info); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create hOCR output file: %s\n", - strerror(errno)); - } - } + } else { + bool b; + api->GetBoolVariable("tessedit_create_hocr", &b); + if (b) { + bool font_info; + api->GetBoolVariable("hocr_font_info", &font_info); + tesseract::TessHOcrRenderer* renderer = + new tesseract::TessHOcrRenderer(outputbase, font_info); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create hOCR output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_tsv", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessTsvRenderer* renderer = - new tesseract::TessTsvRenderer(outputbase, font_info); - if (renderer->happy()) { - 
renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TSV output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_create_alto", &b); + if (b) { + bool font_info; + tesseract::TessAltoRenderer* renderer = + new tesseract::TessAltoRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create ALTO output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_pdf", &b); - if (b) { - #ifdef WIN32 - if (_setmode(_fileno(stdout), _O_BINARY) == -1) + api->GetBoolVariable("tessedit_create_tsv", &b); + if (b) { + bool font_info; + api->GetBoolVariable("hocr_font_info", &font_info); + tesseract::TessTsvRenderer* renderer = + new tesseract::TessTsvRenderer(outputbase, font_info); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create TSV output file: %s\n", + strerror(errno)); + } + } + + api->GetBoolVariable("tessedit_create_pdf", &b); + if (b) { +#ifdef WIN32 + if (_setmode(_fileno(stdout), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); - #endif // WIN32 - bool textonly; - api->GetBoolVariable("textonly_pdf", &textonly); - tesseract::TessPDFRenderer* renderer = - new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), - textonly); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create PDF output file: %s\n", - strerror(errno)); - } - } +#endif // WIN32 + bool textonly; + api->GetBoolVariable("textonly_pdf", &textonly); + tesseract::TessPDFRenderer* renderer = + new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), + textonly); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create PDF output file: %s\n", + strerror(errno)); + } + } - 
api->GetBoolVariable("tessedit_write_unlv", &b); - if (b) { - api->SetVariable("unlv_tilde_crunching", "true"); - tesseract::TessUnlvRenderer* renderer = - new tesseract::TessUnlvRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create UNLV output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_write_unlv", &b); + if (b) { + api->SetVariable("unlv_tilde_crunching", "true"); + tesseract::TessUnlvRenderer* renderer = + new tesseract::TessUnlvRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create UNLV output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_boxfile", &b); - if (b) { - tesseract::TessBoxTextRenderer* renderer = - new tesseract::TessBoxTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create BOX output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_create_boxfile", &b); + if (b) { + tesseract::TessBoxTextRenderer* renderer = + new tesseract::TessBoxTextRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create BOX output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_txt", &b); - if (b || renderers->empty()) { - tesseract::TessTextRenderer* renderer = - new tesseract::TessTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TXT output file: %s\n", - strerror(errno)); - } + api->GetBoolVariable("tessedit_create_txt", &b); + if (b || renderers->empty()) { + tesseract::TessTextRenderer* renderer = + new tesseract::TessTextRenderer(outputbase); + if (renderer->happy()) { + 
renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create TXT output file: %s\n", + strerror(errno)); + } + } } - } - if (!renderers->empty()) { - // Since the PointerVector auto-deletes, null-out the renderers that are - // added to the root, and leave the root in the vector. - for (int r = 1; r < renderers->size(); ++r) { - (*renderers)[0]->insert((*renderers)[r]); - (*renderers)[r] = nullptr; + if (!renderers->empty()) { + // Since the PointerVector auto-deletes, null-out the renderers that are + // added to the root, and leave the root in the vector. + for (int r = 1; r < renderers->size(); ++r) { + (*renderers)[0]->insert((*renderers)[r]); + (*renderers)[r] = nullptr; + } } - } } @@ -512,135 +526,135 @@ static void PreloadRenderers( **********************************************************************/ int main(int argc, char** argv) { - const char* lang = nullptr; - const char* image = nullptr; - const char* outputbase = nullptr; - const char* datapath = nullptr; - bool list_langs = false; - bool print_parameters = false; - l_int32 dpi = 0; - int arg_i = 1; - tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; + const char* lang = nullptr; + const char* image = nullptr; + const char* outputbase = nullptr; + const char* datapath = nullptr; + bool list_langs = false; + bool print_parameters = false; + l_int32 dpi = 0; + int arg_i = 1; + tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; #ifdef DISABLED_LEGACY_ENGINE - auto enginemode = tesseract::OEM_LSTM_ONLY; + auto enginemode = tesseract::OEM_LSTM_ONLY; #else - tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; + tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; #endif - /* main() calls functions like ParseArgs which call exit(). - * This results in memory leaks if vars_vec and vars_values are - * declared as auto variables (destructor is not called then). 
*/ - static GenericVector vars_vec; - static GenericVector vars_values; + /* main() calls functions like ParseArgs which call exit(). + * This results in memory leaks if vars_vec and vars_values are + * declared as auto variables (destructor is not called then). */ + static GenericVector vars_vec; + static GenericVector vars_values; #if !defined(DEBUG) - // Disable debugging and informational messages from Leptonica. - setMsgSeverity(L_SEVERITY_ERROR); + // Disable debugging and informational messages from Leptonica. + setMsgSeverity(L_SEVERITY_ERROR); #endif #if defined(HAVE_TIFFIO_H) && defined(_WIN32) - /* Show libtiff errors and warnings on console (not in GUI). */ + /* Show libtiff errors and warnings on console (not in GUI). */ TIFFSetErrorHandler(Win32ErrorHandler); TIFFSetWarningHandler(Win32WarningHandler); #endif // HAVE_TIFFIO_H && _WIN32 - ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, - &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i, - &pagesegmode, &enginemode); - - if (lang == nullptr) { - // Set default language if none was given. - lang = "eng"; - } - - if (image == nullptr && !list_langs && !print_parameters) - return EXIT_SUCCESS; + ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, + &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i, + &pagesegmode, &enginemode); - PERF_COUNT_START("Tesseract:main") + if (lang == nullptr) { + // Set default language if none was given. + lang = "eng"; + } - // Call GlobalDawgCache here to create the global DawgCache object before - // the TessBaseAPI object. This fixes the order of destructor calls: - // first TessBaseAPI must be destructed, DawgCache must be the last object. - tesseract::Dict::GlobalDawgCache(); + if (image == nullptr && !list_langs && !print_parameters) + return EXIT_SUCCESS; - // Avoid memory leak caused by auto variable when return is called. 
- static tesseract::TessBaseAPI api; + PERF_COUNT_START("Tesseract:main") - api.SetOutputName(outputbase); + // Call GlobalDawgCache here to create the global DawgCache object before + // the TessBaseAPI object. This fixes the order of destructor calls: + // first TessBaseAPI must be destructed, DawgCache must be the last object. + tesseract::Dict::GlobalDawgCache(); - const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), - argc - arg_i, &vars_vec, &vars_values, false); + // Avoid memory leak caused by auto variable when return is called. + static tesseract::TessBaseAPI api; - SetVariablesFromCLArgs(&api, argc, argv); + api.SetOutputName(outputbase); - if (list_langs) { - PrintLangsList(&api); - return EXIT_SUCCESS; - } + const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), + argc - arg_i, &vars_vec, &vars_values, false); - if (init_failed) { - fprintf(stderr, "Could not initialize tesseract.\n"); - return EXIT_FAILURE; - } + SetVariablesFromCLArgs(&api, argc, argv); - if (print_parameters) { - FILE* fout = stdout; - fprintf(stdout, "Tesseract parameters:\n"); - api.PrintVariables(fout); - api.End(); - return EXIT_SUCCESS; - } + if (list_langs) { + PrintLangsList(&api); + return EXIT_SUCCESS; + } - FixPageSegMode(&api, pagesegmode); + if (init_failed) { + fprintf(stderr, "Could not initialize tesseract.\n"); + return EXIT_FAILURE; + } - if (dpi) { - char dpi_string[255]; - snprintf(dpi_string, 254, "%d", dpi); - api.SetVariable("user_defined_dpi", dpi_string); - } + if (print_parameters) { + FILE* fout = stdout; + fprintf(stdout, "Tesseract parameters:\n"); + api.PrintVariables(fout); + api.End(); + return EXIT_SUCCESS; + } - if (pagesegmode == tesseract::PSM_AUTO_ONLY) { - int ret_val = EXIT_SUCCESS; + FixPageSegMode(&api, pagesegmode); - Pix* pixs = pixRead(image); - if (!pixs) { - fprintf(stderr, "Leptonica can't process input file: %s\n", image); - return 2; + if (dpi) { + char dpi_string[255]; + 
snprintf(dpi_string, 254, "%d", dpi); + api.SetVariable("user_defined_dpi", dpi_string); } - api.SetImage(pixs); + if (pagesegmode == tesseract::PSM_AUTO_ONLY) { + int ret_val = EXIT_SUCCESS; - tesseract::Orientation orientation; - tesseract::WritingDirection direction; - tesseract::TextlineOrder order; - float deskew_angle; + Pix* pixs = pixRead(image); + if (!pixs) { + fprintf(stderr, "Leptonica can't process input file: %s\n", image); + return 2; + } - const tesseract::PageIterator* it = api.AnalyseLayout(); - if (it) { - it->Orientation(&orientation, &direction, &order, &deskew_angle); - tprintf( - "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" - "Deskew angle: %.4f\n", - orientation, direction, order, deskew_angle); - } else { - ret_val = EXIT_FAILURE; - } + api.SetImage(pixs); + + tesseract::Orientation orientation; + tesseract::WritingDirection direction; + tesseract::TextlineOrder order; + float deskew_angle; + + const tesseract::PageIterator* it = api.AnalyseLayout(); + if (it) { + it->Orientation(&orientation, &direction, &order, &deskew_angle); + tprintf( + "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" + "Deskew angle: %.4f\n", + orientation, direction, order, deskew_angle); + } else { + ret_val = EXIT_FAILURE; + } - delete it; + delete it; - pixDestroy(&pixs); - return ret_val; - } + pixDestroy(&pixs); + return ret_val; + } - // set in_training_mode to true when using one of these configs: - // ambigs.train, box.train, box.train.stderr, linebox, rebox - bool b = false; - bool in_training_mode = - (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || - (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + // set in_training_mode to true when using one of these configs: + // ambigs.train, box.train, box.train.stderr, linebox, rebox + bool b = false; + bool in_training_mode = + (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || 
+ (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); #ifdef DISABLED_LEGACY_ENGINE - auto cur_psm = api.GetPageSegMode(); + auto cur_psm = api.GetPageSegMode(); auto osd_warning = std::string(""); if (cur_psm == tesseract::PSM_OSD_ONLY) { const char* disabled_osd_msg = @@ -660,37 +674,37 @@ int main(int argc, char** argv) { } #endif // def DISABLED_LEGACY_ENGINE - // Avoid memory leak caused by auto variable when exit() is called. - static tesseract::PointerVector renderers; + // Avoid memory leak caused by auto variable when exit() is called. + static tesseract::PointerVector renderers; - if (in_training_mode) { - renderers.push_back(nullptr); - } else { - PreloadRenderers(&api, &renderers, pagesegmode, outputbase); - } + if (in_training_mode) { + renderers.push_back(nullptr); + } else { + PreloadRenderers(&api, &renderers, pagesegmode, outputbase); + } - bool banner = false; - if (outputbase != nullptr && strcmp(outputbase, "-") && - strcmp(outputbase, "stdout")) { - banner = true; - } + bool banner = false; + if (outputbase != nullptr && strcmp(outputbase, "-") && + strcmp(outputbase, "stdout")) { + banner = true; + } - if (!renderers.empty()) { - if (banner) PrintBanner(); + if (!renderers.empty()) { + if (banner) PrintBanner(); #ifdef DISABLED_LEGACY_ENGINE - if (!osd_warning.empty()) { + if (!osd_warning.empty()) { fprintf(stderr, "%s",osd_warning.c_str()); } #endif - bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]); - if (!succeed) { - fprintf(stderr, "Error during processing.\n"); - return EXIT_FAILURE; + bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]); + if (!succeed) { + fprintf(stderr, "Error during processing.\n"); + return EXIT_FAILURE; + } } - } - PERF_COUNT_END + PERF_COUNT_END - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/tessdata/configs/alto b/tessdata/configs/alto new file mode 100644 index 0000000000..0dd12a7a70 --- 
/dev/null +++ b/tessdata/configs/alto @@ -0,0 +1 @@ +tessedit_create_alto 1 From 38b705c660a28c9128b2b77ffa67729b12c07d41 Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Mon, 19 Nov 2018 22:12:11 -0500 Subject: [PATCH 2/9] Separate ALTO functionality into altorenderer.cpp --- CMakeLists.txt | 3 +- android/jni/Android.mk | 1 + src/api/Makefile.am | 2 +- src/api/altorenderer.cpp | 254 ++++++++++++++++++++++++++++++++++ src/api/baseapi.cpp | 165 ---------------------- src/api/renderer.cpp | 48 ------- src/api/renderer.h | 1 + src/api/tesseractmain.cpp | 1 - src/ccmain/tesseractclass.cpp | 2 + src/ccmain/tesseractclass.h | 1 + 10 files changed, 262 insertions(+), 216 deletions(-) create mode 100644 src/api/altorenderer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cf2cecc6e..3076140edd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,6 +215,7 @@ set(tesseract_src ${tesseract_src} src/api/capi.cpp src/api/renderer.cpp src/api/pdfrenderer.cpp + src/api/altorenderer.cpp ) if (WIN32) @@ -223,7 +224,7 @@ if (WIN32) set(tesseract_hdr ${tesseract_hdr} ${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h) - set(tesseract_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc) + set(tesseract_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc src/api/altorenderer.cpp) set_source_files_properties( ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp PROPERTIES COMPILE_DEFINITIONS __SSE4_1__) diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 170bb51344..225a2f4c97 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -31,6 +31,7 @@ LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../ EXPLICIT_SRC_EXCLUDES := \ $(LOCAL_PATH)/../../api/pdfrenderer.cpp \ + $(LOCAL_PATH)/../../api/altorenderer.cpp \ $(LOCAL_PATH)/../../api/tesseractmain.cpp \ LOCAL_SRC_FILES := $(filter-out $(EXPLICIT_SRC_EXCLUDES), $(LOCAL_SRC_FILES)) diff --git a/src/api/Makefile.am b/src/api/Makefile.am 
index bdecc6a77f..3fdf17fb19 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -32,7 +32,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS) if VISIBILITY libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS endif -libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp +libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp altorenderer.cpp lib_LTLIBRARIES += libtesseract.la libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp new file mode 100644 index 0000000000..2b4755d046 --- /dev/null +++ b/src/api/altorenderer.cpp @@ -0,0 +1,254 @@ +/////////////////////////////////////////////////////////////////////// +// File: altorenderer.cpp +// Description: ALTO rendering interface +// +// (C) Copyright 2018 +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "baseapi.h" +#include "renderer.h" + +namespace tesseract { + +/********************************************************************** + * Alto Text Renderer interface implementation + **********************************************************************/ + TessAltoRenderer::TessAltoRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "xml") { + } + + /** + * Append the ALTO XML for the beginning of the document + */ + bool TessAltoRenderer::BeginDocumentHandler() { + AppendString( + "\n" + "\n" + "\t\n" + "\t\tpixel\n" + "\t\t\n" + "\t\t\t"); + + AppendString(title()); + + AppendString("\t\t\t\n" + "\t\t\n" + "\t\t\n" + "\t\t\t\n" + "\t\t\t\t\n" + "\t\t\t\t\ttesseract 4.0.0\n" + "\t\t\t\t\n" + "\t\t\t\n" + "\t\t\n" + "\t\n" + "\t\n"); + + return true; + } + + /** + * Append the ALTO XML for the end of the document + */ + bool TessAltoRenderer::EndDocumentHandler() { + AppendString("\t\n\n"); + + return true; + } + + /** + * Append the ALTO XML for the layout of the image + */ + bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) { + const std::unique_ptr hocr(api->GetAltoText(imagenum())); + if (hocr == nullptr) return false; + + AppendString(hocr.get()); + + return true; + } + + /** + * Add a unique ID to an ALTO element + */ + static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + id_buffer[BUFSIZE - 1] = '\0'; + *alto_str += " ID=\""; + *alto_str += id_buffer; + *alto_str += "\""; + } + + /** + * Add coordinates to specified TextBlock, TextLine, or String bounding box + * Add word confidence if adding to a String bounding box + */ + static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, + STRING *alto_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, 
&bottom); + + int hpos = left; + int vpos = top; + int height = bottom - top; + int width = right - left; + + *alto_str += " HPOS=\""; + alto_str->add_str_int("", hpos); + *alto_str += "\""; + *alto_str += " VPOS=\""; + alto_str->add_str_int("", vpos); + *alto_str += "\""; + *alto_str += " WIDTH=\""; + alto_str->add_str_int("", width); + *alto_str += "\""; + *alto_str += " HEIGHT=\""; + alto_str->add_str_int("", height); + *alto_str += "\""; + + if (level == RIL_WORD) { + int wc = it->Confidence(RIL_WORD); + *alto_str += " WC=\"0."; + alto_str->add_str_int("", wc); + *alto_str += "\""; + } + if (level != RIL_WORD) { + + *alto_str += ">"; + } + } + + /** + * Make an XML-formatted string with ALTO markup from the internal + * data structures. + */ + char *TessBaseAPI::GetAltoText(int page_number) { + return GetAltoText(nullptr, page_number); + } + + /** + * Make an XML-formatted string with ALTO markup from the internal + * data structures. + */ + char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) + return nullptr; + + int lcnt = 0, bcnt = 0, wcnt = 0; + int page_id = page_number; + + STRING alto_str(""); + + if (input_file_ == nullptr) + SetInputName(nullptr); + + #ifdef _WIN32 + // convert input name from ANSI encoding to utf-8 + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); + wchar_t *uni16_str = new WCHAR[str16_len]; + str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, + uni16_str, str16_len); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, + nullptr, nullptr); + char *utf8_str = new char[utf8_len]; + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, + utf8_len, nullptr, nullptr); + *input_file_ = utf8_str; + delete[] uni16_str; + delete[] utf8_str; + #endif + + alto_str += "\t\t\n"; + + ResultIterator *res_it = GetIterator(); + while 
(!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + alto_str += "\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { + + alto_str += "\t\t\t\t\tIsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + alto_str += HOcrEscape(grapheme.get()); + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + + alto_str += "\"/>\n"; + + wcnt++; + + if (last_word_in_line) { + alto_str += "\t\t\t\t\t\n"; + lcnt++; + } + + if (last_word_in_block) { + alto_str += "\t\t\t\t\n"; + bcnt++; + } + } + + alto_str += "\t\t\t\n"; + alto_str += "\t\t\n"; + + char *ret = new char[alto_str.length() + 1]; + strcpy(ret, alto_str.string()); + delete res_it; + return ret; + } + + } \ No newline at end of file diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 05f04a443c..52fb563b02 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1426,16 +1426,6 @@ namespace tesseract { *hocr_str += "'"; } - static void AddIdToAlto(STRING* alto_str, const std::string base, int num1) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - id_buffer[BUFSIZE - 1] = '\0'; - *alto_str += " ID=\""; - *alto_str += id_buffer; - *alto_str += "\""; - } - static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, STRING* hocr_str) { int left, top, right, bottom; @@ -1460,41 +1450,6 @@ namespace tesseract { *hocr_str += "\">"; } - static void AddBoxToAlto(const ResultIterator* it, PageIteratorLevel level, - STRING* alto_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - int hpos = left; - int vpos = top; - int height = bottom - top; - int width = right 
- left; - - *alto_str += " HPOS=\""; - alto_str->add_str_int("", hpos); - *alto_str += "\""; - *alto_str += " VPOS=\""; - alto_str->add_str_int("", vpos); - *alto_str += "\""; - *alto_str += " WIDTH=\""; - alto_str->add_str_int("", width); - *alto_str += "\""; - *alto_str += " HEIGHT=\""; - alto_str->add_str_int("", height); - *alto_str += "\""; - - if (level == RIL_WORD) { - int wc = it->Confidence(RIL_WORD); - *alto_str += " WC=\"0."; - alto_str->add_str_int("", wc); - *alto_str += "\""; - } - if (level != RIL_WORD) { - - *alto_str += ">"; - } - } - static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, STRING* hocr_str) { int left, top, right, bottom; @@ -1518,14 +1473,6 @@ namespace tesseract { return GetHOCRText(nullptr, page_number); } -/** - * Make an XML-formatted string with ALTO markup from the internal - * data structures. - */ - char* TessBaseAPI::GetAltoText(int page_number) { - return GetAltoText(nullptr, page_number); - } - /** * Make a HTML-formatted string with hOCR markup from the internal * data structures. @@ -1752,119 +1699,7 @@ namespace tesseract { return ret; } -/** - * Make an XML-formatted string with ALTO markup from the internal - * data structures. 
- */ - char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; - - int lcnt = 0, bcnt = 0, wcnt = 0; - int page_id = page_number; - - STRING alto_str(""); - - if (input_file_ == nullptr) - SetInputName(nullptr); -#ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = - MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, - uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, - nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, - utf8_len, nullptr, nullptr); - *input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; -#endif - - alto_str += "\t\t\n"; - - ResultIterator *res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - alto_str += "\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { - - alto_str += "\t\t\t\t\tIsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != 0) { - alto_str += HOcrEscape(grapheme.get()); - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - - alto_str += "\"/>\n"; - - wcnt++; - - if (last_word_in_line) { - alto_str += "\t\t\t\t\t\n"; - lcnt++; - } - - if (last_word_in_block) { - alto_str += "\t\t\t\t\n"; - bcnt++; - } - } - - alto_str += "\t\t\t\n"; - alto_str += "\t\t\n"; - - char *ret = new char[alto_str.length() + 1]; - strcpy(ret, alto_str.string()); - delete res_it; - return ret; - } 
/** * Make a TSV-formatted string from the internal data structures. diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp index 277bc47389..e2cf91003e 100644 --- a/src/api/renderer.cpp +++ b/src/api/renderer.cpp @@ -193,54 +193,6 @@ namespace tesseract { return true; } -/********************************************************************** - * Alto Text Renderer interface implementation - **********************************************************************/ - TessAltoRenderer::TessAltoRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "alto") { - } - - bool TessAltoRenderer::BeginDocumentHandler() { - AppendString( - "\n" - "\n" - "\t\n" - "\t\tpixel\n" - "\t\t\n" - "\t\t\t"); - - AppendString(title()); - - AppendString("\t\t\t\n" - "\t\t\n" - "\t\t\n" - "\t\t\t\n" - "\t\t\t\t\n" - "\t\t\t\t\ttesseract 4.0.0\n" - "\t\t\t\t\n" - "\t\t\t\n" - "\t\t\n" - "\t\n" - "\t\n"); - - return true; - } - - bool TessAltoRenderer::EndDocumentHandler() { - AppendString("\t\n\n"); - - return true; - } - - bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr hocr(api->GetAltoText(imagenum())); - if (hocr == nullptr) return false; - - AppendString(hocr.get()); - - return true; - } - /********************************************************************** * TSV Text Renderer interface implementation **********************************************************************/ diff --git a/src/api/renderer.h b/src/api/renderer.h index cb91f3e005..f2313c31fd 100644 --- a/src/api/renderer.h +++ b/src/api/renderer.h @@ -180,6 +180,7 @@ namespace tesseract { }; + /** * Renders Tesseract output into a TSV string */ diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp index a2b1c4c185..8ec5e7b75f 100644 --- a/src/api/tesseractmain.cpp +++ b/src/api/tesseractmain.cpp @@ -421,7 +421,6 @@ static void PreloadRenderers( api->GetBoolVariable("tessedit_create_alto", &b); if (b) { - bool font_info; tesseract::TessAltoRenderer* renderer = 
new tesseract::TessAltoRenderer(outputbase); if (renderer->happy()) { diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index c6aa833723..997e6a4258 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -387,6 +387,8 @@ Tesseract::Tesseract() this->params()), BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file", this->params()), + BOOL_MEMBER(tessedit_create_alto, false, "Write .xml ALTO file", + this->params()), BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file", this->params()), BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file", diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 8b821f86ac..61ee727616 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -1037,6 +1037,7 @@ class Tesseract : public Wordrec { BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file"); BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file"); BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file"); + BOOL_VAR_H(tessedit_create_alto, false, "Write .xml ALTO output file"); BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file"); BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file"); BOOL_VAR_H(textonly_pdf, false, From 3ba5a09574f21bfd58105d18b607d85c8e8748ac Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Tue, 20 Nov 2018 18:14:22 -0500 Subject: [PATCH 3/9] Clean up formatting --- src/api/altorenderer.cpp | 33 +- src/api/baseapi.cpp | 3542 +++++++++++++++++++------------------- src/api/baseapi.h | 27 +- src/api/capi.cpp | 5 - src/api/capi.h | 4 +- src/api/renderer.cpp | 220 +-- src/api/renderer.h | 1 - 7 files changed, 1913 insertions(+), 1919 deletions(-) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index 2b4755d046..a654ad818d 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -1,19 +1,20 @@ 
-/////////////////////////////////////////////////////////////////////// -// File: altorenderer.cpp -// Description: ALTO rendering interface -// -// (C) Copyright 2018 -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/////////////////////////////////////////////////////////////////////// +/********************************************************************** + * File: altorenderer.cpp + * Description: ALTO rendering interface + * Author: Jake Sebright + * + * (C) Copyright 2018 + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ #include "baseapi.h" #include "renderer.h" diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 52fb563b02..5a7945154d 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -118,10 +118,10 @@ namespace tesseract { static void addAvailableLanguages(const STRING &datadir, const STRING &base, GenericVector* langs) { - const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; - const size_t extlen = sizeof(kTrainedDataSuffix); + const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; + const size_t extlen = sizeof(kTrainedDataSuffix); #ifdef _WIN32 - WIN32_FIND_DATA data; + WIN32_FIND_DATA data; HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); if (handle != INVALID_HANDLE_VALUE) { BOOL result = TRUE; @@ -146,37 +146,37 @@ namespace tesseract { FindClose(handle); } #else // _WIN32 - DIR* dir = opendir((datadir + base).string()); - if (dir != nullptr) { - dirent *de; - while ((de = readdir(dir))) { - char *name = de->d_name; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - struct stat st; - if (stat((datadir + base2 + name).string(), &st) == 0 && - (st.st_mode & S_IFDIR) == S_IFDIR) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } + DIR* dir = opendir((datadir + base).string()); + if (dir != nullptr) { + dirent *de; + while ((de = readdir(dir))) { + char *name = de->d_name; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + struct stat st; + if (stat((datadir + base2 + name).string(), &st) == 0 && + (st.st_mode & S_IFDIR) == S_IFDIR) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' 
&& + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); + } + } + } } - } + closedir(dir); } - closedir(dir); - } #endif } // Compare two STRING values (used for sorting). static int CompareSTRING(const void* p1, const void* p2) { - const STRING* s1 = static_cast(p1); - const STRING* s2 = static_cast(p2); - return strcmp(s1->c_str(), s2->c_str()); + const STRING* s1 = static_cast(p1); + const STRING* s2 = static_cast(p2); + return strcmp(s1->c_str(), s2->c_str()); } TessBaseAPI::TessBaseAPI() @@ -204,24 +204,24 @@ namespace tesseract { rect_height_(0), image_width_(0), image_height_(0) { - const char *locale; - locale = std::setlocale(LC_ALL, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_CTYPE, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_NUMERIC, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); + const char *locale; + locale = std::setlocale(LC_ALL, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_CTYPE, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_NUMERIC, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); } TessBaseAPI::~TessBaseAPI() { - End(); + End(); } /** * Returns the version identifier as a static string. Do not delete. 
*/ const char* TessBaseAPI::Version() { - return PACKAGE_VERSION; + return PACKAGE_VERSION; } /** @@ -238,7 +238,7 @@ namespace tesseract { #endif size_t TessBaseAPI::getOpenCLDevice(void **data) { #ifdef USE_OPENCL - #ifdef USE_DEVICE_SELECTION + #ifdef USE_DEVICE_SELECTION ds_device device = OpenclDevice::getDeviceSelection(); if (device.type == DS_DEVICE_OPENCL_DEVICE) { *data = new cl_device_id; @@ -248,8 +248,8 @@ namespace tesseract { #endif #endif - *data = nullptr; - return 0; + *data = nullptr; + return 0; } /** @@ -258,7 +258,7 @@ namespace tesseract { */ void TessBaseAPI::CatchSignals() { #ifdef __linux__ - struct sigaction action; + struct sigaction action; memset(&action, 0, sizeof(action)); action.sa_handler = &signal_exit; action.sa_flags = SA_RESETHAND; @@ -266,8 +266,8 @@ namespace tesseract { sigaction(SIGFPE, &action, nullptr); sigaction(SIGBUS, &action, nullptr); #else - // Warn API users that an implementation is needed. - tprintf("CatchSignals has no non-linux implementation!\n"); + // Warn API users that an implementation is needed. + tprintf("CatchSignals has no non-linux implementation!\n"); #endif } @@ -276,70 +276,70 @@ namespace tesseract { * loading a UNLV zone file. */ void TessBaseAPI::SetInputName(const char* name) { - if (input_file_ == nullptr) - input_file_ = new STRING(name); - else - *input_file_ = name; + if (input_file_ == nullptr) + input_file_ = new STRING(name); + else + *input_file_ = name; } /** Set the name of the output files. Needed only for debugging. 
*/ void TessBaseAPI::SetOutputName(const char* name) { - if (output_file_ == nullptr) - output_file_ = new STRING(name); - else - *output_file_ = name; + if (output_file_ == nullptr) + output_file_ = new STRING(name); + else + *output_file_ = name; } bool TessBaseAPI::SetVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, - tesseract_->params()); + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, + tesseract_->params()); } bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, - tesseract_->params()); + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, + tesseract_->params()); } bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - IntParam *p = ParamUtils::FindParam( - name, GlobalParams()->int_params, tesseract_->params()->int_params); - if (p == nullptr) return false; - *value = (int32_t)(*p); - return true; + IntParam *p = ParamUtils::FindParam( + name, GlobalParams()->int_params, tesseract_->params()->int_params); + if (p == nullptr) return false; + *value = (int32_t)(*p); + return true; } bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - BoolParam *p = ParamUtils::FindParam( - name, GlobalParams()->bool_params, tesseract_->params()->bool_params); - if (p == nullptr) return false; - *value = (BOOL8)(*p); - return true; + BoolParam *p = ParamUtils::FindParam( + name, GlobalParams()->bool_params, tesseract_->params()->bool_params); + if (p == nullptr) return false; + *value = (BOOL8)(*p); + return true; } const char *TessBaseAPI::GetStringVariable(const char *name) const { - 
StringParam *p = ParamUtils::FindParam( - name, GlobalParams()->string_params, tesseract_->params()->string_params); - return (p != nullptr) ? p->string() : nullptr; + StringParam *p = ParamUtils::FindParam( + name, GlobalParams()->string_params, tesseract_->params()->string_params); + return (p != nullptr) ? p->string() : nullptr; } bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - DoubleParam *p = ParamUtils::FindParam( - name, GlobalParams()->double_params, tesseract_->params()->double_params); - if (p == nullptr) return false; - *value = (double)(*p); - return true; + DoubleParam *p = ParamUtils::FindParam( + name, GlobalParams()->double_params, tesseract_->params()->double_params); + if (p == nullptr) return false; + *value = (double)(*p); + return true; } /** Get value of named variable as a string, if it exists. */ bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); + return ParamUtils::GetParamAsString(name, tesseract_->params(), val); } /** Print Tesseract parameters to the given file. */ void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params()); + ParamUtils::PrintParams(fp, tesseract_->params()); } /** @@ -355,8 +355,8 @@ namespace tesseract { const GenericVector *vars_vec, const GenericVector *vars_values, bool set_only_non_debug_params) { - return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, - vars_values, set_only_non_debug_params, nullptr); + return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, + vars_values, set_only_non_debug_params, nullptr); } // In-memory version reads the traineddata file directly from the given @@ -367,72 +367,72 @@ namespace tesseract { const GenericVector* vars_vec, const GenericVector* vars_values, bool set_only_non_debug_params, FileReader reader) { - PERF_COUNT_START("TessBaseAPI::Init") - // Default language is "eng". 
- if (language == nullptr) language = "eng"; - STRING datapath = data_size == 0 ? data : language; - // If the datapath, OcrEngineMode or the language have changed - start again. - // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language - // actually used. They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). - if (tesseract_ != nullptr && - (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || - last_oem_requested_ != oem || - (*language_ != language && tesseract_->lang != language))) { - delete tesseract_; - tesseract_ = nullptr; - } - // PERF_COUNT_SUB("delete tesseract_") + PERF_COUNT_START("TessBaseAPI::Init") + // Default language is "eng". + if (language == nullptr) language = "eng"; + STRING datapath = data_size == 0 ? data : language; + // If the datapath, OcrEngineMode or the language have changed - start again. + // Note that the language_ field stores the last requested language that was + // initialized successfully, while tesseract_->lang stores the language + // actually used. They differ only if the requested language was nullptr, in + // which case tesseract_->lang is set to the Tesseract default ("eng"). 
+ if (tesseract_ != nullptr && + (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || + last_oem_requested_ != oem || + (*language_ != language && tesseract_->lang != language))) { + delete tesseract_; + tesseract_ = nullptr; + } + // PERF_COUNT_SUB("delete tesseract_") #ifdef USE_OPENCL - OpenclDevice od; + OpenclDevice od; od.InitEnv(); #endif - PERF_COUNT_SUB("OD::InitEnv()") - bool reset_classifier = true; - if (tesseract_ == nullptr) { - reset_classifier = false; - tesseract_ = new Tesseract; - if (reader != nullptr) reader_ = reader; - TessdataManager mgr(reader_); - if (data_size != 0) { - mgr.LoadMemBuffer(language, data, data_size); - } - if (tesseract_->init_tesseract( - datapath.string(), - output_file_ != nullptr ? output_file_->string() : nullptr, - language, oem, configs, configs_size, vars_vec, vars_values, - set_only_non_debug_params, &mgr) != 0) { - return -1; + PERF_COUNT_SUB("OD::InitEnv()") + bool reset_classifier = true; + if (tesseract_ == nullptr) { + reset_classifier = false; + tesseract_ = new Tesseract; + if (reader != nullptr) reader_ = reader; + TessdataManager mgr(reader_); + if (data_size != 0) { + mgr.LoadMemBuffer(language, data, data_size); + } + if (tesseract_->init_tesseract( + datapath.string(), + output_file_ != nullptr ? output_file_->string() : nullptr, + language, oem, configs, configs_size, vars_vec, vars_values, + set_only_non_debug_params, &mgr) != 0) { + return -1; + } } - } - PERF_COUNT_SUB("update tesseract_") - // Update datapath and language requested for the last valid initialization. 
- if (datapath_ == nullptr) - datapath_ = new STRING(datapath); - else - *datapath_ = datapath; - if ((strcmp(datapath_->string(), "") == 0) && - (strcmp(tesseract_->datadir.string(), "") != 0)) - *datapath_ = tesseract_->datadir; - - if (language_ == nullptr) - language_ = new STRING(language); - else - *language_ = language; - last_oem_requested_ = oem; + PERF_COUNT_SUB("update tesseract_") + // Update datapath and language requested for the last valid initialization. + if (datapath_ == nullptr) + datapath_ = new STRING(datapath); + else + *datapath_ = datapath; + if ((strcmp(datapath_->string(), "") == 0) && + (strcmp(tesseract_->datadir.string(), "") != 0)) + *datapath_ = tesseract_->datadir; + + if (language_ == nullptr) + language_ = new STRING(language); + else + *language_ = language; + last_oem_requested_ = oem; #ifndef DISABLED_LEGACY_ENGINE - // PERF_COUNT_SUB("update last_oem_requested_") - // For same language and datapath, just reset the adaptive classifier. - if (reset_classifier) { - tesseract_->ResetAdaptiveClassifier(); - PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") - } + // PERF_COUNT_SUB("update last_oem_requested_") + // For same language and datapath, just reset the adaptive classifier. + if (reset_classifier) { + tesseract_->ResetAdaptiveClassifier(); + PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") + } #endif // ndef DISABLED_LEGACY_ENGINE - PERF_COUNT_END - return 0; + PERF_COUNT_END + return 0; } /** @@ -444,8 +444,8 @@ namespace tesseract { * The returned string should NOT be deleted. */ const char* TessBaseAPI::GetInitLanguagesAsString() const { - return (language_ == nullptr || language_->string() == nullptr) ? - "" : language_->string(); + return (language_ == nullptr || language_->string() == nullptr) ? 
+ "" : language_->string(); } /** @@ -455,13 +455,13 @@ namespace tesseract { */ void TessBaseAPI::GetLoadedLanguagesAsVector( GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) - langs->push_back(tesseract_->get_sub_lang(i)->lang); - } + langs->clear(); + if (tesseract_ != nullptr) { + langs->push_back(tesseract_->lang); + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) + langs->push_back(tesseract_->get_sub_lang(i)->lang); + } } /** @@ -469,11 +469,11 @@ namespace tesseract { */ void TessBaseAPI::GetAvailableLanguagesAsVector( GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir, "", langs); - langs->sort(CompareSTRING); - } + langs->clear(); + if (tesseract_ != nullptr) { + addAvailableLanguages(tesseract_->datadir, "", langs); + langs->sort(CompareSTRING); + } } //TODO(amit): Adapt to lstm @@ -485,12 +485,12 @@ namespace tesseract { * in a separate API at some future time. */ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - else - ParamUtils::ResetToDefaults(tesseract_->params()); - TessdataManager mgr; - return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + else + ParamUtils::ResetToDefaults(tesseract_->params()); + TessdataManager mgr; + return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); } #endif // ndef DISABLED_LEGACY_ENGINE @@ -499,12 +499,12 @@ namespace tesseract { * AnalysePage. Calls that attempt recognition will generate an error. 
*/ void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tesseract_->InitAdaptiveClassifier(nullptr); #endif - } + } } /** @@ -513,12 +513,12 @@ namespace tesseract { * and also accepts a relative or absolute path name. */ void TessBaseAPI::ReadConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); } /** Same as above, but only set debug params from the given config file. */ void TessBaseAPI::ReadDebugConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); } /** @@ -527,17 +527,17 @@ namespace tesseract { * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - tesseract_->tessedit_pageseg_mode.set_value(mode); + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + tesseract_->tessedit_pageseg_mode.set_value(mode); } /** Return the current page segmentation mode. */ PageSegMode TessBaseAPI::GetPageSegMode() const { - if (tesseract_ == nullptr) - return PSM_SINGLE_BLOCK; - return static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); + if (tesseract_ == nullptr) + return PSM_SINGLE_BLOCK; + return static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); } /** @@ -558,17 +558,17 @@ namespace tesseract { int bytes_per_line, int left, int top, int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) - return nullptr; // Nothing worth doing. 
+ if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) + return nullptr; // Nothing worth doing. - // Since this original api didn't give the exact size of the image, - // we have to invent a reasonable value. - int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; - SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, - bytes_per_pixel, bytes_per_line); - SetRectangle(left, top, width, height); + // Since this original api didn't give the exact size of the image, + // we have to invent a reasonable value. + int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; + SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, + bytes_per_pixel, bytes_per_line); + SetRectangle(left, top, width, height); - return GetUTF8Text(); + return GetUTF8Text(); } #ifndef DISABLED_LEGACY_ENGINE @@ -577,10 +577,10 @@ namespace tesseract { * adaptive data. */ void TessBaseAPI::ClearAdaptiveClassifier() { - if (tesseract_ == nullptr) - return; - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); + if (tesseract_ == nullptr) + return; + tesseract_->ResetAdaptiveClassifier(); + tesseract_->ResetDocumentDictionary(); } #endif // ndef DISABLED_LEGACY_ENGINE @@ -594,18 +594,18 @@ namespace tesseract { void TessBaseAPI::SetImage(const unsigned char* imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line) { - if (InternalSetImage()) { - thresholder_->SetImage(imagedata, width, height, - bytes_per_pixel, bytes_per_line); - SetInputImage(thresholder_->GetPixRect()); - } + if (InternalSetImage()) { + thresholder_->SetImage(imagedata, width, height, + bytes_per_pixel, bytes_per_line); + SetInputImage(thresholder_->GetPixRect()); + } } void TessBaseAPI::SetSourceResolution(int ppi) { - if (thresholder_) - thresholder_->SetSourceYResolution(ppi); - else - tprintf("Please call SetImage before SetSourceResolution.\n"); + if (thresholder_) + 
thresholder_->SetSourceYResolution(ppi); + else + tprintf("Please call SetImage before SetSourceResolution.\n"); } /** @@ -617,17 +617,17 @@ namespace tesseract { * and it is therefore more efficient to provide a Pix directly. */ void TessBaseAPI::SetImage(Pix* pix) { - if (InternalSetImage()) { - if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { - // remove alpha channel from png - PIX* p1 = pixRemoveAlpha(pix); - pixSetSpp(p1, 3); - pix = pixCopy(nullptr, p1); - pixDestroy(&p1); - } - thresholder_->SetImage(pix); - SetInputImage(thresholder_->GetPixRect()); - } + if (InternalSetImage()) { + if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { + // remove alpha channel from png + PIX* p1 = pixRemoveAlpha(pix); + pixSetSpp(p1, 3); + pix = pixCopy(nullptr, p1); + pixDestroy(&p1); + } + thresholder_->SetImage(pix); + SetInputImage(thresholder_->GetPixRect()); + } } /** @@ -636,10 +636,10 @@ namespace tesseract { * can be recognized with the same image. */ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) - return; - thresholder_->SetRectangle(left, top, width, height); - ClearResults(); + if (thresholder_ == nullptr) + return; + thresholder_->SetRectangle(left, top, width, height); + ClearResults(); } /** @@ -647,12 +647,12 @@ namespace tesseract { * Get a copy of the internal thresholded image from Tesseract. 
*/ Pix* TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return nullptr; - } - return pixClone(tesseract_->pix_binary()); + if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return nullptr; + } + return pixClone(tesseract_->pix_binary()); } /** @@ -661,7 +661,7 @@ namespace tesseract { * Can be called before or after Recognize. */ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { - return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); + return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); } /** @@ -674,8 +674,8 @@ namespace tesseract { */ Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, Pixa** pixa, int** blockids, int** paraids) { - return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, - pixa, blockids, paraids); + return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, + pixa, blockids, paraids); } /** @@ -687,7 +687,7 @@ namespace tesseract { * array of one element per line. delete [] after use. */ Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { - return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); + return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); } /** @@ -696,7 +696,7 @@ namespace tesseract { * Can be called before or after Recognize. */ Boxa* TessBaseAPI::GetWords(Pixa** pixa) { - return GetComponentImages(RIL_WORD, true, pixa, nullptr); + return GetComponentImages(RIL_WORD, true, pixa, nullptr); } /** @@ -706,7 +706,7 @@ namespace tesseract { * Can be called before or after Recognize. 
*/ Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { - return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); + return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); } /** @@ -722,88 +722,88 @@ namespace tesseract { const int raw_padding, Pixa** pixa, int** blockids, int** paraids) { - PageIterator* page_it = GetIterator(); - if (page_it == nullptr) - page_it = AnalyseLayout(); - if (page_it == nullptr) - return nullptr; // Failed. - - // Count the components to get a size for the arrays. - int component_count = 0; - int left, top, right, bottom; - - TessResultCallback* get_bbox = nullptr; - if (raw_image) { - // Get bounding box in original raw image with padding. - get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, - level, raw_padding, - &left, &top, &right, &bottom); - } else { - // Get bounding box from binarized imaged. Note that this could be - // differently scaled from the original image. - get_bbox = NewPermanentTessCallback(page_it, - &PageIterator::BoundingBoxInternal, - level, &left, &top, &right, &bottom); - } - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) - ++component_count; - } while (page_it->Next(level)); - - Boxa* boxa = boxaCreate(component_count); - if (pixa != nullptr) - *pixa = pixaCreate(component_count); - if (blockids != nullptr) - *blockids = new int[component_count]; - if (paraids != nullptr) - *paraids = new int[component_count]; - - int blockid = 0; - int paraid = 0; - int component_index = 0; - page_it->Begin(); - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) { - Box* lbox = boxCreate(left, top, right - left, bottom - top); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != nullptr) { - Pix* pix = nullptr; - if (raw_image) { - pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, - &top); - } else { - pix = page_it->GetBinaryImage(level); - } - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, 
L_CLONE); - } - if (paraids != nullptr) { - (*paraids)[component_index] = paraid; - if (page_it->IsAtFinalElement(RIL_PARA, level)) - ++paraid; - } - if (blockids != nullptr) { - (*blockids)[component_index] = blockid; - if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { - ++blockid; - paraid = 0; - } - } - ++component_index; + PageIterator* page_it = GetIterator(); + if (page_it == nullptr) + page_it = AnalyseLayout(); + if (page_it == nullptr) + return nullptr; // Failed. + + // Count the components to get a size for the arrays. + int component_count = 0; + int left, top, right, bottom; + + TessResultCallback* get_bbox = nullptr; + if (raw_image) { + // Get bounding box in original raw image with padding. + get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, + level, raw_padding, + &left, &top, &right, &bottom); + } else { + // Get bounding box from binarized imaged. Note that this could be + // differently scaled from the original image. + get_bbox = NewPermanentTessCallback(page_it, + &PageIterator::BoundingBoxInternal, + level, &left, &top, &right, &bottom); } - } while (page_it->Next(level)); - delete page_it; - delete get_bbox; - return boxa; + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) + ++component_count; + } while (page_it->Next(level)); + + Boxa* boxa = boxaCreate(component_count); + if (pixa != nullptr) + *pixa = pixaCreate(component_count); + if (blockids != nullptr) + *blockids = new int[component_count]; + if (paraids != nullptr) + *paraids = new int[component_count]; + + int blockid = 0; + int paraid = 0; + int component_index = 0; + page_it->Begin(); + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) { + Box* lbox = boxCreate(left, top, right - left, bottom - top); + boxaAddBox(boxa, lbox, L_INSERT); + if (pixa != nullptr) { + Pix* pix = nullptr; + if (raw_image) { + pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, + &top); + } else { 
+ pix = page_it->GetBinaryImage(level); + } + pixaAddPix(*pixa, pix, L_INSERT); + pixaAddBox(*pixa, lbox, L_CLONE); + } + if (paraids != nullptr) { + (*paraids)[component_index] = paraid; + if (page_it->IsAtFinalElement(RIL_PARA, level)) + ++paraid; + } + if (blockids != nullptr) { + (*blockids)[component_index] = blockid; + if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { + ++blockid; + paraid = 0; + } + } + ++component_index; + } + } while (page_it->Next(level)); + delete page_it; + delete get_bbox; + return boxa; } int TessBaseAPI::GetThresholdedImageScaleFactor() const { - if (thresholder_ == nullptr) { - return 0; - } - return thresholder_->GetScaleFactor(); + if (thresholder_ == nullptr) { + return 0; + } + return thresholder_->GetScaleFactor(); } /** @@ -824,17 +824,17 @@ namespace tesseract { PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { - if (FindLines() == 0) { - if (block_list_->empty()) - return nullptr; // The page was empty. - page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); - DetectParagraphs(false); - return new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } - return nullptr; + if (FindLines() == 0) { + if (block_list_->empty()) + return nullptr; // The page was empty. + page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); + DetectParagraphs(false); + return new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } + return nullptr; } /** @@ -842,125 +842,125 @@ namespace tesseract { * internal structures. 
*/ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (FindLines() != 0) - return -1; - delete page_res_; - if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, - &tesseract_->prev_word_best_choice_); - return 0; // Empty page. - } + if (tesseract_ == nullptr) + return -1; + if (FindLines() != 0) + return -1; + delete page_res_; + if (block_list_->empty()) { + page_res_ = new PAGE_RES(false, block_list_, + &tesseract_->prev_word_best_choice_); + return 0; // Empty page. + } - tesseract_->SetBlackAndWhitelist(); - recognition_done_ = true; + tesseract_->SetBlackAndWhitelist(); + recognition_done_ = true; #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); - } else + if (tesseract_->tessedit_resegment_from_line_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); + } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); + } else #endif // ndef DISABLED_LEGACY_ENGINE - { - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), - block_list_, &tesseract_->prev_word_best_choice_); - } + { + page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), + block_list_, &tesseract_->prev_word_best_choice_); + } - if (page_res_ == nullptr) { - return -1; - } + if (page_res_ == nullptr) { + return -1; + } - if (tesseract_->tessedit_train_line_recognizer) { - tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_train_line_recognizer) { + tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #ifndef 
DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_make_boxes_from_boxes) { - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_make_boxes_from_boxes) { + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #endif // ndef DISABLED_LEGACY_ENGINE - if (truth_cb_ != nullptr) { - tesseract_->wordrec_run_blamer.set_value(true); - PageIterator *page_it = new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - truth_cb_->Run(tesseract_->getDict().getUnicharset(), - image_height_, page_it, this->tesseract()->pix_grey()); - delete page_it; - } + if (truth_cb_ != nullptr) { + tesseract_->wordrec_run_blamer.set_value(true); + PageIterator *page_it = new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + truth_cb_->Run(tesseract_->getDict().getUnicharset(), + image_height_, page_it, this->tesseract()->pix_grey()); + delete page_it; + } - int result = 0; - if (tesseract_->interactive_display_mode) { + int result = 0; + if (tesseract_->interactive_display_mode) { #ifndef GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); #endif // GRAPHICS_DISABLED - // The page_res is invalid after an interactive session, so cleanup - // in a way that lets us continue to the next page without crashing. - delete page_res_; - page_res_ = nullptr; - return -1; + // The page_res is invalid after an interactive session, so cleanup + // in a way that lets us continue to the next page without crashing. 
+ delete page_res_; + page_res_ = nullptr; + return -1; #ifndef DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - STRING fontname; - ExtractFontName(*output_file_, &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - FILE *training_output_file = tesseract_->init_recog_training(*input_file_); - // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented( - *input_file_, page_res_, monitor, training_output_file); - fclose(training_output_file); + } else if (tesseract_->tessedit_train_from_boxes) { + STRING fontname; + ExtractFontName(*output_file_, &fontname); + tesseract_->ApplyBoxTraining(fontname, page_res_); + } else if (tesseract_->tessedit_ambigs_training) { + FILE *training_output_file = tesseract_->init_recog_training(*input_file_); + // OCR the page segmented into words by tesseract. + tesseract_->recog_training_segmented( + *input_file_, page_res_, monitor, training_output_file); + fclose(training_output_file); #endif // ndef DISABLED_LEGACY_ENGINE - } else { - // Now run the main recognition. - bool wait_for_text = true; - GetBoolVariable("paragraph_text_based", &wait_for_text); - if (!wait_for_text) DetectParagraphs(false); - if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { - if (wait_for_text) DetectParagraphs(true); } else { - result = -1; + // Now run the main recognition. + bool wait_for_text = true; + GetBoolVariable("paragraph_text_based", &wait_for_text); + if (!wait_for_text) DetectParagraphs(false); + if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { + if (wait_for_text) DetectParagraphs(true); + } else { + result = -1; + } } - } - return result; + return result; } #ifndef DISABLED_LEGACY_ENGINE /** Tests the chopper by exhaustively running chop_one_blob. 
*/ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (page_res_ != nullptr) - ClearResults(); - if (FindLines() != 0) - return -1; - // Additional conditions under which chopper test cannot be run - if (tesseract_->interactive_display_mode) return -1; + if (tesseract_ == nullptr) + return -1; + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (page_res_ != nullptr) + ClearResults(); + if (FindLines() != 0) + return -1; + // Additional conditions under which chopper test cannot be run + if (tesseract_->interactive_display_mode) return -1; - recognition_done_ = true; + recognition_done_ = true; - page_res_ = new PAGE_RES(false, block_list_, - &(tesseract_->prev_word_best_choice_)); + page_res_ = new PAGE_RES(false, block_list_, + &(tesseract_->prev_word_best_choice_)); - PAGE_RES_IT page_res_it(page_res_); + PAGE_RES_IT page_res_it(page_res_); - while (page_res_it.word() != nullptr) { - WERD_RES *word_res = page_res_it.word(); - GenericVector boxes; - tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, - page_res_it.row()->row, word_res); - page_res_it.forward(); - } - return 0; + while (page_res_it.word() != nullptr) { + WERD_RES *word_res = page_res_it.word(); + GenericVector boxes; + tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, + page_res_it.row()->row, word_res); + page_res_it.forward(); + } + return 0; } #endif // ndef DISABLED_LEGACY_ENGINE @@ -970,17 +970,17 @@ namespace tesseract { Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } const char * TessBaseAPI::GetInputName() { - if (input_file_) - return input_file_->c_str(); - return nullptr; + if (input_file_) + return input_file_->c_str(); + return nullptr; } 
const char * TessBaseAPI::GetDatapath() { - return tesseract_->datadir.c_str(); + return tesseract_->datadir.c_str(); } int TessBaseAPI::GetSourceYResolution() { - return thresholder_->GetSourceYResolution(); + return thresholder_->GetSourceYResolution(); } // If flist exists, get data from there. Otherwise get data from buf. @@ -993,56 +993,56 @@ namespace tesseract { int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number) { - if (!flist && !buf) return false; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - char pagename[MAX_PATH]; - - GenericVector lines; - if (!flist) { - buf->split('\n', &lines); - if (lines.empty()) return false; - } + if (!flist && !buf) return false; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + char pagename[MAX_PATH]; + + GenericVector lines; + if (!flist) { + buf->split('\n', &lines); + if (lines.empty()) return false; + } - // Skip to the requested page number. - for (int i = 0; i < page; i++) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + // Skip to the requested page number. 
+ for (int i = 0; i < page; i++) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } } - } - // Begin producing output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - return false; - } + // Begin producing output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + return false; + } - // Loop over all pages - or just the requested one - while (true) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } else { - if (page >= lines.size()) break; - snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); - } - chomp_string(pagename); - Pix *pix = pixRead(pagename); - if (pix == nullptr) { - tprintf("Image file %s cannot be read!\n", pagename); - return false; - } - tprintf("Page %d : %s\n", page, pagename); - bool r = ProcessPage(pix, page, pagename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - ++page; - } + // Loop over all pages - or just the requested one + while (true) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } else { + if (page >= lines.size()) break; + snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); + } + chomp_string(pagename); + Pix *pix = pixRead(pagename); + if (pix == nullptr) { + tprintf("Image file %s cannot be read!\n", pagename); + return false; + } + tprintf("Page %d : %s\n", page, pagename); + bool r = ProcessPage(pix, page, pagename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + ++page; + } - // Finish producing output - if (renderer && !renderer->EndDocument()) { - return false; - } - return true; + // Finish producing output + if (renderer && !renderer->EndDocument()) { + return false; + } + return true; } bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, @@ -1053,29 +1053,29 @@ namespace tesseract { 
TessResultRenderer* renderer, int tessedit_page_number) { #ifndef ANDROID_BUILD - Pix *pix = nullptr; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - size_t offset = 0; - for (; ; ++page) { - if (tessedit_page_number >= 0) - page = tessedit_page_number; - pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) - : pixReadFromMultipageTiff(filename, &offset); - if (pix == nullptr) break; - tprintf("Page %d\n", page + 1); - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page); - SetVariable("applybox_page", page_str); - bool r = ProcessPage(pix, page, filename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - if (!offset) break; - } - return true; + Pix *pix = nullptr; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + size_t offset = 0; + for (; ; ++page) { + if (tessedit_page_number >= 0) + page = tessedit_page_number; + pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) + : pixReadFromMultipageTiff(filename, &offset); + if (pix == nullptr) break; + tprintf("Page %d\n", page + 1); + char page_str[kMaxIntSize]; + snprintf(page_str, kMaxIntSize - 1, "%d", page); + SetVariable("applybox_page", page_str); + bool r = ProcessPage(pix, page, filename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + if (!offset) break; + } + return true; #else - return false; + return false; #endif } @@ -1084,18 +1084,18 @@ namespace tesseract { bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) { - bool result = - ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); + bool result = + ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); #ifndef DISABLED_LEGACY_ENGINE - if (result) { - if (tesseract_->tessedit_train_from_boxes && - !tesseract_->WriteTRFile(*output_file_)) { - tprintf("Write of TR file failed: %s\n", output_file_->string()); - return false; + if (result) { + if (tesseract_->tessedit_train_from_boxes && + !tesseract_->WriteTRFile(*output_file_)) { + tprintf("Write of TR file failed: %s\n", output_file_->string()); + return false; + } } - } #endif // ndef DISABLED_LEGACY_ENGINE - return result; + return result; } // In the ideal scenario, Tesseract will start working on data as soon @@ -1113,166 +1113,166 @@ namespace tesseract { const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPages") - bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); - if (stdInput) { + PERF_COUNT_START("ProcessPages") + bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); + if (stdInput) { #ifdef WIN32 - if (_setmode(_fileno(stdin), _O_BINARY) == -1) + if (_setmode(_fileno(stdin), _O_BINARY) == -1) tprintf("ERROR: cin to binary: 
%s", strerror(errno)); #endif // WIN32 - } - - if (stream_filelist) { - return ProcessPagesFileList(stdin, nullptr, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } - - // At this point we are officially in autodection territory. - // That means any data in stdin must be buffered, to make it - // seekable. - std::string buf; - const l_uint8 *data = nullptr; - if (stdInput) { - buf.assign((std::istreambuf_iterator(std::cin)), - (std::istreambuf_iterator())); - data = reinterpret_cast(buf.data()); - } else { - // Check whether the input file can be read. - if (FILE* file = fopen(filename, "rb")) { - fclose(file); - } else { - fprintf(stderr, "Error, cannot read input file %s: %s\n", - filename, strerror(errno)); - return false; } - } - // Here is our autodetection - int format; - int r = (stdInput) ? - findFileFormatBuffer(data, &format) : - findFileFormat(filename, &format); + if (stream_filelist) { + return ProcessPagesFileList(stdin, nullptr, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // Maybe we have a filelist - if (r != 0 || format == IFF_UNKNOWN) { - STRING s; + // At this point we are officially in autodection territory. + // That means any data in stdin must be buffered, to make it + // seekable. + std::string buf; + const l_uint8 *data = nullptr; if (stdInput) { - s = buf.c_str(); + buf.assign((std::istreambuf_iterator(std::cin)), + (std::istreambuf_iterator())); + data = reinterpret_cast(buf.data()); } else { - std::ifstream t(filename); - std::string u((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - s = u.c_str(); - } - return ProcessPagesFileList(nullptr, &s, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } + // Check whether the input file can be read. 
+ if (FILE* file = fopen(filename, "rb")) { + fclose(file); + } else { + fprintf(stderr, "Error, cannot read input file %s: %s\n", + filename, strerror(errno)); + return false; + } + } - // Maybe we have a TIFF which is potentially multipage - bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || - format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || - format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || - format == IFF_TIFF_ZIP); + // Here is our autodetection + int format; + int r = (stdInput) ? + findFileFormatBuffer(data, &format) : + findFileFormat(filename, &format); + + // Maybe we have a filelist + if (r != 0 || format == IFF_UNKNOWN) { + STRING s; + if (stdInput) { + s = buf.c_str(); + } else { + std::ifstream t(filename); + std::string u((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + s = u.c_str(); + } + return ProcessPagesFileList(nullptr, &s, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // Fail early if we can, before producing any output - Pix *pix = nullptr; - if (!tiff) { - pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); - if (pix == nullptr) { - return false; + // Maybe we have a TIFF which is potentially multipage + bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || + format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || + format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || + format == IFF_TIFF_ZIP); + + // Fail early if we can, before producing any output + Pix *pix = nullptr; + if (!tiff) { + pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); + if (pix == nullptr) { + return false; + } } - } - // Begin the output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - pixDestroy(&pix); - return false; - } + // Begin the output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + pixDestroy(&pix); + return false; + } - // Produce output - r = (tiff) ? 
- ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number) : - ProcessPage(pix, 0, filename, retry_config, - timeout_millisec, renderer); + // Produce output + r = (tiff) ? + ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number) : + ProcessPage(pix, 0, filename, retry_config, + timeout_millisec, renderer); - // Clean up memory as needed - pixDestroy(&pix); + // Clean up memory as needed + pixDestroy(&pix); - // End the output - if (!r || (renderer && !renderer->EndDocument())) { - return false; - } - PERF_COUNT_END - return true; + // End the output + if (!r || (renderer && !renderer->EndDocument())) { + return false; + } + PERF_COUNT_END + return true; } bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPage") - SetInputName(filename); - SetImage(pix); - bool failed = false; + PERF_COUNT_START("ProcessPage") + SetInputName(filename); + SetImage(pix); + bool failed = false; - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { - // Disabled character recognition - PageIterator* it = AnalyseLayout(); + if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + // Disabled character recognition + PageIterator* it = AnalyseLayout(); - if (it == nullptr) { - failed = true; + if (it == nullptr) { + failed = true; + } else { + delete it; + } + } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + failed = FindLines() != 0; + } else if (timeout_millisec > 0) { + // Running with a timeout. + ETEXT_DESC monitor; + monitor.cancel = nullptr; + monitor.cancel_this = nullptr; + monitor.set_deadline_msecs(timeout_millisec); + + // Now run the main recognition. 
+ failed = Recognize(&monitor) < 0; } else { - delete it; - } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { - failed = FindLines() != 0; - } else if (timeout_millisec > 0) { - // Running with a timeout. - ETEXT_DESC monitor; - monitor.cancel = nullptr; - monitor.cancel_this = nullptr; - monitor.set_deadline_msecs(timeout_millisec); - - // Now run the main recognition. - failed = Recognize(&monitor) < 0; - } else { - // Normal layout and character recognition with no timeout. - failed = Recognize(nullptr) < 0; - } + // Normal layout and character recognition with no timeout. + failed = Recognize(nullptr) < 0; + } - if (tesseract_->tessedit_write_images) { + if (tesseract_->tessedit_write_images) { #ifndef ANDROID_BUILD - Pix* page_pix = GetThresholdedImage(); - pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); + Pix* page_pix = GetThresholdedImage(); + pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); #endif // ANDROID_BUILD - } + } - if (failed && retry_config != nullptr && retry_config[0] != '\0') { - // Save current config variables before switching modes. - FILE* fp = fopen(kOldVarsFile, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); - } else { - PrintVariables(fp); - fclose(fp); + if (failed && retry_config != nullptr && retry_config[0] != '\0') { + // Save current config variables before switching modes. + FILE* fp = fopen(kOldVarsFile, "wb"); + if (fp == nullptr) { + tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); + } else { + PrintVariables(fp); + fclose(fp); + } + // Switch to alternate mode for retry. + ReadConfigFile(retry_config); + SetImage(pix); + Recognize(nullptr); + // Restore saved config variables. + ReadConfigFile(kOldVarsFile); } - // Switch to alternate mode for retry. - ReadConfigFile(retry_config); - SetImage(pix); - Recognize(nullptr); - // Restore saved config variables. 
- ReadConfigFile(kOldVarsFile); - } - if (renderer && !failed) { - failed = !renderer->AddImage(this); - } + if (renderer && !failed) { + failed = !renderer->AddImage(this); + } - PERF_COUNT_END - return !failed; + PERF_COUNT_END + return !failed; } /** @@ -1280,12 +1280,12 @@ namespace tesseract { * Recognize. The returned iterator must be deleted after use. */ LTRResultIterator* TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); } /** @@ -1297,12 +1297,12 @@ namespace tesseract { * DetectOS, or anything else that changes the internal PAGE_RES. */ ResultIterator* TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return ResultIterator::StartOfParagraph(LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_)); + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return ResultIterator::StartOfParagraph(LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_)); } /** @@ -1314,43 +1314,43 @@ namespace tesseract { * DetectOS, or anything else that changes the internal PAGE_RES. 
*/ MutableIterator* TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new MutableIterator(page_res_, tesseract_, - thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new MutableIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); } /** Make a text string from the internal data structures. */ char* TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - STRING text(""); - ResultIterator *it = GetIterator(); - do { - if (it->Empty(RIL_PARA)) continue; - const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); - text += para_text.get(); - } while (it->Next(RIL_PARA)); - char* result = new char[text.length() + 1]; - strncpy(result, text.string(), text.length() + 1); - delete it; - return result; + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + STRING text(""); + ResultIterator *it = GetIterator(); + do { + if (it->Empty(RIL_PARA)) continue; + const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); + text += para_text.get(); + } while (it->Next(RIL_PARA)); + char* result = new char[text.length() + 1]; + strncpy(result, text.string(), text.length() + 1); + delete it; + return result; } /** * Gets the block orientation at the current iterator position. 
*/ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { - tesseract::Orientation orientation; - tesseract::WritingDirection writing_direction; - tesseract::TextlineOrder textline_order; - float deskew_angle; - it->Orientation(&orientation, &writing_direction, &textline_order, - &deskew_angle); - return orientation; + tesseract::Orientation orientation; + tesseract::WritingDirection writing_direction; + tesseract::TextlineOrder textline_order; + float deskew_angle; + it->Orientation(&orientation, &writing_direction, &textline_order, + &deskew_angle); + return orientation; } /** @@ -1364,100 +1364,100 @@ namespace tesseract { static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level, STRING* hocr_str) { - tesseract::Orientation orientation = GetBlockTextOrientation(it); - if (orientation != ORIENTATION_PAGE_UP) { - hocr_str->add_str_int("; textangle ", 360 - orientation * 90); - return; - } + tesseract::Orientation orientation = GetBlockTextOrientation(it); + if (orientation != ORIENTATION_PAGE_UP) { + hocr_str->add_str_int("; textangle ", 360 - orientation * 90); + return; + } - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - // Try to get the baseline coordinates at this level. - int x1, y1, x2, y2; - if (!it->Baseline(level, &x1, &y1, &x2, &y2)) - return; - // Following the description of this field of the hOCR spec, we convert the - // baseline coordinates so that "the bottom left of the bounding box is the - // origin". - x1 -= left; - x2 -= left; - y1 -= bottom; - y2 -= bottom; - - // Now fit a line through the points so we can extract coefficients for the - // equation: y = p1 x + p0 - double p1 = 0; - double p0 = 0; - if (x1 == x2) { - // Problem computing the polynomial coefficients. 
- return; - } - p1 = (y2 - y1) / static_cast(x2 - x1); - p0 = y1 - static_cast(p1 * x1); + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + + // Try to get the baseline coordinates at this level. + int x1, y1, x2, y2; + if (!it->Baseline(level, &x1, &y1, &x2, &y2)) + return; + // Following the description of this field of the hOCR spec, we convert the + // baseline coordinates so that "the bottom left of the bounding box is the + // origin". + x1 -= left; + x2 -= left; + y1 -= bottom; + y2 -= bottom; + + // Now fit a line through the points so we can extract coefficients for the + // equation: y = p1 x + p0 + double p1 = 0; + double p0 = 0; + if (x1 == x2) { + // Problem computing the polynomial coefficients. + return; + } + p1 = (y2 - y1) / static_cast(x2 - x1); + p0 = y1 - static_cast(p1 * x1); - hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); - hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); + hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); + hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); } static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - if (num2 >= 0) { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); - } else { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - } - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + if (num2 >= 0) { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); + } else { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + } + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; } static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2, int num3) { - const size_t BUFSIZE = 64; - 
char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; } static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - // This is the only place we use double quotes instead of single quotes, - // but it may too late to change for consistency - hocr_str->add_str_int(" title=\"bbox ", left); - hocr_str->add_str_int(" ", top); - hocr_str->add_str_int(" ", right); - hocr_str->add_str_int(" ", bottom); - // Add baseline coordinates & heights for textlines only. - if (level == RIL_TEXTLINE) { - AddBaselineCoordsTohOCR(it, level, hocr_str); - // add custom height measures - float row_height, descenders, ascenders; // row attributes - it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO(rays): Do we want to limit these to a single decimal place? - hocr_str->add_str_double("; x_size ", row_height); - hocr_str->add_str_double("; x_descenders ", descenders * -1); - hocr_str->add_str_double("; x_ascenders ", ascenders); - } - *hocr_str += "\">"; + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + // This is the only place we use double quotes instead of single quotes, + // but it may too late to change for consistency + hocr_str->add_str_int(" title=\"bbox ", left); + hocr_str->add_str_int(" ", top); + hocr_str->add_str_int(" ", right); + hocr_str->add_str_int(" ", bottom); + // Add baseline coordinates & heights for textlines only. 
+ if (level == RIL_TEXTLINE) { + AddBaselineCoordsTohOCR(it, level, hocr_str); + // add custom height measures + float row_height, descenders, ascenders; // row attributes + it->RowAttributes(&row_height, &descenders, &ascenders); + // TODO(rays): Do we want to limit these to a single decimal place? + hocr_str->add_str_double("; x_size ", row_height); + hocr_str->add_str_double("; x_descenders ", descenders * -1); + hocr_str->add_str_double("; x_ascenders ", ascenders); + } + *hocr_str += "\">"; } static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - hocr_str->add_str_int("\t", left); - hocr_str->add_str_int("\t", top); - hocr_str->add_str_int("\t", right - left); - hocr_str->add_str_int("\t", bottom - top); + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + hocr_str->add_str_int("\t", left); + hocr_str->add_str_int("\t", top); + hocr_str->add_str_int("\t", right - left); + hocr_str->add_str_int("\t", bottom - top); } /** @@ -1470,7 +1470,7 @@ namespace tesseract { * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(nullptr, page_number); + return GetHOCRText(nullptr, page_number); } /** @@ -1483,23 +1483,23 @@ namespace tesseract { * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) + return nullptr; - int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; - int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
- bool para_is_ltr = true; // Default direction is LTR - const char* paragraph_lang = nullptr; - bool font_info = false; - GetBoolVariable("hocr_font_info", &font_info); + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; + int page_id = page_number + 1; // hOCR uses 1-based page numbers. + bool para_is_ltr = true; // Default direction is LTR + const char* paragraph_lang = nullptr; + bool font_info = false; + GetBoolVariable("hocr_font_info", &font_info); - STRING hocr_str(""); + STRING hocr_str(""); - if (input_file_ == nullptr) - SetInputName(nullptr); + if (input_file_ == nullptr) + SetInputName(nullptr); #ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 + // convert input name from ANSI encoding to utf-8 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); wchar_t *uni16_str = new WCHAR[str16_len]; @@ -1515,302 +1515,300 @@ namespace tesseract { delete[] utf8_str; #endif - hocr_str += "

string()); - } else { - hocr_str += "unknown"; - } - hocr_str.add_str_int("\"; bbox ", rect_left_); - hocr_str.add_str_int(" ", rect_top_); - hocr_str.add_str_int(" ", rect_width_); - hocr_str.add_str_int(" ", rect_height_); - hocr_str.add_str_int("; ppageno ", page_number); - hocr_str += "'>\n"; - - ResultIterator *res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Open any new block/paragraph/textline. - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction - hocr_str += "
IsAtBeginningOf(RIL_PARA)) { - hocr_str += "\n

ParagraphIsLtr(); - if (!para_is_ltr) { - hocr_str += " dir='rtl'"; - } - AddIdTohOCR(&hocr_str, "par", page_id, pcnt); - paragraph_lang = res_it->WordRecognitionLanguage(); - if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; - } - AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - hocr_str += "\n string()); + } else { + hocr_str += "unknown"; } + hocr_str.add_str_int("\"; bbox ", rect_left_); + hocr_str.add_str_int(" ", rect_top_); + hocr_str.add_str_int(" ", rect_width_); + hocr_str.add_str_int(" ", rect_height_); + hocr_str.add_str_int("; ppageno ", page_number); + hocr_str += "'>\n"; + + ResultIterator *res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } - // Now, process the word... - std::vector>>* confidencemap = nullptr; - if (tesseract_->lstm_choice_mode) { - confidencemap = res_it->GetBestLSTMSymbolChoices(); - } - hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); - hocr_str.add_str_int(" title='bbox ", left); - hocr_str.add_str_int(" ", top); - hocr_str.add_str_int(" ", right); - hocr_str.add_str_int(" ", bottom); - hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); - if (font_info) { - if (font_name) { - hocr_str += "; x_font "; - hocr_str += HOcrEscape(font_name); - } - hocr_str.add_str_int("; x_fsize ", pointsize); - } - hocr_str += "'"; - const char* lang = res_it->WordRecognitionLanguage(); - if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { - hocr_str += " lang='"; - hocr_str += lang; - hocr_str += "'"; - } - switch (res_it->WordDirection()) { - // Only emit direction if different from current paragraph direction - case DIR_LEFT_TO_RIGHT: - if (!para_is_ltr) hocr_str += " dir='ltr'"; - break; - case 
DIR_RIGHT_TO_LEFT: - if (para_is_ltr) hocr_str += " dir='rtl'"; - break; - case DIR_MIX: - case DIR_NEUTRAL: - default: // Do nothing. - break; - } - hocr_str += ">"; - bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - if (bold) hocr_str += ""; - if (italic) hocr_str += ""; - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme.get()); - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - if (italic) hocr_str += ""; - if (bold) hocr_str += ""; - // If the lstm choice mode is required it is added here - if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - hocr_str += "\n IsAtBeginningOf(RIL_BLOCK)) { + para_is_ltr = true; // reset to default direction + hocr_str += "

IsAtBeginningOf(RIL_PARA)) { + hocr_str += "\n

ParagraphIsLtr(); + if (!para_is_ltr) { + hocr_str += " dir='rtl'"; + } + AddIdTohOCR(&hocr_str, "par", page_id, pcnt); + paragraph_lang = res_it->WordRecognitionLanguage(); + if (paragraph_lang) { + hocr_str += " lang='"; + hocr_str += paragraph_lang; + hocr_str += "'"; + } + AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + hocr_str += "\n >>* confidencemap = nullptr; + if (tesseract_->lstm_choice_mode) { + confidencemap = res_it->GetBestLSTMSymbolChoices(); + } + hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, + &monospace, &serif, &smallcaps, + &pointsize, &font_id); + hocr_str.add_str_int(" title='bbox ", left); + hocr_str.add_str_int(" ", top); + hocr_str.add_str_int(" ", right); + hocr_str.add_str_int(" ", bottom); + hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); + if (font_info) { + if (font_name) { + hocr_str += "; x_font "; + hocr_str += HOcrEscape(font_name); + } + hocr_str.add_str_int("; x_fsize ", pointsize); + } + hocr_str += "'"; + const char* lang = res_it->WordRecognitionLanguage(); + if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { + hocr_str += " lang='"; + hocr_str += lang; + hocr_str += "'"; + } + switch (res_it->WordDirection()) { + // Only emit direction if different from current paragraph direction + case DIR_LEFT_TO_RIGHT: + if (!para_is_ltr) hocr_str += " dir='ltr'"; + break; + case DIR_RIGHT_TO_LEFT: + if (para_is_ltr) hocr_str += " dir='rtl'"; + break; + case DIR_MIX: + case DIR_NEUTRAL: + default: // Do nothing. 
+ break; + } hocr_str += ">"; - std::vector> timestep = (*confidencemap)[i]; - for (std::pair conf : timestep) { - hocr_str += "IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); + bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + if (bold) hocr_str += ""; + if (italic) hocr_str += ""; + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + hocr_str += HOcrEscape(grapheme.get()); + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + if (italic) hocr_str += ""; + if (bold) hocr_str += ""; + // If the lstm choice mode is required it is added here + if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + hocr_str += "\n > timestep = (*confidencemap)[i]; + for (std::pair conf : timestep) { + hocr_str += "lstm_choice_mode == 2 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + std::vector> timestep = (*confidencemap)[i]; + if (timestep.size() > 0) { + hocr_str += "\n lstm_choice_mode == 2 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - std::vector> timestep = (*confidencemap)[i]; - if (timestep.size() > 0) { - hocr_str += "\n Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Add rows for any new block/paragraph/textline. 
- if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - block_num++; - par_num = 0; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block - } - if (res_it->IsAtBeginningOf(RIL_PARA)) { - par_num++; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - line_num++; - word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line - } - - // Now, process the word... - int left, top, right, bottom; - res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; + int page_id = page_number + 1; // we use 1-based page numbers. 
+ + STRING tsv_str(""); + + int page_num = page_id; + int block_num = 0; + int par_num = 0; + int line_num = 0; + int word_num = 0; + + tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); - tsv_str.add_str_int("\t", left); - tsv_str.add_str_int("\t", top); - tsv_str.add_str_int("\t", right - left); - tsv_str.add_str_int("\t", bottom - top); - tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); - tsv_str += "\t"; - - // Increment counts if at end of block/paragraph/textline. - if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; - if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; - if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; + tsv_str.add_str_int("\t", rect_left_); + tsv_str.add_str_int("\t", rect_top_); + tsv_str.add_str_int("\t", rect_width_); + tsv_str.add_str_int("\t", rect_height_); + tsv_str += "\t-1\t\n"; + + ResultIterator* res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } - do { - tsv_str += - std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row - wcnt++; - } + // Add rows for any new block/paragraph/textline. 
+ if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + block_num++; + par_num = 0; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for block + } + if (res_it->IsAtBeginningOf(RIL_PARA)) { + par_num++; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_PARA, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for para + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + line_num++; + word_num = 0; + tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for line + } + + // Now, process the word... + int left, top, right, bottom; + res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + word_num++; + tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", left); + tsv_str.add_str_int("\t", top); + tsv_str.add_str_int("\t", right - left); + tsv_str.add_str_int("\t", bottom - top); + tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); + tsv_str += "\t"; + + // Increment counts if at end of block/paragraph/textline. 
+ if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; + if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; + if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; + + do { + tsv_str += + std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + tsv_str += "\n"; // end of row + wcnt++; + } - char* ret = new char[tsv_str.length() + 1]; - strcpy(ret, tsv_str.string()); - delete res_it; - return ret; + char* ret = new char[tsv_str.length() + 1]; + strcpy(ret, tsv_str.string()); + delete res_it; + return ret; } /** The 5 numbers output for each box (the usual 4 and a page number.) */ @@ -1844,39 +1842,39 @@ namespace tesseract { * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int blob_count; - int utf8_length = TextLength(&blob_count); - int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + - kMaxBytesPerLine; - char* result = new char[total_length]; - result[0] = '\0'; - int output_length = 0; - LTRResultIterator* it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text( - it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. - for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') - text[i] = kTesseractReject; - } - snprintf(result + output_length, total_length - output_length, - "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, - right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... 
- if (output_length + kMaxBytesPerLine > total_length) - break; - } - } while (it->Next(RIL_SYMBOL)); - delete it; - return result; + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int blob_count; + int utf8_length = TextLength(&blob_count); + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + + kMaxBytesPerLine; + char* result = new char[total_length]; + result[0] = '\0'; + int output_length = 0; + LTRResultIterator* it = GetLTRIterator(); + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text( + it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') + text[i] = kTesseractReject; + } + snprintf(result + output_length, total_length - output_length, + "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, + right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... + if (output_length + kMaxBytesPerLine > total_length) + break; + } + } while (it->Next(RIL_SYMBOL)); + delete it; + return result; } /** @@ -1898,104 +1896,104 @@ namespace tesseract { * Returned string must be freed with the delete [] operator. */ char* TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - bool tilde_crunch_written = false; - bool last_char_was_newline = true; - bool last_char_was_tilde = false; - - int total_length = TextLength(nullptr); - PAGE_RES_IT page_res_it(page_res_); - char* result = new char[total_length]; - char* ptr = result; - for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - // Process the current word. 
- if (word->unlv_crunch_mode != CR_NONE) { - if (word->unlv_crunch_mode != CR_DELETE && - (!tilde_crunch_written || - (word->unlv_crunch_mode == CR_KEEP_SPACE && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)))) { - if (!word->word->flag(W_BOL) && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)) { - /* Write a space to separate from preceding good text */ - *ptr++ = ' '; - last_char_was_tilde = false; - } - if (!last_char_was_tilde) { - // Write a reject char. - last_char_was_tilde = true; - *ptr++ = kUNLVReject; - tilde_crunch_written = true; - last_char_was_newline = false; - } - } - } else { - // NORMAL PROCESSING of non tilde crunched words. - tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); - const char* wordstr = word->best_choice->unichar_string().string(); - const STRING& lengths = word->best_choice->unichar_lengths(); - int length = lengths.length(); - int i = 0; - int offset = 0; - - if (last_char_was_tilde && - word->word->space() == 0 && wordstr[offset] == ' ') { - // Prevent adjacent tilde across words - we know that adjacent tildes - // within words have been removed. - // Skip the first character. 
- offset = lengths[i++]; - } - if (i < length && wordstr[offset] != 0) { - if (!last_char_was_newline) - *ptr++ = ' '; - else - last_char_was_newline = false; - for (; i < length; offset += lengths[i++]) { - if (wordstr[offset] == ' ' || - wordstr[offset] == kTesseractReject) { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } else { - if (word->reject_map[i].rejected()) - *ptr++ = kUNLVSuspect; - UNICHAR ch(wordstr + offset, lengths[i]); - int uni_ch = ch.first_uni(); - for (int j = 0; kUniChs[j] != 0; ++j) { - if (kUniChs[j] == uni_ch) { - uni_ch = kLatinChs[j]; - break; - } + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + bool tilde_crunch_written = false; + bool last_char_was_newline = true; + bool last_char_was_tilde = false; + + int total_length = TextLength(nullptr); + PAGE_RES_IT page_res_it(page_res_); + char* result = new char[total_length]; + char* ptr = result; + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + // Process the current word. + if (word->unlv_crunch_mode != CR_NONE) { + if (word->unlv_crunch_mode != CR_DELETE && + (!tilde_crunch_written || + (word->unlv_crunch_mode == CR_KEEP_SPACE && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)))) { + if (!word->word->flag(W_BOL) && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)) { + /* Write a space to separate from preceding good text */ + *ptr++ = ' '; + last_char_was_tilde = false; + } + if (!last_char_was_tilde) { + // Write a reject char. 
+ last_char_was_tilde = true; + *ptr++ = kUNLVReject; + tilde_crunch_written = true; + last_char_was_newline = false; + } } - if (uni_ch <= 0xff) { - *ptr++ = static_cast(uni_ch); - last_char_was_tilde = false; - } else { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; + } else { + // NORMAL PROCESSING of non tilde crunched words. + tilde_crunch_written = false; + tesseract_->set_unlv_suspects(word); + const char* wordstr = word->best_choice->unichar_string().string(); + const STRING& lengths = word->best_choice->unichar_lengths(); + int length = lengths.length(); + int i = 0; + int offset = 0; + + if (last_char_was_tilde && + word->word->space() == 0 && wordstr[offset] == ' ') { + // Prevent adjacent tilde across words - we know that adjacent tildes + // within words have been removed. + // Skip the first character. + offset = lengths[i++]; + } + if (i < length && wordstr[offset] != 0) { + if (!last_char_was_newline) + *ptr++ = ' '; + else + last_char_was_newline = false; + for (; i < length; offset += lengths[i++]) { + if (wordstr[offset] == ' ' || + wordstr[offset] == kTesseractReject) { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } else { + if (word->reject_map[i].rejected()) + *ptr++ = kUNLVSuspect; + UNICHAR ch(wordstr + offset, lengths[i]); + int uni_ch = ch.first_uni(); + for (int j = 0; kUniChs[j] != 0; ++j) { + if (kUniChs[j] == uni_ch) { + uni_ch = kLatinChs[j]; + break; + } + } + if (uni_ch <= 0xff) { + *ptr++ = static_cast(uni_ch); + last_char_was_tilde = false; + } else { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } + } + } } - } } - } - } - if (word->word->flag(W_EOL) && !last_char_was_newline) { - /* Add a new line output */ - *ptr++ = '\n'; - tilde_crunch_written = false; - last_char_was_newline = true; - last_char_was_tilde = false; + if (word->word->flag(W_EOL) && !last_char_was_newline) { + /* Add a new line output */ + *ptr++ = '\n'; + tilde_crunch_written = false; + last_char_was_newline = true; + 
last_char_was_tilde = false; + } } - } - *ptr++ = '\n'; - *ptr = '\0'; - return result; + *ptr++ = '\n'; + *ptr = '\0'; + return result; } #ifndef DISABLED_LEGACY_ENGINE @@ -2012,27 +2010,27 @@ namespace tesseract { bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) { - OSResults osr; + OSResults osr; - bool osd = DetectOS(&osr); - if (!osd) { - return false; - } + bool osd = DetectOS(&osr); + if (!osd) { + return false; + } - int orient_id = osr.best_result.orientation_id; - int script_id = osr.get_best_script(orient_id); - if (orient_conf) *orient_conf = osr.best_result.oconfidence; - if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees + int orient_id = osr.best_result.orientation_id; + int script_id = osr.get_best_script(orient_id); + if (orient_conf) *orient_conf = osr.best_result.oconfidence; + if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees - if (script_name) { - const char* script = osr.unicharset->get_script_from_script_id(script_id); + if (script_name) { + const char* script = osr.unicharset->get_script_from_script_id(script_id); - *script_name = script; - } + *script_name = script; + } - if (script_conf) *script_conf = osr.best_result.sconfidence; + if (script_conf) *script_conf = osr.best_result.sconfidence; - return true; + return true; } /** @@ -2041,70 +2039,70 @@ namespace tesseract { * page_number is a 0-based page index that will appear in the osd file. 
*/ char* TessBaseAPI::GetOsdText(int page_number) { - int orient_deg; - float orient_conf; - const char* script_name; - float script_conf; + int orient_deg; + float orient_conf; + const char* script_name; + float script_conf; - if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, - &script_conf)) - return nullptr; + if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, + &script_conf)) + return nullptr; - // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_deg / 90); + // clockwise rotation needed to make the page upright + int rotate = OrientationIdToValue(orient_deg / 90); - const int kOsdBufsize = 255; - char* osd_buf = new char[kOsdBufsize]; - snprintf(osd_buf, kOsdBufsize, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, - script_conf); + const int kOsdBufsize = 255; + char* osd_buf = new char[kOsdBufsize]; + snprintf(osd_buf, kOsdBufsize, + "Page number: %d\n" + "Orientation in degrees: %d\n" + "Rotate: %d\n" + "Orientation confidence: %.2f\n" + "Script: %s\n" + "Script confidence: %.2f\n", + page_number, orient_deg, rotate, orient_conf, script_name, + script_conf); - return osd_buf; + return osd_buf; } #endif // ndef DISABLED_LEGACY_ENGINE /** Returns the average word confidence for Tesseract page result. */ int TessBaseAPI::MeanTextConf() { - int* conf = AllWordConfidences(); - if (!conf) return 0; - int sum = 0; - int *pt = conf; - while (*pt >= 0) sum += *pt++; - if (pt != conf) sum /= pt - conf; - delete [] conf; - return sum; + int* conf = AllWordConfidences(); + if (!conf) return 0; + int sum = 0; + int *pt = conf; + while (*pt >= 0) sum += *pt++; + if (pt != conf) sum /= pt - conf; + delete [] conf; + return sum; } /** Returns an array of all word confidences, terminated by -1. 
*/ int* TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int n_word = 0; - PAGE_RES_IT res_it(page_res_); - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) - n_word++; - - int* conf = new int[n_word+1]; - n_word = 0; - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { - WERD_RES *word = res_it.word(); - WERD_CHOICE* choice = word->best_choice; - int w_conf = static_cast(100 + 5 * choice->certainty()); - // This is the eq for converting Tesseract confidence to 1..100 - if (w_conf < 0) w_conf = 0; - if (w_conf > 100) w_conf = 100; - conf[n_word++] = w_conf; - } - conf[n_word] = -1; - return conf; + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int n_word = 0; + PAGE_RES_IT res_it(page_res_); + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) + n_word++; + + int* conf = new int[n_word+1]; + n_word = 0; + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { + WERD_RES *word = res_it.word(); + WERD_CHOICE* choice = word->best_choice; + int w_conf = static_cast(100 + 5 * choice->certainty()); + // This is the eq for converting Tesseract confidence to 1..100 + if (w_conf < 0) w_conf = 0; + if (w_conf > 100) w_conf = 100; + conf[n_word++] = w_conf; + } + conf[n_word] = -1; + return conf; } #ifndef DISABLED_LEGACY_ENGINE @@ -2119,59 +2117,59 @@ namespace tesseract { * Returns false if adaption was not possible for some reason. 
*/ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { - int debug = 0; - GetIntVariable("applybox_debug", &debug); - bool success = true; - PageSegMode current_psm = GetPageSegMode(); - SetPageSegMode(mode); - SetVariable("classify_enable_learning", "0"); - const std::unique_ptr text(GetUTF8Text()); - if (debug) { - tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); - } - if (text != nullptr) { - PAGE_RES_IT it(page_res_); - WERD_RES* word_res = it.word(); - if (word_res != nullptr) { - word_res->word->set_text(wordstr); - // Check to see if text matches wordstr. - int w = 0; - int t; - for (t = 0; text[t] != '\0'; ++t) { - if (text[t] == '\n' || text[t] == ' ') - continue; - while (wordstr[w] == ' ') ++w; - if (text[t] != wordstr[w]) - break; - ++w; - } - if (text[t] != '\0' || wordstr[w] != '\0') { - // No match. - delete page_res_; - GenericVector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); - PAGE_RES_IT pr_it(page_res_); - if (pr_it.word() == nullptr) - success = false; - else - word_res = pr_it.word(); - } else { - word_res->BestChoiceToCorrectText(); - } - if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); - } + int debug = 0; + GetIntVariable("applybox_debug", &debug); + bool success = true; + PageSegMode current_psm = GetPageSegMode(); + SetPageSegMode(mode); + SetVariable("classify_enable_learning", "0"); + const std::unique_ptr text(GetUTF8Text()); + if (debug) { + tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); + } + if (text != nullptr) { + PAGE_RES_IT it(page_res_); + WERD_RES* word_res = it.word(); + if (word_res != nullptr) { + word_res->word->set_text(wordstr); + // Check to see if text matches wordstr. 
+ int w = 0; + int t; + for (t = 0; text[t] != '\0'; ++t) { + if (text[t] == '\n' || text[t] == ' ') + continue; + while (wordstr[w] == ' ') ++w; + if (text[t] != wordstr[w]) + break; + ++w; + } + if (text[t] != '\0' || wordstr[w] != '\0') { + // No match. + delete page_res_; + GenericVector boxes; + page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); + tesseract_->ReSegmentByClassification(page_res_); + tesseract_->TidyUp(page_res_); + PAGE_RES_IT pr_it(page_res_); + if (pr_it.word() == nullptr) + success = false; + else + word_res = pr_it.word(); + } else { + word_res->BestChoiceToCorrectText(); + } + if (success) { + tesseract_->EnableLearning = true; + tesseract_->LearnWord(nullptr, word_res); + } + } else { + success = false; + } } else { - success = false; + success = false; } - } else { - success = false; - } - SetPageSegMode(current_psm); - return success; + SetPageSegMode(current_psm); + return success; } #endif // ndef DISABLED_LEGACY_ENGINE @@ -2182,10 +2180,10 @@ namespace tesseract { * any Recognize or Get* operation. */ void TessBaseAPI::Clear() { - if (thresholder_ != nullptr) - thresholder_->Clear(); - ClearResults(); - if (tesseract_ != nullptr) SetInputImage(nullptr); + if (thresholder_ != nullptr) + thresholder_->Clear(); + ClearResults(); + if (tesseract_ != nullptr) SetInputImage(nullptr); } /** @@ -2195,33 +2193,33 @@ namespace tesseract { * other than Init and anything declared above it in the class definition. 
*/ void TessBaseAPI::End() { - Clear(); - delete thresholder_; - thresholder_ = nullptr; - delete page_res_; - page_res_ = nullptr; - delete block_list_; - block_list_ = nullptr; - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; - delete tesseract_; - tesseract_ = nullptr; - delete osd_tesseract_; - osd_tesseract_ = nullptr; - delete equ_detect_; - equ_detect_ = nullptr; - delete input_file_; - input_file_ = nullptr; - delete output_file_; - output_file_ = nullptr; - delete datapath_; - datapath_ = nullptr; - delete language_; - language_ = nullptr; + Clear(); + delete thresholder_; + thresholder_ = nullptr; + delete page_res_; + page_res_ = nullptr; + delete block_list_; + block_list_ = nullptr; + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; + delete tesseract_; + tesseract_ = nullptr; + delete osd_tesseract_; + osd_tesseract_ = nullptr; + delete equ_detect_; + equ_detect_ = nullptr; + delete input_file_; + input_file_ = nullptr; + delete output_file_; + output_file_ = nullptr; + delete datapath_; + datapath_ = nullptr; + delete language_; + language_ = nullptr; } // Clear any library-level memory caches. @@ -2230,7 +2228,7 @@ namespace tesseract { // and End() of individual TessBaseAPI's. This function allows the clearing // of these caches. 
void TessBaseAPI::ClearPersistentCache() { - Dict::GlobalDawgCache()->DeleteUnusedDawgs(); + Dict::GlobalDawgCache()->DeleteUnusedDawgs(); } /** @@ -2238,55 +2236,55 @@ namespace tesseract { * returns 0 if the word is invalid, non-zero if valid */ int TessBaseAPI::IsValidWord(const char *word) { - return tesseract_->getDict().valid_word(word); + return tesseract_->getDict().valid_word(word); } // Returns true if utf8_character is defined in the UniCharset. bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { - return tesseract_->unicharset.contains_unichar(utf8_character); + return tesseract_->unicharset.contains_unichar(utf8_character); } // TODO(rays) Obsolete this function and replace with a more aptly named // function that returns image coordinates rather than tesseract coordinates. bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { - PageIterator* it = AnalyseLayout(); - if (it == nullptr) { - return false; - } - int x1, x2, y1, y2; - it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); - // Calculate offset and slope (NOTE: Kind of ugly) - if (x2 <= x1) x2 = x1 + 1; - // Convert the point pair to slope/offset of the baseline (in image coords.) - *out_slope = static_cast(y2 - y1) / (x2 - x1); - *out_offset = static_cast(y1 - *out_slope * x1); - // Get the y-coord of the baseline at the left and right edges of the - // textline's bounding box. - int left, top, right, bottom; - if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { + PageIterator* it = AnalyseLayout(); + if (it == nullptr) { + return false; + } + int x1, x2, y1, y2; + it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); + // Calculate offset and slope (NOTE: Kind of ugly) + if (x2 <= x1) x2 = x1 + 1; + // Convert the point pair to slope/offset of the baseline (in image coords.) 
+ *out_slope = static_cast(y2 - y1) / (x2 - x1); + *out_offset = static_cast(y1 - *out_slope * x1); + // Get the y-coord of the baseline at the left and right edges of the + // textline's bounding box. + int left, top, right, bottom; + if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { + delete it; + return false; + } + int left_y = IntCastRounded(*out_slope * left + *out_offset); + int right_y = IntCastRounded(*out_slope * right + *out_offset); + // Shift the baseline down so it passes through the nearest bottom-corner + // of the textline's bounding box. This is the difference between the y + // at the lowest (max) edge of the box and the actual box bottom. + *out_offset += bottom - std::max(left_y, right_y); + // Switch back to bottom-up tesseract coordinates. Requires negation of + // the slope and height - offset for the offset. + *out_slope = -*out_slope; + *out_offset = rect_height_ - *out_offset; delete it; - return false; - } - int left_y = IntCastRounded(*out_slope * left + *out_offset); - int right_y = IntCastRounded(*out_slope * right + *out_offset); - // Shift the baseline down so it passes through the nearest bottom-corner - // of the textline's bounding box. This is the difference between the y - // at the lowest (max) edge of the box and the actual box bottom. - *out_offset += bottom - std::max(left_y, right_y); - // Switch back to bottom-up tesseract coordinates. Requires negation of - // the slope and height - offset for the offset. - *out_slope = -*out_slope; - *out_offset = rect_height_ - *out_offset; - delete it; - return true; + return true; } /** Sets Dict::letter_is_okay_ function to point to the given function. */ void TessBaseAPI::SetDictFunc(DictFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; - } + if (tesseract_ != nullptr) { + tesseract_->getDict().letter_is_okay_ = f; + } } /** @@ -2298,33 +2296,33 @@ namespace tesseract { * utf-8 string. 
*/ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; - // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + if (tesseract_ != nullptr) { + tesseract_->getDict().probability_in_context_ = f; + // Set it for the sublangs too. + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + } } - } } #ifndef DISABLED_LEGACY_ENGINE /** Sets Wordrec::fill_lattice_ function to point to the given function. */ void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { - if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; + if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; } #endif // ndef DISABLED_LEGACY_ENGINE /** Common code for setting the image. */ bool TessBaseAPI::InternalSetImage() { - if (tesseract_ == nullptr) { - tprintf("Please call Init before attempting to set an image.\n"); - return false; - } - if (thresholder_ == nullptr) - thresholder_ = new ImageThresholder; - ClearResults(); - return true; + if (tesseract_ == nullptr) { + tprintf("Please call Init before attempting to set an image.\n"); + return false; + } + if (thresholder_ == nullptr) + thresholder_ = new ImageThresholder; + ClearResults(); + return true; } /** @@ -2334,153 +2332,153 @@ namespace tesseract { * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ bool TessBaseAPI::Threshold(Pix** pix) { - ASSERT_HOST(pix != nullptr); - if (*pix != nullptr) - pixDestroy(pix); - // Zero resolution messes up the algorithms, so make sure it is credible. 
- int user_dpi = 0; - bool a = GetIntVariable("user_defined_dpi", &user_dpi); - int y_res = thresholder_->GetScaledYResolution(); - if (user_dpi && (user_dpi < kMinCredibleResolution || - user_dpi > kMaxCredibleResolution)) { - tprintf("Warning: User defined image dpi is outside of expected range " - "(%d - %d)!\n", - kMinCredibleResolution, kMaxCredibleResolution); - } - // Always use user defined dpi - if (user_dpi) { - thresholder_->SetSourceYResolution(user_dpi); - } else if (y_res < kMinCredibleResolution || - y_res > kMaxCredibleResolution) { - tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", - y_res, kMinCredibleResolution); - thresholder_->SetSourceYResolution(kMinCredibleResolution); - } - PageSegMode pageseg_mode = - static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); - if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; - thresholder_->GetImageSizes(&rect_left_, &rect_top_, - &rect_width_, &rect_height_, - &image_width_, &image_height_); - if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); - } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); - } - // Set the internal resolution that is used for layout parameters from the - // estimated resolution, rather than the image resolution, which may be - // fabricated, but we will use the image resolution, if there is one, to - // report output point sizes. - int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), - kMinCredibleResolution, - kMaxCredibleResolution); - if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { - tprintf("Estimated internal resolution %d out of range! 
" - "Corrected to %d.\n", - thresholder_->GetScaledEstimatedResolution(), estimated_res); - } - tesseract_->set_source_resolution(estimated_res); - SavePixForCrash(estimated_res, *pix); - return true; + ASSERT_HOST(pix != nullptr); + if (*pix != nullptr) + pixDestroy(pix); + // Zero resolution messes up the algorithms, so make sure it is credible. + int user_dpi = 0; + bool a = GetIntVariable("user_defined_dpi", &user_dpi); + int y_res = thresholder_->GetScaledYResolution(); + if (user_dpi && (user_dpi < kMinCredibleResolution || + user_dpi > kMaxCredibleResolution)) { + tprintf("Warning: User defined image dpi is outside of expected range " + "(%d - %d)!\n", + kMinCredibleResolution, kMaxCredibleResolution); + } + // Always use user defined dpi + if (user_dpi) { + thresholder_->SetSourceYResolution(user_dpi); + } else if (y_res < kMinCredibleResolution || + y_res > kMaxCredibleResolution) { + tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", + y_res, kMinCredibleResolution); + thresholder_->SetSourceYResolution(kMinCredibleResolution); + } + PageSegMode pageseg_mode = + static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); + if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; + thresholder_->GetImageSizes(&rect_left_, &rect_top_, + &rect_width_, &rect_height_, + &image_width_, &image_height_); + if (!thresholder_->IsBinary()) { + tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + } else { + tesseract_->set_pix_thresholds(nullptr); + tesseract_->set_pix_grey(nullptr); + } + // Set the internal resolution that is used for layout parameters from the + // estimated resolution, rather than the image resolution, which may be + // fabricated, but we will use the image resolution, if there is one, to + // report output point sizes. 
+ int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), + kMinCredibleResolution, + kMaxCredibleResolution); + if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { + tprintf("Estimated internal resolution %d out of range! " + "Corrected to %d.\n", + thresholder_->GetScaledEstimatedResolution(), estimated_res); + } + tesseract_->set_source_resolution(estimated_res); + SavePixForCrash(estimated_res, *pix); + return true; } /** Find lines from the image making the BLOCK_LIST. */ int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (recognition_done_) - ClearResults(); - if (!block_list_->empty()) { - return 0; - } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (recognition_done_) + ClearResults(); + if (!block_list_->empty()) { + return 0; + } + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; #ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tesseract_->InitAdaptiveClassifier(nullptr); #endif - } - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return -1; - } + } + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return -1; + } - tesseract_->PrepareForPageseg(); + tesseract_->PrepareForPageseg(); #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { - if (equ_detect_ == nullptr && datapath_ != nullptr) { - equ_detect_ = new EquationDetect(datapath_->string(), nullptr); - } - if (equ_detect_ == nullptr) { - tprintf("Warning: Could not set equation detector\n"); - } else { - tesseract_->SetEquationDetect(equ_detect_); + if (tesseract_->textord_equation_detect) { + if (equ_detect_ == nullptr 
&& datapath_ != nullptr) { + equ_detect_ = new EquationDetect(datapath_->string(), nullptr); + } + if (equ_detect_ == nullptr) { + tprintf("Warning: Could not set equation detector\n"); + } else { + tesseract_->SetEquationDetect(equ_detect_); + } } - } #endif // ndef DISABLED_LEGACY_ENGINE - Tesseract* osd_tess = osd_tesseract_; - OSResults osr; - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && - osd_tess == nullptr) { - if (strcmp(language_->string(), "osd") == 0) { - osd_tess = tesseract_; - } else { - osd_tesseract_ = new Tesseract; - TessdataManager mgr(reader_); - if (datapath_ == nullptr) { - tprintf("Warning: Auto orientation and script detection requested," - " but data path is undefined\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, - "osd", OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, - false, &mgr) == 0) { - osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution( - thresholder_->GetSourceYResolution()); - } else { - tprintf("Warning: Auto orientation and script detection requested," - " but osd language failed to load\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } + Tesseract* osd_tess = osd_tesseract_; + OSResults osr; + if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && + osd_tess == nullptr) { + if (strcmp(language_->string(), "osd") == 0) { + osd_tess = tesseract_; + } else { + osd_tesseract_ = new Tesseract; + TessdataManager mgr(reader_); + if (datapath_ == nullptr) { + tprintf("Warning: Auto orientation and script detection requested," + " but data path is undefined\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, + "osd", OEM_TESSERACT_ONLY, + nullptr, 0, nullptr, nullptr, + false, &mgr) == 0) { + osd_tess = osd_tesseract_; + osd_tesseract_->set_source_resolution( + thresholder_->GetSourceYResolution()); + } else { + 
tprintf("Warning: Auto orientation and script detection requested," + " but osd language failed to load\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } + } } - } - if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) - return -1; + if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) + return -1; - // If Devanagari is being recognized, we use different images for page seg - // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); - return 0; + // If Devanagari is being recognized, we use different images for page seg + // and for OCR. + tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); + return 0; } /** Delete the pageres and clear the block list ready for a new page. */ void TessBaseAPI::ClearResults() { - if (tesseract_ != nullptr) { - tesseract_->Clear(); - } - delete page_res_; - page_res_ = nullptr; - recognition_done_ = false; - if (block_list_ == nullptr) - block_list_ = new BLOCK_LIST; - else - block_list_->clear(); - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - SavePixForCrash(0, nullptr); + if (tesseract_ != nullptr) { + tesseract_->Clear(); + } + delete page_res_; + page_res_ = nullptr; + recognition_done_ = false; + if (block_list_ == nullptr) + block_list_ = new BLOCK_LIST; + else + block_list_->clear(); + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + SavePixForCrash(0, nullptr); } /** @@ -2491,29 +2489,29 @@ namespace tesseract { * Also return the number of recognized blobs in blob_count. */ int TessBaseAPI::TextLength(int* blob_count) { - if (tesseract_ == nullptr || page_res_ == nullptr) - return 0; - - PAGE_RES_IT page_res_it(page_res_); - int total_length = 2; - int total_blobs = 0; - // Iterate over the data structures to extract the recognition result. 
- for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - WERD_CHOICE* choice = word->best_choice; - if (choice != nullptr) { - total_blobs += choice->length() + 2; - total_length += choice->unichar_string().length() + 2; - for (int i = 0; i < word->reject_map.length(); ++i) { - if (word->reject_map[i].rejected()) - ++total_length; - } + if (tesseract_ == nullptr || page_res_ == nullptr) + return 0; + + PAGE_RES_IT page_res_it(page_res_); + int total_length = 2; + int total_blobs = 0; + // Iterate over the data structures to extract the recognition result. + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + WERD_CHOICE* choice = word->best_choice; + if (choice != nullptr) { + total_blobs += choice->length() + 2; + total_length += choice->unichar_string().length() + 2; + for (int i = 0; i < word->reject_map.length(); ++i) { + if (word->reject_map[i].rejected()) + ++total_length; + } + } } - } - if (blob_count != nullptr) - *blob_count = total_blobs; - return total_length; + if (blob_count != nullptr) + *blob_count = total_blobs; + return total_length; } #ifndef DISABLED_LEGACY_ENGINE @@ -2522,22 +2520,22 @@ namespace tesseract { * Returns true if the image was processed successfully. 
*/ bool TessBaseAPI::DetectOS(OSResults* osr) { - if (tesseract_ == nullptr) - return false; - ClearResults(); - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return false; - } + if (tesseract_ == nullptr) + return false; + ClearResults(); + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return false; + } - if (input_file_ == nullptr) - input_file_ = new STRING(kInputFile); - return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; + if (input_file_ == nullptr) + input_file_ = new STRING(kInputFile); + return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; } #endif // ndef DISABLED_LEGACY_ENGINE void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); + tesseract_->min_orientation_margin.set_value(margin); } /** @@ -2556,95 +2554,95 @@ namespace tesseract { */ void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, bool** vertical_writing) { - delete[] *block_orientation; - *block_orientation = nullptr; - delete[] *vertical_writing; - *vertical_writing = nullptr; - BLOCK_IT block_it(block_list_); - - block_it.move_to_first(); - int num_blocks = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - ++num_blocks; - } - if (!num_blocks) { - tprintf("WARNING: Found no blocks\n"); - return; - } - *block_orientation = new int[num_blocks]; - *vertical_writing = new bool[num_blocks]; - block_it.move_to_first(); - int i = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - FCOORD re_rotation = block_it.data()->re_rotation(); - float re_theta = re_rotation.angle(); - FCOORD classify_rotation = block_it.data()->classify_rotation(); - float classify_theta = 
classify_rotation.angle(); - double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; - if (rot_theta < 0) rot_theta += 4; - int num_rotations = static_cast(rot_theta + 0.5); - (*block_orientation)[i] = num_rotations; - // The classify_rotation is non-zero only if the text has vertical - // writing direction. - (*vertical_writing)[i] = classify_rotation.y() != 0.0f; - ++i; - } + delete[] *block_orientation; + *block_orientation = nullptr; + delete[] *vertical_writing; + *vertical_writing = nullptr; + BLOCK_IT block_it(block_list_); + + block_it.move_to_first(); + int num_blocks = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + ++num_blocks; + } + if (!num_blocks) { + tprintf("WARNING: Found no blocks\n"); + return; + } + *block_orientation = new int[num_blocks]; + *vertical_writing = new bool[num_blocks]; + block_it.move_to_first(); + int i = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + FCOORD re_rotation = block_it.data()->re_rotation(); + float re_theta = re_rotation.angle(); + FCOORD classify_rotation = block_it.data()->classify_rotation(); + float classify_theta = classify_rotation.angle(); + double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; + if (rot_theta < 0) rot_theta += 4; + int num_rotations = static_cast(rot_theta + 0.5); + (*block_orientation)[i] = num_rotations; + // The classify_rotation is non-zero only if the text has vertical + // writing direction. 
+ (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + ++i; + } } void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { - int debug_level = 0; - GetIntVariable("paragraph_debug_level", &debug_level); - if (paragraph_models_ == nullptr) - paragraph_models_ = new GenericVector; - MutableIterator *result_it = GetMutableIterator(); - do { // Detect paragraphs for this block - GenericVector models; - ::tesseract::DetectParagraphs(debug_level, after_text_recognition, - result_it, &models); - *paragraph_models_ += models; - } while (result_it->Next(RIL_BLOCK)); - delete result_it; + int debug_level = 0; + GetIntVariable("paragraph_debug_level", &debug_level); + if (paragraph_models_ == nullptr) + paragraph_models_ = new GenericVector; + MutableIterator *result_it = GetMutableIterator(); + do { // Detect paragraphs for this block + GenericVector models; + ::tesseract::DetectParagraphs(debug_level, after_text_recognition, + result_it, &models); + *paragraph_models_ += models; + } while (result_it->Next(RIL_BLOCK)); + delete result_it; } /** This method returns the string form of the specified unichar. */ const char* TessBaseAPI::GetUnichar(int unichar_id) { - return tesseract_->unicharset.id_to_unichar(unichar_id); + return tesseract_->unicharset.id_to_unichar(unichar_id); } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; - return tesseract_->getDict().GetDawg(i); + if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; + return tesseract_->getDict().GetDawg(i); } /** Return the number of dawgs loaded into tesseract_ object. */ int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); + return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); } /** Escape a char string - remove <>&"' with HTML codes. 
*/ STRING HOcrEscape(const char* text) { - STRING ret; - const char *ptr; - for (ptr = text; *ptr; ptr++) { - switch (*ptr) { - case '<': ret += "<"; break; - case '>': ret += ">"; break; - case '&': ret += "&"; break; - case '"': ret += """; break; - case '\'': ret += "'"; break; - default: ret += *ptr; + STRING ret; + const char *ptr; + for (ptr = text; *ptr; ptr++) { + switch (*ptr) { + case '<': ret += "<"; break; + case '>': ret += ">"; break; + case '&': ret += "&"; break; + case '"': ret += """; break; + case '\'': ret += "'"; break; + default: ret += *ptr; + } } - } - return ret; + return ret; } @@ -2656,10 +2654,10 @@ namespace tesseract { /** Find lines from the image making the BLOCK_LIST. */ BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { - ASSERT_HOST(FindLines() == 0); - BLOCK_LIST* result = block_list_; - block_list_ = nullptr; - return result; + ASSERT_HOST(FindLines() == 0); + BLOCK_LIST* result = block_list_; + block_list_ = nullptr; + return result; } /** @@ -2668,7 +2666,7 @@ namespace tesseract { * and let go of including the other headers. */ void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; + delete block_list; } @@ -2676,42 +2674,42 @@ namespace tesseract { float xheight, float descender, float ascender) { - int32_t xstarts[] = {-32000}; - double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); + int32_t xstarts[] = {-32000}; + double quad_coeffs[] = {0, 0, baseline}; + return new ROW(1, + xstarts, + quad_coeffs, + xheight, + ascender - (baseline + xheight), + descender - baseline, + 0, + 0); } /** Creates a TBLOB* from the whole pix. 
*/ TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); - - // Create C_BLOBs from the page - extract_edges(pix, &block); - - // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); - C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; - // Move all the outlines to the first blob. - C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); - } - // Convert the first blob to the output TBLOB. - return TBLOB::PolygonalCopy(false, c_blob_it.data()); + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); + + // Create C_BLOBs from the page + extract_edges(pix, &block); + + // Merge all C_BLOBs + C_BLOB_LIST *list = block.blob_list(); + C_BLOB_IT c_blob_it(list); + if (c_blob_it.empty()) + return nullptr; + // Move all the outlines to the first blob. + C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); + for (c_blob_it.forward(); + !c_blob_it.at_first(); + c_blob_it.forward()) { + C_BLOB *c_blob = c_blob_it.data(); + ol_it.add_list_after(c_blob->out_list()); + } + // Convert the first blob to the output TBLOB. + return TBLOB::PolygonalCopy(false, c_blob_it.data()); } /** @@ -2720,12 +2718,12 @@ namespace tesseract { * normalization-antidote is returned. 
*/ void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { - TBOX box = tblob->bounding_box(); - float x_center = (box.left() + box.right()) / 2.0f; - float baseline = row->base_line(x_center); - float scale = kBlnXHeight / row->x_height(); - tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); + TBOX box = tblob->bounding_box(); + float x_center = (box.left() + box.right()) / 2.0f; + float baseline = row->base_line(x_center); + float scale = kBlnXHeight / row->x_height(); + tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, + 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); } /** @@ -2735,14 +2733,14 @@ namespace tesseract { static TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); + TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); - // Normalize TBLOB - ROW *row = - TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); - delete row; - return tblob; + // Normalize TBLOB + ROW *row = + TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); + delete row; + return tblob; } /** @@ -2756,49 +2754,49 @@ namespace tesseract { float xheight, float descender, float ascender) { - UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, - tesseract_->classify_bln_numeric_mode, - tesseract_->pix_binary()); - float threshold; - float best_rating = -100; - - - // Classify to get a raw choice. 
- BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, &choices); - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(&choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->rating() > best_rating) { - best_rating = choice_it.data()->rating(); + UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); + TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + tesseract_->classify_bln_numeric_mode, + tesseract_->pix_binary()); + float threshold; + float best_rating = -100; + + + // Classify to get a raw choice. + BLOB_CHOICE_LIST choices; + tesseract_->AdaptiveClassifier(blob, &choices); + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(&choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + if (choice_it.data()->rating() > best_rating) { + best_rating = choice_it.data()->rating(); + } } - } - threshold = tesseract_->matcher_good_threshold; + threshold = tesseract_->matcher_good_threshold; - if (blob->outlines) - tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, - tesseract_->AdaptedTemplates); - delete blob; + if (blob->outlines) + tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, + tesseract_->AdaptedTemplates); + delete blob; } PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); - return page_res; + PAGE_RES *page_res = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); + return page_res; } PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result) { - if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - 
tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); - return pass1_result; + if (!pass1_result) + pass1_result = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); + return pass1_result; } struct TESS_CHAR : ELIST_LINK { @@ -2808,9 +2806,9 @@ namespace tesseract { TBOX box; TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { - length = (len == -1 ? strlen(repr) : len); - unicode_repr = new char[length + 1]; - strncpy(unicode_repr, repr, length); + length = (len == -1 ? strlen(repr) : len); + unicode_repr = new char[length + 1]; + strncpy(unicode_repr, repr, length); } TESS_CHAR() @@ -2820,7 +2818,7 @@ namespace tesseract { { // Satisfies ELISTIZE. } ~TESS_CHAR() { - delete [] unicode_repr; + delete [] unicode_repr; } }; @@ -2828,18 +2826,18 @@ namespace tesseract { ELISTIZE(TESS_CHAR) static void add_space(TESS_CHAR_IT* it) { - TESS_CHAR *t = new TESS_CHAR(0, " "); - it->add_after_then_move(t); + TESS_CHAR *t = new TESS_CHAR(0, " "); + it->add_after_then_move(t); } static float rating_to_cost(float rating) { - rating = 100 + rating; - // cuddled that to save from coverage profiler - // (I have never seen ratings worse than -100, - // but the check won't hurt) - if (rating < 0) rating = 0; - return rating; + rating = 100 + rating; + // cuddled that to save from coverage profiler + // (I have never seen ratings worse than -100, + // but the check won't hurt) + if (rating < 0) rating = 0; + return rating; } /** @@ -2848,28 +2846,28 @@ namespace tesseract { */ static void extract_result(TESS_CHAR_IT* out, PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - int word_count = 0; - while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().string(); - const char *len = word->best_choice->unichar_lengths().string(); - TBOX real_rect = 
word->word->bounding_box(); - - if (word_count) - add_space(out); - int n = strlen(len); - for (int i = 0; i < n; i++) { - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); - tc->box = real_rect.intersection(word->box_word->BlobBox(i)); - out->add_after_then_move(tc); - str += *len; - len++; - } - page_res_it.forward(); - word_count++; - } + PAGE_RES_IT page_res_it(page_res); + int word_count = 0; + while (page_res_it.word() != nullptr) { + WERD_RES *word = page_res_it.word(); + const char *str = word->best_choice->unichar_string().string(); + const char *len = word->best_choice->unichar_lengths().string(); + TBOX real_rect = word->word->bounding_box(); + + if (word_count) + add_space(out); + int n = strlen(len); + for (int i = 0; i < n; i++) { + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); + tc->box = real_rect.intersection(word->box_word->BlobBox(i)); + out->add_after_then_move(tc); + str += *len; + len++; + } + page_res_it.forward(); + word_count++; + } } /** @@ -2884,41 +2882,41 @@ namespace tesseract { int** x1, int** y1, PAGE_RES* page_res) { - TESS_CHAR_LIST tess_chars; - TESS_CHAR_IT tess_chars_it(&tess_chars); - extract_result(&tess_chars_it, page_res); - tess_chars_it.move_to_first(); - int n = tess_chars.length(); - int text_len = 0; - *lengths = new int[n]; - *costs = new float[n]; - *x0 = new int[n]; - *y0 = new int[n]; - *x1 = new int[n]; - *y1 = new int[n]; - int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); - text_len += (*lengths)[i] = tc->length; - (*costs)[i] = tc->cost; - (*x0)[i] = tc->box.left(); - (*y0)[i] = tc->box.bottom(); - (*x1)[i] = tc->box.right(); - (*y1)[i] = tc->box.top(); - } - char *p = *text = new char[text_len]; - - tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward()) { 
- TESS_CHAR *tc = tess_chars_it.data(); - strncpy(p, tc->unicode_repr, tc->length); - p += tc->length; - } - return n; + TESS_CHAR_LIST tess_chars; + TESS_CHAR_IT tess_chars_it(&tess_chars); + extract_result(&tess_chars_it, page_res); + tess_chars_it.move_to_first(); + int n = tess_chars.length(); + int text_len = 0; + *lengths = new int[n]; + *costs = new float[n]; + *x0 = new int[n]; + *y0 = new int[n]; + *x1 = new int[n]; + *y1 = new int[n]; + int i = 0; + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward(), i++) { + TESS_CHAR *tc = tess_chars_it.data(); + text_len += (*lengths)[i] = tc->length; + (*costs)[i] = tc->cost; + (*x0)[i] = tc->box.left(); + (*y0)[i] = tc->box.bottom(); + (*x1)[i] = tc->box.right(); + (*y1)[i] = tc->box.top(); + } + char *p = *text = new char[text_len]; + + tess_chars_it.move_to_first(); + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward()) { + TESS_CHAR *tc = tess_chars_it.data(); + strncpy(p, tc->unicode_repr, tc->length); + p += tc->length; + } + return n; } /** This method returns the features associated with the input blob. */ @@ -2932,52 +2930,52 @@ namespace tesseract { INT_FEATURE_STRUCT* int_features, int* num_features, int* feature_outline_index) { - GenericVector outline_counts; - GenericVector bl_features; - GenericVector cn_features; - INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); - if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { - *num_features = 0; - return; // Feature extraction failed. - } - *num_features = cn_features.size(); - memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); - // TODO(rays) Pass outline_counts back and simplify the calling code. 
- if (feature_outline_index != nullptr) { - int f = 0; - for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; + GenericVector outline_counts; + GenericVector bl_features; + GenericVector cn_features; + INT_FX_RESULT_STRUCT fx_info; + tesseract_->ExtractFeatures(*blob, false, &bl_features, + &cn_features, &fx_info, &outline_counts); + if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { + *num_features = 0; + return; // Feature extraction failed. + } + *num_features = cn_features.size(); + memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); + // TODO(rays) Pass outline_counts back and simplify the calling code. + if (feature_outline_index != nullptr) { + int f = 0; + for (int i = 0; i < outline_counts.size(); ++i) { + while (f < outline_counts[i]) + feature_outline_index[f++] = i; + } } - } } // This method returns the row to which a box of specified dimensions would // belong. If no good match is found, it returns nullptr. 
ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom) { - TBOX box(left, bottom, right, top); - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; - } + TBOX box(left, bottom, right, top); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->pdblk.bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (box.major_overlap(word->bounding_box())) + return row; + } + } } - } - return nullptr; + return nullptr; } /** Method to run adaptive classifier on a blob. 
*/ @@ -2986,21 +2984,21 @@ namespace tesseract { int* unichar_ids, float* ratings, int* num_matches_returned) { - BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; - tesseract_->AdaptiveClassifier(blob, choices); - BLOB_CHOICE_IT choices_it(choices); - int& index = *num_matches_returned; - index = 0; - for (choices_it.mark_cycle_pt(); - !choices_it.cycled_list() && index < num_max_matches; - choices_it.forward()) { - BLOB_CHOICE* choice = choices_it.data(); - unichar_ids[index] = choice->unichar_id(); - ratings[index] = choice->rating(); - ++index; - } - *num_matches_returned = index; - delete choices; + BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; + tesseract_->AdaptiveClassifier(blob, choices); + BLOB_CHOICE_IT choices_it(choices); + int& index = *num_matches_returned; + index = 0; + for (choices_it.mark_cycle_pt(); + !choices_it.cycled_list() && index < num_max_matches; + choices_it.forward()) { + BLOB_CHOICE* choice = choices_it.data(); + unichar_ids[index] = choice->unichar_id(); + ratings[index] = choice->rating(); + ++index; + } + *num_matches_returned = index; + delete choices; } #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/baseapi.h b/src/api/baseapi.h index 6da486011a..f82dfa2d56 100644 --- a/src/api/baseapi.h +++ b/src/api/baseapi.h @@ -229,10 +229,10 @@ namespace tesseract { const GenericVector *vars_values, bool set_only_non_debug_params); int Init(const char* datapath, const char* language, OcrEngineMode oem) { - return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); + return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); } int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); } // In-memory version reads the traineddata file directly from the given // data[data_size] array, and/or reads data via a FileReader. 
@@ -374,9 +374,9 @@ namespace tesseract { * delete it when it it is replaced or the API is destructed. */ void SetThresholder(ImageThresholder* thresholder) { - delete thresholder_; - thresholder_ = thresholder; - ClearResults(); + delete thresholder_; + thresholder_ = thresholder; + ClearResults(); } /** @@ -410,7 +410,7 @@ namespace tesseract { Helper method to extract from the thresholded image. (most common usage) */ Boxa* GetTextlines(Pixa** pixa, int** blockids) { - return GetTextlines(false, 0, pixa, blockids, nullptr); + return GetTextlines(false, 0, pixa, blockids, nullptr); } /** @@ -461,7 +461,7 @@ namespace tesseract { Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); } /** @@ -586,12 +586,6 @@ namespace tesseract { */ char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - /** - * Make an XML-formatted string with Alto markup from the internal - * data structures. - */ - char* GetAltoText(ETEXT_DESC* monitor, int page_number); - /** * Make a HTML-formatted string with hOCR markup from the internal * data structures. @@ -600,6 +594,13 @@ namespace tesseract { */ char* GetHOCRText(int page_number); + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(ETEXT_DESC* monitor, int page_number); + + /** * Make an XML-formatted string with Alto markup from the internal * data structures. 
diff --git a/src/api/capi.cpp b/src/api/capi.cpp index c9216b8ede..1bbf621c25 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -71,11 +71,6 @@ TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* output return new TessHOcrRenderer(outputbase); } -TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info) -{ - return new TessHOcrRenderer(outputbase, font_info); -} - TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, BOOL textonly) { diff --git a/src/api/capi.h b/src/api/capi.h index 85908c78b0..8f999e8536 100644 --- a/src/api/capi.h +++ b/src/api/capi.h @@ -126,9 +126,8 @@ TESS_API void TESS_CALL TessDeleteIntArray(int* arr); /* Renderer API */ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); -TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info); +TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, BOOL textonly); TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); @@ -279,6 +278,7 @@ TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); + TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number); TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); diff --git a/src/api/renderer.cpp 
b/src/api/renderer.cpp index e2cf91003e..c4c24e032f 100644 --- a/src/api/renderer.cpp +++ b/src/api/renderer.cpp @@ -37,82 +37,82 @@ namespace tesseract { fout_(stdout), next_(nullptr), happy_(true) { - if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { - STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); - fout_ = fopen(outfile.string(), "wb"); - if (fout_ == nullptr) { - happy_ = false; + if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { + STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); + fout_ = fopen(outfile.string(), "wb"); + if (fout_ == nullptr) { + happy_ = false; + } } - } } TessResultRenderer::~TessResultRenderer() { - if (fout_ != nullptr) { - if (fout_ != stdout) - fclose(fout_); - else - clearerr(fout_); - } - delete next_; + if (fout_ != nullptr) { + if (fout_ != stdout) + fclose(fout_); + else + clearerr(fout_); + } + delete next_; } void TessResultRenderer::insert(TessResultRenderer* next) { - if (next == nullptr) return; - - TessResultRenderer* remainder = next_; - next_ = next; - if (remainder) { - while (next->next_ != nullptr) { - next = next->next_; + if (next == nullptr) return; + + TessResultRenderer* remainder = next_; + next_ = next; + if (remainder) { + while (next->next_ != nullptr) { + next = next->next_; + } + next->next_ = remainder; } - next->next_ = remainder; - } } bool TessResultRenderer::BeginDocument(const char* title) { - if (!happy_) return false; - title_ = title; - imagenum_ = -1; - bool ok = BeginDocumentHandler(); - if (next_) { - ok = next_->BeginDocument(title) && ok; - } - return ok; + if (!happy_) return false; + title_ = title; + imagenum_ = -1; + bool ok = BeginDocumentHandler(); + if (next_) { + ok = next_->BeginDocument(title) && ok; + } + return ok; } bool TessResultRenderer::AddImage(TessBaseAPI* api) { - if (!happy_) return false; - ++imagenum_; - bool ok = AddImageHandler(api); - if (next_) { - ok = next_->AddImage(api) && ok; - } - 
return ok; + if (!happy_) return false; + ++imagenum_; + bool ok = AddImageHandler(api); + if (next_) { + ok = next_->AddImage(api) && ok; + } + return ok; } bool TessResultRenderer::EndDocument() { - if (!happy_) return false; - bool ok = EndDocumentHandler(); - if (next_) { - ok = next_->EndDocument() && ok; - } - return ok; + if (!happy_) return false; + bool ok = EndDocumentHandler(); + if (next_) { + ok = next_->EndDocument() && ok; + } + return ok; } void TessResultRenderer::AppendString(const char* s) { - AppendData(s, strlen(s)); + AppendData(s, strlen(s)); } void TessResultRenderer::AppendData(const char* s, int len) { - if (!tesseract::Serialize(fout_, s, len)) happy_ = false; + if (!tesseract::Serialize(fout_, s, len)) happy_ = false; } bool TessResultRenderer::BeginDocumentHandler() { - return happy_; + return happy_; } bool TessResultRenderer::EndDocumentHandler() { - return happy_; + return happy_; } @@ -124,19 +124,19 @@ namespace tesseract { } bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr utf8(api->GetUTF8Text()); - if (utf8 == nullptr) { - return false; - } + const std::unique_ptr utf8(api->GetUTF8Text()); + if (utf8 == nullptr) { + return false; + } - AppendString(utf8.get()); + AppendString(utf8.get()); - const char* pageSeparator = api->GetStringVariable("page_separator"); - if (pageSeparator != nullptr && *pageSeparator != '\0') { - AppendString(pageSeparator); - } + const char* pageSeparator = api->GetStringVariable("page_separator"); + if (pageSeparator != nullptr && *pageSeparator != '\0') { + AppendString(pageSeparator); + } - return true; + return true; } /********************************************************************** @@ -144,53 +144,53 @@ namespace tesseract { **********************************************************************/ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) : TessResultRenderer(outputbase, "hocr") { - font_info_ = false; + font_info_ = false; } 
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) : TessResultRenderer(outputbase, "hocr") { - font_info_ = font_info; + font_info_ = font_info; } bool TessHOcrRenderer::BeginDocumentHandler() { - AppendString( - "\n" - "\n" - "\n \n "); - AppendString(title()); - AppendString( - "\n" - "\n" - " \n" - " \n" - "\n\n"); + "\n" + "\n" + "\n \n "); + AppendString(title()); + AppendString( + "\n" + "\n" + " \n" + " \n" + "\n\n"); - return true; + return true; } bool TessHOcrRenderer::EndDocumentHandler() { - AppendString(" \n\n"); + AppendString(" \n\n"); - return true; + return true; } bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr hocr(api->GetHOCRText(imagenum())); - if (hocr == nullptr) return false; + const std::unique_ptr hocr(api->GetHOCRText(imagenum())); + if (hocr == nullptr) return false; - AppendString(hocr.get()); + AppendString(hocr.get()); - return true; + return true; } /********************************************************************** @@ -198,31 +198,31 @@ namespace tesseract { **********************************************************************/ TessTsvRenderer::TessTsvRenderer(const char* outputbase) : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; + font_info_ = false; } TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) : TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; + font_info_ = font_info; } bool TessTsvRenderer::BeginDocumentHandler() { - // Output TSV column headings - AppendString( - "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" - "num\tleft\ttop\twidth\theight\tconf\ttext\n"); - return true; + // Output TSV column headings + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + "num\tleft\ttop\twidth\theight\tconf\ttext\n"); + return true; } bool TessTsvRenderer::EndDocumentHandler() { return true; } bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr 
tsv(api->GetTSVText(imagenum())); - if (tsv == nullptr) return false; + const std::unique_ptr tsv(api->GetTSVText(imagenum())); + if (tsv == nullptr) return false; - AppendString(tsv.get()); + AppendString(tsv.get()); - return true; + return true; } /********************************************************************** @@ -233,12 +233,12 @@ namespace tesseract { } bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr unlv(api->GetUNLVText()); - if (unlv == nullptr) return false; + const std::unique_ptr unlv(api->GetUNLVText()); + if (unlv == nullptr) return false; - AppendString(unlv.get()); + AppendString(unlv.get()); - return true; + return true; } /********************************************************************** @@ -249,12 +249,12 @@ namespace tesseract { } bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr text(api->GetBoxText(imagenum())); - if (text == nullptr) return false; + const std::unique_ptr text(api->GetBoxText(imagenum())); + if (text == nullptr) return false; - AppendString(text.get()); + AppendString(text.get()); - return true; + return true; } #ifndef DISABLED_LEGACY_ENGINE @@ -266,13 +266,13 @@ namespace tesseract { : TessResultRenderer(outputbase, "osd") {} bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { - char* osd = api->GetOsdText(imagenum()); - if (osd == nullptr) return false; + char* osd = api->GetOsdText(imagenum()); + if (osd == nullptr) return false; - AppendString(osd); - delete[] osd; + AppendString(osd); + delete[] osd; - return true; + return true; } #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/renderer.h b/src/api/renderer.h index f2313c31fd..cb91f3e005 100644 --- a/src/api/renderer.h +++ b/src/api/renderer.h @@ -180,7 +180,6 @@ namespace tesseract { }; - /** * Renders Tesseract output into a TSV string */ From 2a9137941abfabf78b53253bebcf5b3e5e61c5bf Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Sat, 24 Nov 2018 09:27:43 -0500 
Subject: [PATCH 4/9] Remove changes unrelated to ALTO --- src/api/baseapi.cpp | 4408 ++++++++++++++++++------------------- src/api/baseapi.h | 1706 +++++++------- src/api/capi.cpp | 34 +- src/api/capi.h | 92 +- src/api/renderer.cpp | 392 ++-- src/api/renderer.h | 348 +-- src/api/tesseractmain.cpp | 872 ++++---- 7 files changed, 3926 insertions(+), 3926 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 5a7945154d..4caf4428f8 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -94,34 +94,34 @@ BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin"); namespace tesseract { /** Minimum sensible image size to be worth running tesseract. */ - const int kMinRectSize = 10; +const int kMinRectSize = 10; /** Character returned when Tesseract couldn't recognize as anything. */ - const char kTesseractReject = '~'; +const char kTesseractReject = '~'; /** Character used by UNLV error counter as a reject. */ - const char kUNLVReject = '~'; +const char kUNLVReject = '~'; /** Character used by UNLV as a suspect marker. */ - const char kUNLVSuspect = '^'; +const char kUNLVSuspect = '^'; /** * Filename used for input image file, from which to derive a name to search * for a possible UNLV zone file, if none is specified by SetInputName. */ - const char* kInputFile = "noname.tif"; +const char* kInputFile = "noname.tif"; /** * Temp file used for storing current parameters before applying retry values. */ - const char* kOldVarsFile = "failed_vars.txt"; +const char* kOldVarsFile = "failed_vars.txt"; /** Max string length of an int. */ - const int kMaxIntSize = 22; +const int kMaxIntSize = 22; /* Add all available languages recursively. */ - static void addAvailableLanguages(const STRING &datadir, const STRING &base, - GenericVector* langs) - { - const STRING base2 = (base.string()[0] == '\0') ? 
base : base + "/"; - const size_t extlen = sizeof(kTrainedDataSuffix); +static void addAvailableLanguages(const STRING &datadir, const STRING &base, + GenericVector* langs) +{ + const STRING base2 = (base.string()[0] == '\0') ? base : base + "/"; + const size_t extlen = sizeof(kTrainedDataSuffix); #ifdef _WIN32 - WIN32_FIND_DATA data; + WIN32_FIND_DATA data; HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); if (handle != INVALID_HANDLE_VALUE) { BOOL result = TRUE; @@ -146,83 +146,83 @@ namespace tesseract { FindClose(handle); } #else // _WIN32 - DIR* dir = opendir((datadir + base).string()); - if (dir != nullptr) { - dirent *de; - while ((de = readdir(dir))) { - char *name = de->d_name; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - struct stat st; - if (stat((datadir + base2 + name).string(), &st) == 0 && - (st.st_mode & S_IFDIR) == S_IFDIR) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } - } - } - } - closedir(dir); + DIR* dir = opendir((datadir + base).string()); + if (dir != nullptr) { + dirent *de; + while ((de = readdir(dir))) { + char *name = de->d_name; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + struct stat st; + if (stat((datadir + base2 + name).string(), &st) == 0 && + (st.st_mode & S_IFDIR) == S_IFDIR) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' && + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); + } } -#endif + } } + closedir(dir); + } +#endif +} // Compare two STRING values (used for sorting). 
- static int CompareSTRING(const void* p1, const void* p2) { - const STRING* s1 = static_cast(p1); - const STRING* s2 = static_cast(p2); - return strcmp(s1->c_str(), s2->c_str()); - } - - TessBaseAPI::TessBaseAPI() - : tesseract_(nullptr), - osd_tesseract_(nullptr), - equ_detect_(nullptr), - reader_(nullptr), - // Thresholder is initialized to nullptr here, but will be set before use by: - // A constructor of a derived API, SetThresholder(), or - // created implicitly when used in InternalSetImage. - thresholder_(nullptr), - paragraph_models_(nullptr), - block_list_(nullptr), - page_res_(nullptr), - input_file_(nullptr), - output_file_(nullptr), - datapath_(nullptr), - language_(nullptr), - last_oem_requested_(OEM_DEFAULT), - recognition_done_(false), - truth_cb_(nullptr), - rect_left_(0), - rect_top_(0), - rect_width_(0), - rect_height_(0), - image_width_(0), - image_height_(0) { - const char *locale; - locale = std::setlocale(LC_ALL, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_CTYPE, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - locale = std::setlocale(LC_NUMERIC, nullptr); - ASSERT_HOST(!strcmp(locale, "C")); - } - - TessBaseAPI::~TessBaseAPI() { - End(); - } +static int CompareSTRING(const void* p1, const void* p2) { + const STRING* s1 = static_cast(p1); + const STRING* s2 = static_cast(p2); + return strcmp(s1->c_str(), s2->c_str()); +} + +TessBaseAPI::TessBaseAPI() + : tesseract_(nullptr), + osd_tesseract_(nullptr), + equ_detect_(nullptr), + reader_(nullptr), + // Thresholder is initialized to nullptr here, but will be set before use by: + // A constructor of a derived API, SetThresholder(), or + // created implicitly when used in InternalSetImage. 
+ thresholder_(nullptr), + paragraph_models_(nullptr), + block_list_(nullptr), + page_res_(nullptr), + input_file_(nullptr), + output_file_(nullptr), + datapath_(nullptr), + language_(nullptr), + last_oem_requested_(OEM_DEFAULT), + recognition_done_(false), + truth_cb_(nullptr), + rect_left_(0), + rect_top_(0), + rect_width_(0), + rect_height_(0), + image_width_(0), + image_height_(0) { + const char *locale; + locale = std::setlocale(LC_ALL, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_CTYPE, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); + locale = std::setlocale(LC_NUMERIC, nullptr); + ASSERT_HOST(!strcmp(locale, "C")); +} + +TessBaseAPI::~TessBaseAPI() { + End(); +} /** * Returns the version identifier as a static string. Do not delete. */ - const char* TessBaseAPI::Version() { - return PACKAGE_VERSION; - } +const char* TessBaseAPI::Version() { + return PACKAGE_VERSION; +} /** * If compiled with OpenCL AND an available OpenCL @@ -232,13 +232,13 @@ namespace tesseract { * otherwise *device=nullptr and returns 0. */ #ifdef USE_OPENCL - #ifdef USE_DEVICE_SELECTION +#ifdef USE_DEVICE_SELECTION #include "opencl_device_selection.h" #endif #endif - size_t TessBaseAPI::getOpenCLDevice(void **data) { +size_t TessBaseAPI::getOpenCLDevice(void **data) { #ifdef USE_OPENCL - #ifdef USE_DEVICE_SELECTION +#ifdef USE_DEVICE_SELECTION ds_device device = OpenclDevice::getDeviceSelection(); if (device.type == DS_DEVICE_OPENCL_DEVICE) { *data = new cl_device_id; @@ -248,17 +248,17 @@ namespace tesseract { #endif #endif - *data = nullptr; - return 0; - } + *data = nullptr; + return 0; +} /** * Writes the thresholded image to stderr as a PBM file on receipt of a * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). 
*/ - void TessBaseAPI::CatchSignals() { +void TessBaseAPI::CatchSignals() { #ifdef __linux__ - struct sigaction action; + struct sigaction action; memset(&action, 0, sizeof(action)); action.sa_handler = &signal_exit; action.sa_flags = SA_RESETHAND; @@ -266,81 +266,81 @@ namespace tesseract { sigaction(SIGFPE, &action, nullptr); sigaction(SIGBUS, &action, nullptr); #else - // Warn API users that an implementation is needed. - tprintf("CatchSignals has no non-linux implementation!\n"); + // Warn API users that an implementation is needed. + tprintf("CatchSignals has no non-linux implementation!\n"); #endif - } +} /** * Set the name of the input file. Needed only for training and * loading a UNLV zone file. */ - void TessBaseAPI::SetInputName(const char* name) { - if (input_file_ == nullptr) - input_file_ = new STRING(name); - else - *input_file_ = name; - } +void TessBaseAPI::SetInputName(const char* name) { + if (input_file_ == nullptr) + input_file_ = new STRING(name); + else + *input_file_ = name; +} /** Set the name of the output files. Needed only for debugging. 
*/ - void TessBaseAPI::SetOutputName(const char* name) { - if (output_file_ == nullptr) - output_file_ = new STRING(name); - else - *output_file_ = name; - } - - bool TessBaseAPI::SetVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, - tesseract_->params()); - } - - bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { - if (tesseract_ == nullptr) tesseract_ = new Tesseract; - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, - tesseract_->params()); - } - - bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - IntParam *p = ParamUtils::FindParam( - name, GlobalParams()->int_params, tesseract_->params()->int_params); - if (p == nullptr) return false; - *value = (int32_t)(*p); - return true; - } - - bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - BoolParam *p = ParamUtils::FindParam( - name, GlobalParams()->bool_params, tesseract_->params()->bool_params); - if (p == nullptr) return false; - *value = (BOOL8)(*p); - return true; - } - - const char *TessBaseAPI::GetStringVariable(const char *name) const { - StringParam *p = ParamUtils::FindParam( - name, GlobalParams()->string_params, tesseract_->params()->string_params); - return (p != nullptr) ? 
p->string() : nullptr; - } - - bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - DoubleParam *p = ParamUtils::FindParam( - name, GlobalParams()->double_params, tesseract_->params()->double_params); - if (p == nullptr) return false; - *value = (double)(*p); - return true; - } +void TessBaseAPI::SetOutputName(const char* name) { + if (output_file_ == nullptr) + output_file_ = new STRING(name); + else + *output_file_ = name; +} + +bool TessBaseAPI::SetVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, + tesseract_->params()); +} + +bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, + tesseract_->params()); +} + +bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + IntParam *p = ParamUtils::FindParam( + name, GlobalParams()->int_params, tesseract_->params()->int_params); + if (p == nullptr) return false; + *value = (int32_t)(*p); + return true; +} + +bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + BoolParam *p = ParamUtils::FindParam( + name, GlobalParams()->bool_params, tesseract_->params()->bool_params); + if (p == nullptr) return false; + *value = (BOOL8)(*p); + return true; +} + +const char *TessBaseAPI::GetStringVariable(const char *name) const { + StringParam *p = ParamUtils::FindParam( + name, GlobalParams()->string_params, tesseract_->params()->string_params); + return (p != nullptr) ? 
p->string() : nullptr; +} + +bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + DoubleParam *p = ParamUtils::FindParam( + name, GlobalParams()->double_params, tesseract_->params()->double_params); + if (p == nullptr) return false; + *value = (double)(*p); + return true; +} /** Get value of named variable as a string, if it exists. */ - bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); - } +bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { + return ParamUtils::GetParamAsString(name, tesseract_->params(), val); +} /** Print Tesseract parameters to the given file. */ - void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params()); - } +void TessBaseAPI::PrintVariables(FILE *fp) const { + ParamUtils::PrintParams(fp, tesseract_->params()); +} /** * The datapath must be the name of the data directory (no ending /) or @@ -350,90 +350,90 @@ namespace tesseract { * be returned. * @return: 0 on success and -1 on initialization failure. */ - int TessBaseAPI::Init(const char* datapath, const char* language, - OcrEngineMode oem, char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params) { - return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, - vars_values, set_only_non_debug_params, nullptr); - } +int TessBaseAPI::Init(const char* datapath, const char* language, + OcrEngineMode oem, char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params) { + return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, + vars_values, set_only_non_debug_params, nullptr); +} // In-memory version reads the traineddata file directly from the given // data[data_size] array. 
Also implements the version with a datapath in data, // flagged by data_size = 0. - int TessBaseAPI::Init(const char* data, int data_size, const char* language, - OcrEngineMode oem, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader) { - PERF_COUNT_START("TessBaseAPI::Init") - // Default language is "eng". - if (language == nullptr) language = "eng"; - STRING datapath = data_size == 0 ? data : language; - // If the datapath, OcrEngineMode or the language have changed - start again. - // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language - // actually used. They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). - if (tesseract_ != nullptr && - (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || - last_oem_requested_ != oem || - (*language_ != language && tesseract_->lang != language))) { - delete tesseract_; - tesseract_ = nullptr; - } - // PERF_COUNT_SUB("delete tesseract_") +int TessBaseAPI::Init(const char* data, int data_size, const char* language, + OcrEngineMode oem, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader) { + PERF_COUNT_START("TessBaseAPI::Init") + // Default language is "eng". + if (language == nullptr) language = "eng"; + STRING datapath = data_size == 0 ? data : language; + // If the datapath, OcrEngineMode or the language have changed - start again. + // Note that the language_ field stores the last requested language that was + // initialized successfully, while tesseract_->lang stores the language + // actually used. 
They differ only if the requested language was nullptr, in + // which case tesseract_->lang is set to the Tesseract default ("eng"). + if (tesseract_ != nullptr && + (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || + last_oem_requested_ != oem || + (*language_ != language && tesseract_->lang != language))) { + delete tesseract_; + tesseract_ = nullptr; + } + // PERF_COUNT_SUB("delete tesseract_") #ifdef USE_OPENCL - OpenclDevice od; + OpenclDevice od; od.InitEnv(); #endif - PERF_COUNT_SUB("OD::InitEnv()") - bool reset_classifier = true; - if (tesseract_ == nullptr) { - reset_classifier = false; - tesseract_ = new Tesseract; - if (reader != nullptr) reader_ = reader; - TessdataManager mgr(reader_); - if (data_size != 0) { - mgr.LoadMemBuffer(language, data, data_size); - } - if (tesseract_->init_tesseract( - datapath.string(), - output_file_ != nullptr ? output_file_->string() : nullptr, - language, oem, configs, configs_size, vars_vec, vars_values, - set_only_non_debug_params, &mgr) != 0) { - return -1; - } - } - - PERF_COUNT_SUB("update tesseract_") - // Update datapath and language requested for the last valid initialization. - if (datapath_ == nullptr) - datapath_ = new STRING(datapath); - else - *datapath_ = datapath; - if ((strcmp(datapath_->string(), "") == 0) && - (strcmp(tesseract_->datadir.string(), "") != 0)) - *datapath_ = tesseract_->datadir; + PERF_COUNT_SUB("OD::InitEnv()") + bool reset_classifier = true; + if (tesseract_ == nullptr) { + reset_classifier = false; + tesseract_ = new Tesseract; + if (reader != nullptr) reader_ = reader; + TessdataManager mgr(reader_); + if (data_size != 0) { + mgr.LoadMemBuffer(language, data, data_size); + } + if (tesseract_->init_tesseract( + datapath.string(), + output_file_ != nullptr ? 
output_file_->string() : nullptr, + language, oem, configs, configs_size, vars_vec, vars_values, + set_only_non_debug_params, &mgr) != 0) { + return -1; + } + } - if (language_ == nullptr) - language_ = new STRING(language); - else - *language_ = language; - last_oem_requested_ = oem; + PERF_COUNT_SUB("update tesseract_") + // Update datapath and language requested for the last valid initialization. + if (datapath_ == nullptr) + datapath_ = new STRING(datapath); + else + *datapath_ = datapath; + if ((strcmp(datapath_->string(), "") == 0) && + (strcmp(tesseract_->datadir.string(), "") != 0)) + *datapath_ = tesseract_->datadir; + + if (language_ == nullptr) + language_ = new STRING(language); + else + *language_ = language; + last_oem_requested_ = oem; #ifndef DISABLED_LEGACY_ENGINE - // PERF_COUNT_SUB("update last_oem_requested_") - // For same language and datapath, just reset the adaptive classifier. - if (reset_classifier) { - tesseract_->ResetAdaptiveClassifier(); - PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") - } + // PERF_COUNT_SUB("update last_oem_requested_") + // For same language and datapath, just reset the adaptive classifier. + if (reset_classifier) { + tesseract_->ResetAdaptiveClassifier(); + PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") + } #endif // ndef DISABLED_LEGACY_ENGINE - PERF_COUNT_END - return 0; - } + PERF_COUNT_END + return 0; +} /** * Returns the languages string used in the last valid initialization. @@ -443,38 +443,38 @@ namespace tesseract { * loaded use GetLoadedLanguagesAsVector. * The returned string should NOT be deleted. */ - const char* TessBaseAPI::GetInitLanguagesAsString() const { - return (language_ == nullptr || language_->string() == nullptr) ? - "" : language_->string(); - } +const char* TessBaseAPI::GetInitLanguagesAsString() const { + return (language_ == nullptr || language_->string() == nullptr) ? + "" : language_->string(); +} /** * Returns the loaded languages in the vector of STRINGs. 
* Includes all languages loaded by the last Init, including those loaded * as dependencies of other loaded languages. */ - void TessBaseAPI::GetLoadedLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) - langs->push_back(tesseract_->get_sub_lang(i)->lang); - } - } +void TessBaseAPI::GetLoadedLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + langs->push_back(tesseract_->lang); + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) + langs->push_back(tesseract_->get_sub_lang(i)->lang); + } +} /** * Returns the available languages in the sorted vector of STRINGs. */ - void TessBaseAPI::GetAvailableLanguagesAsVector( - GenericVector* langs) const { - langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir, "", langs); - langs->sort(CompareSTRING); - } - } +void TessBaseAPI::GetAvailableLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + addAvailableLanguages(tesseract_->datadir, "", langs); + langs->sort(CompareSTRING); + } +} //TODO(amit): Adapt to lstm #ifndef DISABLED_LEGACY_ENGINE @@ -484,61 +484,61 @@ namespace tesseract { * WARNING: temporary! This function will be removed from here and placed * in a separate API at some future time. 
*/ - int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - else - ParamUtils::ResetToDefaults(tesseract_->params()); - TessdataManager mgr; - return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); - } +int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + else + ParamUtils::ResetToDefaults(tesseract_->params()); + TessdataManager mgr; + return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); +} #endif // ndef DISABLED_LEGACY_ENGINE /** * Init only for page layout analysis. Use only for calls to SetImage and * AnalysePage. Calls that attempt recognition will generate an error. */ - void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; -#ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); -#endif - } - } +void TessBaseAPI::InitForAnalysePage() { + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; + #ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); + #endif + } +} /** * Read a "config" file containing a set of parameter name, value pairs. * Searches the standard places: tessdata/configs, tessdata/tessconfigs * and also accepts a relative or absolute path name. */ - void TessBaseAPI::ReadConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); - } +void TessBaseAPI::ReadConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); +} /** Same as above, but only set debug params from the given config file. 
*/ - void TessBaseAPI::ReadDebugConfigFile(const char* filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); - } +void TessBaseAPI::ReadDebugConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); +} /** * Set the current page segmentation mode. Defaults to PSM_AUTO. * The mode is stored as an IntParam so it can also be modified by * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ - void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) - tesseract_ = new Tesseract; - tesseract_->tessedit_pageseg_mode.set_value(mode); - } +void TessBaseAPI::SetPageSegMode(PageSegMode mode) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + tesseract_->tessedit_pageseg_mode.set_value(mode); +} /** Return the current page segmentation mode. */ - PageSegMode TessBaseAPI::GetPageSegMode() const { - if (tesseract_ == nullptr) - return PSM_SINGLE_BLOCK; - return static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); - } +PageSegMode TessBaseAPI::GetPageSegMode() const { + if (tesseract_ == nullptr) + return PSM_SINGLE_BLOCK; + return static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); +} /** * Recognize a rectangle from an image and return the result as a string. @@ -553,35 +553,35 @@ namespace tesseract { * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. */ - char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, int top, - int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) - return nullptr; // Nothing worth doing. - - // Since this original api didn't give the exact size of the image, - // we have to invent a reasonable value. - int bits_per_pixel = bytes_per_pixel == 0 ? 
1 : bytes_per_pixel * 8; - SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, - bytes_per_pixel, bytes_per_line); - SetRectangle(left, top, width, height); - - return GetUTF8Text(); - } +char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + int left, int top, + int width, int height) { + if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) + return nullptr; // Nothing worth doing. + + // Since this original api didn't give the exact size of the image, + // we have to invent a reasonable value. + int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; + SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, + bytes_per_pixel, bytes_per_line); + SetRectangle(left, top, width, height); + + return GetUTF8Text(); +} #ifndef DISABLED_LEGACY_ENGINE /** * Call between pages or documents etc to free up memory and forget * adaptive data. */ - void TessBaseAPI::ClearAdaptiveClassifier() { - if (tesseract_ == nullptr) - return; - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); - } +void TessBaseAPI::ClearAdaptiveClassifier() { + if (tesseract_ == nullptr) + return; + tesseract_->ResetAdaptiveClassifier(); + tesseract_->ResetDocumentDictionary(); +} #endif // ndef DISABLED_LEGACY_ENGINE /** @@ -591,22 +591,22 @@ namespace tesseract { * full image, so it may be followed immediately by a GetUTF8Text, and it * will automatically perform recognition. 
*/ - void TessBaseAPI::SetImage(const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, int bytes_per_line) { - if (InternalSetImage()) { - thresholder_->SetImage(imagedata, width, height, - bytes_per_pixel, bytes_per_line); - SetInputImage(thresholder_->GetPixRect()); - } - } +void TessBaseAPI::SetImage(const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, int bytes_per_line) { + if (InternalSetImage()) { + thresholder_->SetImage(imagedata, width, height, + bytes_per_pixel, bytes_per_line); + SetInputImage(thresholder_->GetPixRect()); + } +} - void TessBaseAPI::SetSourceResolution(int ppi) { - if (thresholder_) - thresholder_->SetSourceYResolution(ppi); - else - tprintf("Please call SetImage before SetSourceResolution.\n"); - } +void TessBaseAPI::SetSourceResolution(int ppi) { + if (thresholder_) + thresholder_->SetSourceYResolution(ppi); + else + tprintf("Please call SetImage before SetSourceResolution.\n"); +} /** * Provide an image for Tesseract to recognize. As with SetImage above, @@ -616,53 +616,53 @@ namespace tesseract { * Use Pix where possible. Tesseract uses Pix as its internal representation * and it is therefore more efficient to provide a Pix directly. 
*/ - void TessBaseAPI::SetImage(Pix* pix) { - if (InternalSetImage()) { - if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { - // remove alpha channel from png - PIX* p1 = pixRemoveAlpha(pix); - pixSetSpp(p1, 3); - pix = pixCopy(nullptr, p1); - pixDestroy(&p1); - } - thresholder_->SetImage(pix); - SetInputImage(thresholder_->GetPixRect()); - } - } +void TessBaseAPI::SetImage(Pix* pix) { + if (InternalSetImage()) { + if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { + // remove alpha channel from png + PIX* p1 = pixRemoveAlpha(pix); + pixSetSpp(p1, 3); + pix = pixCopy(nullptr, p1); + pixDestroy(&p1); + } + thresholder_->SetImage(pix); + SetInputImage(thresholder_->GetPixRect()); + } +} /** * Restrict recognition to a sub-rectangle of the image. Call after SetImage. * Each SetRectangle clears the recogntion results so multiple rectangles * can be recognized with the same image. */ - void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) - return; - thresholder_->SetRectangle(left, top, width, height); - ClearResults(); - } +void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { + if (thresholder_ == nullptr) + return; + thresholder_->SetRectangle(left, top, width, height); + ClearResults(); +} /** * ONLY available after SetImage if you have Leptonica installed. * Get a copy of the internal thresholded image from Tesseract. 
*/ - Pix* TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return nullptr; - } - return pixClone(tesseract_->pix_binary()); - } +Pix* TessBaseAPI::GetThresholdedImage() { + if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return nullptr; + } + return pixClone(tesseract_->pix_binary()); +} /** * Get the result of page layout analysis as a leptonica-style * Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ - Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { - return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); - } +Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { + return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); +} /** * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. @@ -672,11 +672,11 @@ namespace tesseract { * If paraids is not nullptr, the paragraph-id of each line within its block is * also returned as an array of one element per line. delete [] after use. */ - Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, - Pixa** pixa, int** blockids, int** paraids) { - return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, - pixa, blockids, paraids); - } +Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, + Pixa** pixa, int** blockids, int** paraids) { + return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, + pixa, blockids, paraids); +} /** * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa @@ -686,18 +686,18 @@ namespace tesseract { * If blockids is not nullptr, the block-id of each line is also returned as an * array of one element per line. delete [] after use. 
*/ - Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { - return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); - } +Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { + return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); +} /** * Get the words as a leptonica-style * Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ - Boxa* TessBaseAPI::GetWords(Pixa** pixa) { - return GetComponentImages(RIL_WORD, true, pixa, nullptr); - } +Boxa* TessBaseAPI::GetWords(Pixa** pixa) { + return GetComponentImages(RIL_WORD, true, pixa, nullptr); +} /** * Gets the individual connected (text) components (created @@ -705,9 +705,9 @@ namespace tesseract { * as a leptonica-style Boxa, Pixa pair, in reading order. * Can be called before or after Recognize. */ - Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { - return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); - } +Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { + return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); +} /** * Get the given level kind of components (block, textline, word etc.) as a @@ -717,94 +717,94 @@ namespace tesseract { * as an array of one element per component. delete [] after use. * If text_only is true, then only text components are returned. */ - Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, - bool text_only, bool raw_image, - const int raw_padding, - Pixa** pixa, int** blockids, - int** paraids) { - PageIterator* page_it = GetIterator(); - if (page_it == nullptr) - page_it = AnalyseLayout(); - if (page_it == nullptr) - return nullptr; // Failed. - - // Count the components to get a size for the arrays. 
- int component_count = 0; - int left, top, right, bottom; - - TessResultCallback* get_bbox = nullptr; +Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, + bool text_only, bool raw_image, + const int raw_padding, + Pixa** pixa, int** blockids, + int** paraids) { + PageIterator* page_it = GetIterator(); + if (page_it == nullptr) + page_it = AnalyseLayout(); + if (page_it == nullptr) + return nullptr; // Failed. + + // Count the components to get a size for the arrays. + int component_count = 0; + int left, top, right, bottom; + + TessResultCallback* get_bbox = nullptr; + if (raw_image) { + // Get bounding box in original raw image with padding. + get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, + level, raw_padding, + &left, &top, &right, &bottom); + } else { + // Get bounding box from binarized imaged. Note that this could be + // differently scaled from the original image. + get_bbox = NewPermanentTessCallback(page_it, + &PageIterator::BoundingBoxInternal, + level, &left, &top, &right, &bottom); + } + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) + ++component_count; + } while (page_it->Next(level)); + + Boxa* boxa = boxaCreate(component_count); + if (pixa != nullptr) + *pixa = pixaCreate(component_count); + if (blockids != nullptr) + *blockids = new int[component_count]; + if (paraids != nullptr) + *paraids = new int[component_count]; + + int blockid = 0; + int paraid = 0; + int component_index = 0; + page_it->Begin(); + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) { + Box* lbox = boxCreate(left, top, right - left, bottom - top); + boxaAddBox(boxa, lbox, L_INSERT); + if (pixa != nullptr) { + Pix* pix = nullptr; if (raw_image) { - // Get bounding box in original raw image with padding. 
- get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, - level, raw_padding, - &left, &top, &right, &bottom); + pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, + &top); } else { - // Get bounding box from binarized imaged. Note that this could be - // differently scaled from the original image. - get_bbox = NewPermanentTessCallback(page_it, - &PageIterator::BoundingBoxInternal, - level, &left, &top, &right, &bottom); + pix = page_it->GetBinaryImage(level); } - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) - ++component_count; - } while (page_it->Next(level)); - - Boxa* boxa = boxaCreate(component_count); - if (pixa != nullptr) - *pixa = pixaCreate(component_count); - if (blockids != nullptr) - *blockids = new int[component_count]; - if (paraids != nullptr) - *paraids = new int[component_count]; - - int blockid = 0; - int paraid = 0; - int component_index = 0; - page_it->Begin(); - do { - if (get_bbox->Run() && - (!text_only || PTIsTextType(page_it->BlockType()))) { - Box* lbox = boxCreate(left, top, right - left, bottom - top); - boxaAddBox(boxa, lbox, L_INSERT); - if (pixa != nullptr) { - Pix* pix = nullptr; - if (raw_image) { - pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, - &top); - } else { - pix = page_it->GetBinaryImage(level); - } - pixaAddPix(*pixa, pix, L_INSERT); - pixaAddBox(*pixa, lbox, L_CLONE); - } - if (paraids != nullptr) { - (*paraids)[component_index] = paraid; - if (page_it->IsAtFinalElement(RIL_PARA, level)) - ++paraid; - } - if (blockids != nullptr) { - (*blockids)[component_index] = blockid; - if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { - ++blockid; - paraid = 0; - } - } - ++component_index; - } - } while (page_it->Next(level)); - delete page_it; - delete get_bbox; - return boxa; - } - - int TessBaseAPI::GetThresholdedImageScaleFactor() const { - if (thresholder_ == nullptr) { - return 0; + pixaAddPix(*pixa, pix, L_INSERT); + 
pixaAddBox(*pixa, lbox, L_CLONE); + } + if (paraids != nullptr) { + (*paraids)[component_index] = paraid; + if (page_it->IsAtFinalElement(RIL_PARA, level)) + ++paraid; + } + if (blockids != nullptr) { + (*blockids)[component_index] = blockid; + if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { + ++blockid; + paraid = 0; } - return thresholder_->GetScaleFactor(); + } + ++component_index; } + } while (page_it->Next(level)); + delete page_it; + delete get_bbox; + return boxa; +} + +int TessBaseAPI::GetThresholdedImageScaleFactor() const { + if (thresholder_ == nullptr) { + return 0; + } + return thresholder_->GetScaleFactor(); +} /** * Runs page layout analysis in the mode set by SetPageSegMode. @@ -821,282 +821,282 @@ namespace tesseract { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ - PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } - - PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { - if (FindLines() == 0) { - if (block_list_->empty()) - return nullptr; // The page was empty. - page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); - DetectParagraphs(false); - return new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } - return nullptr; - } +PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } + +PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { + if (FindLines() == 0) { + if (block_list_->empty()) + return nullptr; // The page was empty. 
+ page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); + DetectParagraphs(false); + return new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } + return nullptr; +} /** * Recognize the tesseract global image and return the result as Tesseract * internal structures. */ - int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (FindLines() != 0) - return -1; - delete page_res_; - if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, - &tesseract_->prev_word_best_choice_); - return 0; // Empty page. - } +int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (FindLines() != 0) + return -1; + delete page_res_; + if (block_list_->empty()) { + page_res_ = new PAGE_RES(false, block_list_, + &tesseract_->prev_word_best_choice_); + return 0; // Empty page. + } - tesseract_->SetBlackAndWhitelist(); - recognition_done_ = true; + tesseract_->SetBlackAndWhitelist(); + recognition_done_ = true; #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { - page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); - } else + if (tesseract_->tessedit_resegment_from_line_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); + } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); + } else #endif // ndef DISABLED_LEGACY_ENGINE - { - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), - block_list_, &tesseract_->prev_word_best_choice_); - } + { + page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), + block_list_, &tesseract_->prev_word_best_choice_); + } - if (page_res_ == nullptr) { - 
return -1; - } + if (page_res_ == nullptr) { + return -1; + } - if (tesseract_->tessedit_train_line_recognizer) { - tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_train_line_recognizer) { + tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_); + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_make_boxes_from_boxes) { - tesseract_->CorrectClassifyWords(page_res_); - return 0; - } + if (tesseract_->tessedit_make_boxes_from_boxes) { + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } #endif // ndef DISABLED_LEGACY_ENGINE - if (truth_cb_ != nullptr) { - tesseract_->wordrec_run_blamer.set_value(true); - PageIterator *page_it = new PageIterator( - page_res_, tesseract_, thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - truth_cb_->Run(tesseract_->getDict().getUnicharset(), - image_height_, page_it, this->tesseract()->pix_grey()); - delete page_it; - } + if (truth_cb_ != nullptr) { + tesseract_->wordrec_run_blamer.set_value(true); + PageIterator *page_it = new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + truth_cb_->Run(tesseract_->getDict().getUnicharset(), + image_height_, page_it, this->tesseract()->pix_grey()); + delete page_it; + } - int result = 0; - if (tesseract_->interactive_display_mode) { -#ifndef GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); -#endif // GRAPHICS_DISABLED - // The page_res is invalid after an interactive session, so cleanup - // in a way that lets us continue to the next page without crashing. 
- delete page_res_; - page_res_ = nullptr; - return -1; -#ifndef DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - STRING fontname; - ExtractFontName(*output_file_, &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - FILE *training_output_file = tesseract_->init_recog_training(*input_file_); - // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented( - *input_file_, page_res_, monitor, training_output_file); - fclose(training_output_file); -#endif // ndef DISABLED_LEGACY_ENGINE - } else { - // Now run the main recognition. - bool wait_for_text = true; - GetBoolVariable("paragraph_text_based", &wait_for_text); - if (!wait_for_text) DetectParagraphs(false); - if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { - if (wait_for_text) DetectParagraphs(true); - } else { - result = -1; - } - } - return result; + int result = 0; + if (tesseract_->interactive_display_mode) { + #ifndef GRAPHICS_DISABLED + tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + #endif // GRAPHICS_DISABLED + // The page_res is invalid after an interactive session, so cleanup + // in a way that lets us continue to the next page without crashing. + delete page_res_; + page_res_ = nullptr; + return -1; + #ifndef DISABLED_LEGACY_ENGINE + } else if (tesseract_->tessedit_train_from_boxes) { + STRING fontname; + ExtractFontName(*output_file_, &fontname); + tesseract_->ApplyBoxTraining(fontname, page_res_); + } else if (tesseract_->tessedit_ambigs_training) { + FILE *training_output_file = tesseract_->init_recog_training(*input_file_); + // OCR the page segmented into words by tesseract. + tesseract_->recog_training_segmented( + *input_file_, page_res_, monitor, training_output_file); + fclose(training_output_file); + #endif // ndef DISABLED_LEGACY_ENGINE + } else { + // Now run the main recognition. 
+ bool wait_for_text = true; + GetBoolVariable("paragraph_text_based", &wait_for_text); + if (!wait_for_text) DetectParagraphs(false); + if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { + if (wait_for_text) DetectParagraphs(true); + } else { + result = -1; } + } + return result; +} #ifndef DISABLED_LEGACY_ENGINE /** Tests the chopper by exhaustively running chop_one_blob. */ - int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { - if (tesseract_ == nullptr) - return -1; - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (page_res_ != nullptr) - ClearResults(); - if (FindLines() != 0) - return -1; - // Additional conditions under which chopper test cannot be run - if (tesseract_->interactive_display_mode) return -1; - - recognition_done_ = true; - - page_res_ = new PAGE_RES(false, block_list_, - &(tesseract_->prev_word_best_choice_)); - - PAGE_RES_IT page_res_it(page_res_); - - while (page_res_it.word() != nullptr) { - WERD_RES *word_res = page_res_it.word(); - GenericVector boxes; - tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, - page_res_it.row()->row, word_res); - page_res_it.forward(); - } - return 0; - } +int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (page_res_ != nullptr) + ClearResults(); + if (FindLines() != 0) + return -1; + // Additional conditions under which chopper test cannot be run + if (tesseract_->interactive_display_mode) return -1; + + recognition_done_ = true; + + page_res_ = new PAGE_RES(false, block_list_, + &(tesseract_->prev_word_best_choice_)); + + PAGE_RES_IT page_res_it(page_res_); + + while (page_res_it.word() != nullptr) { + WERD_RES *word_res = page_res_it.word(); + GenericVector 
boxes; + tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, + page_res_it.row()->row, word_res); + page_res_it.forward(); + } + return 0; +} #endif // ndef DISABLED_LEGACY_ENGINE // Takes ownership of the input pix. - void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } +void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } - Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } +Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } - const char * TessBaseAPI::GetInputName() { - if (input_file_) - return input_file_->c_str(); - return nullptr; - } +const char * TessBaseAPI::GetInputName() { + if (input_file_) + return input_file_->c_str(); + return nullptr; +} - const char * TessBaseAPI::GetDatapath() { - return tesseract_->datadir.c_str(); - } +const char * TessBaseAPI::GetDatapath() { + return tesseract_->datadir.c_str(); +} - int TessBaseAPI::GetSourceYResolution() { - return thresholder_->GetSourceYResolution(); - } +int TessBaseAPI::GetSourceYResolution() { + return thresholder_->GetSourceYResolution(); +} // If flist exists, get data from there. Otherwise get data from buf. // Seems convoluted, but is the easiest way I know of to meet multiple // goals. Support streaming from stdin, and also work on platforms // lacking fmemopen. - bool TessBaseAPI::ProcessPagesFileList(FILE *flist, - STRING *buf, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { - if (!flist && !buf) return false; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - char pagename[MAX_PATH]; - - GenericVector lines; - if (!flist) { - buf->split('\n', &lines); - if (lines.empty()) return false; - } - - // Skip to the requested page number. 
- for (int i = 0; i < page; i++) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } - } +bool TessBaseAPI::ProcessPagesFileList(FILE *flist, + STRING *buf, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { + if (!flist && !buf) return false; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + char pagename[MAX_PATH]; + + GenericVector lines; + if (!flist) { + buf->split('\n', &lines); + if (lines.empty()) return false; + } - // Begin producing output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - return false; - } + // Skip to the requested page number. + for (int i = 0; i < page; i++) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } + } - // Loop over all pages - or just the requested one - while (true) { - if (flist) { - if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; - } else { - if (page >= lines.size()) break; - snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); - } - chomp_string(pagename); - Pix *pix = pixRead(pagename); - if (pix == nullptr) { - tprintf("Image file %s cannot be read!\n", pagename); - return false; - } - tprintf("Page %d : %s\n", page, pagename); - bool r = ProcessPage(pix, page, pagename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - ++page; - } + // Begin producing output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + return false; + } - // Finish producing output - if (renderer && !renderer->EndDocument()) { - return false; - } - return true; - } + // Loop over all pages - or just the requested one + while (true) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } else { + if (page >= lines.size()) break; + snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); + } + chomp_string(pagename); + 
Pix *pix = pixRead(pagename); + if (pix == nullptr) { + tprintf("Image file %s cannot be read!\n", pagename); + return false; + } + tprintf("Page %d : %s\n", page, pagename); + bool r = ProcessPage(pix, page, pagename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + ++page; + } - bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number) { + // Finish producing output + if (renderer && !renderer->EndDocument()) { + return false; + } + return true; +} + +bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { #ifndef ANDROID_BUILD - Pix *pix = nullptr; - int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; - size_t offset = 0; - for (; ; ++page) { - if (tessedit_page_number >= 0) - page = tessedit_page_number; - pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) - : pixReadFromMultipageTiff(filename, &offset); - if (pix == nullptr) break; - tprintf("Page %d\n", page + 1); - char page_str[kMaxIntSize]; - snprintf(page_str, kMaxIntSize - 1, "%d", page); - SetVariable("applybox_page", page_str); - bool r = ProcessPage(pix, page, filename, retry_config, - timeout_millisec, renderer); - pixDestroy(&pix); - if (!r) return false; - if (tessedit_page_number >= 0) break; - if (!offset) break; - } - return true; + Pix *pix = nullptr; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + size_t offset = 0; + for (; ; ++page) { + if (tessedit_page_number >= 0) + page = tessedit_page_number; + pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) + : pixReadFromMultipageTiff(filename, &offset); + if (pix == nullptr) break; + tprintf("Page %d\n", page + 1); + char page_str[kMaxIntSize]; + snprintf(page_str, kMaxIntSize - 1, "%d", page); + SetVariable("applybox_page", page_str); + bool r = ProcessPage(pix, page, filename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + if (!offset) break; + } + return true; #else - return false; + return false; #endif - } +} // Master ProcessPages calls ProcessPagesInternal and then does any post- // processing required due to being in a training mode. - bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - bool result = - ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); -#ifndef DISABLED_LEGACY_ENGINE - if (result) { - if (tesseract_->tessedit_train_from_boxes && - !tesseract_->WriteTRFile(*output_file_)) { - tprintf("Write of TR file failed: %s\n", output_file_->string()); - return false; - } - } -#endif // ndef DISABLED_LEGACY_ENGINE - return result; +bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + bool result = + ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); + #ifndef DISABLED_LEGACY_ENGINE + if (result) { + if (tesseract_->tessedit_train_from_boxes && + !tesseract_->WriteTRFile(*output_file_)) { + tprintf("Write of TR file failed: %s\n", output_file_->string()); + return false; } + } + #endif // ndef DISABLED_LEGACY_ENGINE + return result; +} // In the ideal scenario, Tesseract will start working on data as soon // as it can. For example, if you stream a filelist through stdin, we @@ -1109,184 +1109,184 @@ namespace tesseract { // impractical. 
So we support a command line flag to explicitly // identify the scenario that really matters: filelists on // stdin. We'll still do our best if the user likes pipes. - bool TessBaseAPI::ProcessPagesInternal(const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPages") - bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); - if (stdInput) { +bool TessBaseAPI::ProcessPagesInternal(const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + PERF_COUNT_START("ProcessPages") + bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); + if (stdInput) { #ifdef WIN32 - if (_setmode(_fileno(stdin), _O_BINARY) == -1) + if (_setmode(_fileno(stdin), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); #endif // WIN32 - } - - if (stream_filelist) { - return ProcessPagesFileList(stdin, nullptr, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } - - // At this point we are officially in autodection territory. - // That means any data in stdin must be buffered, to make it - // seekable. - std::string buf; - const l_uint8 *data = nullptr; - if (stdInput) { - buf.assign((std::istreambuf_iterator(std::cin)), - (std::istreambuf_iterator())); - data = reinterpret_cast(buf.data()); - } else { - // Check whether the input file can be read. - if (FILE* file = fopen(filename, "rb")) { - fclose(file); - } else { - fprintf(stderr, "Error, cannot read input file %s: %s\n", - filename, strerror(errno)); - return false; - } - } - - // Here is our autodetection - int format; - int r = (stdInput) ? 
- findFileFormatBuffer(data, &format) : - findFileFormat(filename, &format); - - // Maybe we have a filelist - if (r != 0 || format == IFF_UNKNOWN) { - STRING s; - if (stdInput) { - s = buf.c_str(); - } else { - std::ifstream t(filename); - std::string u((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - s = u.c_str(); - } - return ProcessPagesFileList(nullptr, &s, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number); - } + } - // Maybe we have a TIFF which is potentially multipage - bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || - format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || - format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || - format == IFF_TIFF_ZIP); - - // Fail early if we can, before producing any output - Pix *pix = nullptr; - if (!tiff) { - pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); - if (pix == nullptr) { - return false; - } - } + if (stream_filelist) { + return ProcessPagesFileList(stdin, nullptr, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // Begin the output - if (renderer && !renderer->BeginDocument(unknown_title_)) { - pixDestroy(&pix); - return false; - } + // At this point we are officially in autodection territory. + // That means any data in stdin must be buffered, to make it + // seekable. + std::string buf; + const l_uint8 *data = nullptr; + if (stdInput) { + buf.assign((std::istreambuf_iterator(std::cin)), + (std::istreambuf_iterator())); + data = reinterpret_cast(buf.data()); + } else { + // Check whether the input file can be read. + if (FILE* file = fopen(filename, "rb")) { + fclose(file); + } else { + fprintf(stderr, "Error, cannot read input file %s: %s\n", + filename, strerror(errno)); + return false; + } + } - // Produce output - r = (tiff) ? 
- ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, - timeout_millisec, renderer, - tesseract_->tessedit_page_number) : - ProcessPage(pix, 0, filename, retry_config, - timeout_millisec, renderer); + // Here is our autodetection + int format; + int r = (stdInput) ? + findFileFormatBuffer(data, &format) : + findFileFormat(filename, &format); + + // Maybe we have a filelist + if (r != 0 || format == IFF_UNKNOWN) { + STRING s; + if (stdInput) { + s = buf.c_str(); + } else { + std::ifstream t(filename); + std::string u((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + s = u.c_str(); + } + return ProcessPagesFileList(nullptr, &s, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } - // Clean up memory as needed - pixDestroy(&pix); + // Maybe we have a TIFF which is potentially multipage + bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || + format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || + format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || + format == IFF_TIFF_ZIP); - // End the output - if (!r || (renderer && !renderer->EndDocument())) { - return false; - } - PERF_COUNT_END - return true; + // Fail early if we can, before producing any output + Pix *pix = nullptr; + if (!tiff) { + pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename); + if (pix == nullptr) { + return false; } + } - bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer) { - PERF_COUNT_START("ProcessPage") - SetInputName(filename); - SetImage(pix); - bool failed = false; + // Begin the output + if (renderer && !renderer->BeginDocument(unknown_title_)) { + pixDestroy(&pix); + return false; + } - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { - // Disabled character recognition - PageIterator* it = AnalyseLayout(); + // Produce output + r = (tiff) ? 
+ ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number) : + ProcessPage(pix, 0, filename, retry_config, + timeout_millisec, renderer); - if (it == nullptr) { - failed = true; - } else { - delete it; - } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { - failed = FindLines() != 0; - } else if (timeout_millisec > 0) { - // Running with a timeout. - ETEXT_DESC monitor; - monitor.cancel = nullptr; - monitor.cancel_this = nullptr; - monitor.set_deadline_msecs(timeout_millisec); - - // Now run the main recognition. - failed = Recognize(&monitor) < 0; - } else { - // Normal layout and character recognition with no timeout. - failed = Recognize(nullptr) < 0; - } + // Clean up memory as needed + pixDestroy(&pix); - if (tesseract_->tessedit_write_images) { + // End the output + if (!r || (renderer && !renderer->EndDocument())) { + return false; + } + PERF_COUNT_END + return true; +} + +bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer) { + PERF_COUNT_START("ProcessPage") + SetInputName(filename); + SetImage(pix); + bool failed = false; + + if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + // Disabled character recognition + PageIterator* it = AnalyseLayout(); + + if (it == nullptr) { + failed = true; + } else { + delete it; + } + } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + failed = FindLines() != 0; + } else if (timeout_millisec > 0) { + // Running with a timeout. + ETEXT_DESC monitor; + monitor.cancel = nullptr; + monitor.cancel_this = nullptr; + monitor.set_deadline_msecs(timeout_millisec); + + // Now run the main recognition. + failed = Recognize(&monitor) < 0; + } else { + // Normal layout and character recognition with no timeout. 
+ failed = Recognize(nullptr) < 0; + } + + if (tesseract_->tessedit_write_images) { #ifndef ANDROID_BUILD - Pix* page_pix = GetThresholdedImage(); - pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); + Pix* page_pix = GetThresholdedImage(); + pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); #endif // ANDROID_BUILD - } + } - if (failed && retry_config != nullptr && retry_config[0] != '\0') { - // Save current config variables before switching modes. - FILE* fp = fopen(kOldVarsFile, "wb"); - if (fp == nullptr) { - tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); - } else { - PrintVariables(fp); - fclose(fp); - } - // Switch to alternate mode for retry. - ReadConfigFile(retry_config); - SetImage(pix); - Recognize(nullptr); - // Restore saved config variables. - ReadConfigFile(kOldVarsFile); - } + if (failed && retry_config != nullptr && retry_config[0] != '\0') { + // Save current config variables before switching modes. + FILE* fp = fopen(kOldVarsFile, "wb"); + if (fp == nullptr) { + tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); + } else { + PrintVariables(fp); + fclose(fp); + } + // Switch to alternate mode for retry. + ReadConfigFile(retry_config); + SetImage(pix); + Recognize(nullptr); + // Restore saved config variables. + ReadConfigFile(kOldVarsFile); + } - if (renderer && !failed) { - failed = !renderer->AddImage(this); - } + if (renderer && !failed) { + failed = !renderer->AddImage(this); + } - PERF_COUNT_END - return !failed; - } + PERF_COUNT_END + return !failed; +} /** * Get a left-to-right iterator to the results of LayoutAnalysis and/or * Recognize. The returned iterator must be deleted after use. 
*/ - LTRResultIterator* TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } +LTRResultIterator* TessBaseAPI::GetLTRIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); +} /** * Get a reading-order iterator to the results of LayoutAnalysis and/or @@ -1296,14 +1296,14 @@ namespace tesseract { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ - ResultIterator* TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return ResultIterator::StartOfParagraph(LTRResultIterator( - page_res_, tesseract_, - thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_)); - } +ResultIterator* TessBaseAPI::GetIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return ResultIterator::StartOfParagraph(LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_)); +} /** * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. @@ -1313,45 +1313,45 @@ namespace tesseract { * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. 
*/ - MutableIterator* TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) - return nullptr; - return new MutableIterator(page_res_, tesseract_, - thresholder_->GetScaleFactor(), - thresholder_->GetScaledYResolution(), - rect_left_, rect_top_, rect_width_, rect_height_); - } +MutableIterator* TessBaseAPI::GetMutableIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new MutableIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); +} /** Make a text string from the internal data structures. */ - char* TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - STRING text(""); - ResultIterator *it = GetIterator(); - do { - if (it->Empty(RIL_PARA)) continue; - const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); - text += para_text.get(); - } while (it->Next(RIL_PARA)); - char* result = new char[text.length() + 1]; - strncpy(result, text.string(), text.length() + 1); - delete it; - return result; - } +char* TessBaseAPI::GetUTF8Text() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + STRING text(""); + ResultIterator *it = GetIterator(); + do { + if (it->Empty(RIL_PARA)) continue; + const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); + text += para_text.get(); + } while (it->Next(RIL_PARA)); + char* result = new char[text.length() + 1]; + strncpy(result, text.string(), text.length() + 1); + delete it; + return result; +} /** * Gets the block orientation at the current iterator position. 
*/ - static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { - tesseract::Orientation orientation; - tesseract::WritingDirection writing_direction; - tesseract::TextlineOrder textline_order; - float deskew_angle; - it->Orientation(&orientation, &writing_direction, &textline_order, - &deskew_angle); - return orientation; - } +static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { + tesseract::Orientation orientation; + tesseract::WritingDirection writing_direction; + tesseract::TextlineOrder textline_order; + float deskew_angle; + it->Orientation(&orientation, &writing_direction, &textline_order, + &deskew_angle); + return orientation; +} /** * Fits a line to the baseline at the given level, and appends its coefficients @@ -1361,104 +1361,104 @@ namespace tesseract { * method currently only inserts a 'textangle' property to indicate the rotation * direction and does not add any baseline information to the hocr string. */ - static void AddBaselineCoordsTohOCR(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { - tesseract::Orientation orientation = GetBlockTextOrientation(it); - if (orientation != ORIENTATION_PAGE_UP) { - hocr_str->add_str_int("; textangle ", 360 - orientation * 90); - return; - } - - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - // Try to get the baseline coordinates at this level. - int x1, y1, x2, y2; - if (!it->Baseline(level, &x1, &y1, &x2, &y2)) - return; - // Following the description of this field of the hOCR spec, we convert the - // baseline coordinates so that "the bottom left of the bounding box is the - // origin". - x1 -= left; - x2 -= left; - y1 -= bottom; - y2 -= bottom; - - // Now fit a line through the points so we can extract coefficients for the - // equation: y = p1 x + p0 - double p1 = 0; - double p0 = 0; - if (x1 == x2) { - // Problem computing the polynomial coefficients. 
- return; - } - p1 = (y2 - y1) / static_cast(x2 - x1); - p0 = y1 - static_cast(p1 * x1); - - hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); - hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); - } - - static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - if (num2 >= 0) { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); - } else { - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - } - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; - } - - static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, - int num2, int num3) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); - id_buffer[BUFSIZE - 1] = '\0'; - *hocr_str += " id='"; - *hocr_str += id_buffer; - *hocr_str += "'"; - } - - static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - // This is the only place we use double quotes instead of single quotes, - // but it may too late to change for consistency - hocr_str->add_str_int(" title=\"bbox ", left); - hocr_str->add_str_int(" ", top); - hocr_str->add_str_int(" ", right); - hocr_str->add_str_int(" ", bottom); - // Add baseline coordinates & heights for textlines only. - if (level == RIL_TEXTLINE) { - AddBaselineCoordsTohOCR(it, level, hocr_str); - // add custom height measures - float row_height, descenders, ascenders; // row attributes - it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO(rays): Do we want to limit these to a single decimal place? 
- hocr_str->add_str_double("; x_size ", row_height); - hocr_str->add_str_double("; x_descenders ", descenders * -1); - hocr_str->add_str_double("; x_ascenders ", ascenders); - } - *hocr_str += "\">"; - } +static void AddBaselineCoordsTohOCR(const PageIterator *it, + PageIteratorLevel level, + STRING* hocr_str) { + tesseract::Orientation orientation = GetBlockTextOrientation(it); + if (orientation != ORIENTATION_PAGE_UP) { + hocr_str->add_str_int("; textangle ", 360 - orientation * 90); + return; + } - static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, - STRING* hocr_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - hocr_str->add_str_int("\t", left); - hocr_str->add_str_int("\t", top); - hocr_str->add_str_int("\t", right - left); - hocr_str->add_str_int("\t", bottom - top); - } + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + + // Try to get the baseline coordinates at this level. + int x1, y1, x2, y2; + if (!it->Baseline(level, &x1, &y1, &x2, &y2)) + return; + // Following the description of this field of the hOCR spec, we convert the + // baseline coordinates so that "the bottom left of the bounding box is the + // origin". + x1 -= left; + x2 -= left; + y1 -= bottom; + y2 -= bottom; + + // Now fit a line through the points so we can extract coefficients for the + // equation: y = p1 x + p0 + double p1 = 0; + double p0 = 0; + if (x1 == x2) { + // Problem computing the polynomial coefficients. 
+ return; + } + p1 = (y2 - y1) / static_cast(x2 - x1); + p0 = y1 - static_cast(p1 * x1); + + hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); + hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); +} + +static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + if (num2 >= 0) { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); + } else { + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + } + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; +} + +static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2, int num3) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3); + id_buffer[BUFSIZE - 1] = '\0'; + *hocr_str += " id='"; + *hocr_str += id_buffer; + *hocr_str += "'"; +} + +static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, + STRING* hocr_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + // This is the only place we use double quotes instead of single quotes, + // but it may too late to change for consistency + hocr_str->add_str_int(" title=\"bbox ", left); + hocr_str->add_str_int(" ", top); + hocr_str->add_str_int(" ", right); + hocr_str->add_str_int(" ", bottom); + // Add baseline coordinates & heights for textlines only. + if (level == RIL_TEXTLINE) { + AddBaselineCoordsTohOCR(it, level, hocr_str); + // add custom height measures + float row_height, descenders, ascenders; // row attributes + it->RowAttributes(&row_height, &descenders, &ascenders); + // TODO(rays): Do we want to limit these to a single decimal place? 
+ hocr_str->add_str_double("; x_size ", row_height); + hocr_str->add_str_double("; x_descenders ", descenders * -1); + hocr_str->add_str_double("; x_ascenders ", ascenders); + } + *hocr_str += "\">"; +} + +static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, + STRING* hocr_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + hocr_str->add_str_int("\t", left); + hocr_str->add_str_int("\t", top); + hocr_str->add_str_int("\t", right - left); + hocr_str->add_str_int("\t", bottom - top); +} /** * Make a HTML-formatted string with hOCR markup from the internal @@ -1469,9 +1469,9 @@ namespace tesseract { * STL removed from original patch submission and refactored by rays. * Returned string must be freed with the delete [] operator. */ - char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(nullptr, page_number); - } +char* TessBaseAPI::GetHOCRText(int page_number) { + return GetHOCRText(nullptr, page_number); +} /** * Make a HTML-formatted string with hOCR markup from the internal @@ -1482,24 +1482,24 @@ namespace tesseract { * STL removed from original patch submission and refactored by rays. * Returned string must be freed with the delete [] operator. */ - char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; +char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) + return nullptr; - int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; - int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
- bool para_is_ltr = true; // Default direction is LTR - const char* paragraph_lang = nullptr; - bool font_info = false; - GetBoolVariable("hocr_font_info", &font_info); + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1; + int page_id = page_number + 1; // hOCR uses 1-based page numbers. + bool para_is_ltr = true; // Default direction is LTR + const char* paragraph_lang = nullptr; + bool font_info = false; + GetBoolVariable("hocr_font_info", &font_info); - STRING hocr_str(""); + STRING hocr_str(""); - if (input_file_ == nullptr) - SetInputName(nullptr); + if (input_file_ == nullptr) + SetInputName(nullptr); #ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 + // convert input name from ANSI encoding to utf-8 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); wchar_t *uni16_str = new WCHAR[str16_len]; @@ -1515,325 +1515,325 @@ namespace tesseract { delete[] utf8_str; #endif - hocr_str += "

string()); - } else { - hocr_str += "unknown"; - } - hocr_str.add_str_int("\"; bbox ", rect_left_); - hocr_str.add_str_int(" ", rect_top_); - hocr_str.add_str_int(" ", rect_width_); - hocr_str.add_str_int(" ", rect_height_); - hocr_str.add_str_int("; ppageno ", page_number); - hocr_str += "'>\n"; - - ResultIterator *res_it = GetIterator(); - while (!res_it->Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Open any new block/paragraph/textline. - if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction - hocr_str += "
IsAtBeginningOf(RIL_PARA)) { - hocr_str += "\n

ParagraphIsLtr(); - if (!para_is_ltr) { - hocr_str += " dir='rtl'"; - } - AddIdTohOCR(&hocr_str, "par", page_id, pcnt); - paragraph_lang = res_it->WordRecognitionLanguage(); - if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; - } - AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - hocr_str += "\n >>* confidencemap = nullptr; - if (tesseract_->lstm_choice_mode) { - confidencemap = res_it->GetBestLSTMSymbolChoices(); - } - hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); - hocr_str.add_str_int(" title='bbox ", left); - hocr_str.add_str_int(" ", top); - hocr_str.add_str_int(" ", right); - hocr_str.add_str_int(" ", bottom); - hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); - if (font_info) { - if (font_name) { - hocr_str += "; x_font "; - hocr_str += HOcrEscape(font_name); - } - hocr_str.add_str_int("; x_fsize ", pointsize); - } + hocr_str += "

string()); + } else { + hocr_str += "unknown"; + } + hocr_str.add_str_int("\"; bbox ", rect_left_); + hocr_str.add_str_int(" ", rect_top_); + hocr_str.add_str_int(" ", rect_width_); + hocr_str.add_str_int(" ", rect_height_); + hocr_str.add_str_int("; ppageno ", page_number); + hocr_str += "'>\n"; + + ResultIterator *res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + // Open any new block/paragraph/textline. + if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + para_is_ltr = true; // reset to default direction + hocr_str += "
IsAtBeginningOf(RIL_PARA)) { + hocr_str += "\n

ParagraphIsLtr(); + if (!para_is_ltr) { + hocr_str += " dir='rtl'"; + } + AddIdTohOCR(&hocr_str, "par", page_id, pcnt); + paragraph_lang = res_it->WordRecognitionLanguage(); + if (paragraph_lang) { + hocr_str += " lang='"; + hocr_str += paragraph_lang; + hocr_str += "'"; + } + AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + hocr_str += "\n >>* confidencemap = nullptr; + if (tesseract_->lstm_choice_mode) { + confidencemap = res_it->GetBestLSTMSymbolChoices(); + } + hocr_str += "\n BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, + &monospace, &serif, &smallcaps, + &pointsize, &font_id); + hocr_str.add_str_int(" title='bbox ", left); + hocr_str.add_str_int(" ", top); + hocr_str.add_str_int(" ", right); + hocr_str.add_str_int(" ", bottom); + hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); + if (font_info) { + if (font_name) { + hocr_str += "; x_font "; + hocr_str += HOcrEscape(font_name); + } + hocr_str.add_str_int("; x_fsize ", pointsize); + } + hocr_str += "'"; + const char* lang = res_it->WordRecognitionLanguage(); + if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { + hocr_str += " lang='"; + hocr_str += lang; + hocr_str += "'"; + } + switch (res_it->WordDirection()) { + // Only emit direction if different from current paragraph direction + case DIR_LEFT_TO_RIGHT: + if (!para_is_ltr) hocr_str += " dir='ltr'"; + break; + case DIR_RIGHT_TO_LEFT: + if (para_is_ltr) hocr_str += " dir='rtl'"; + break; + case DIR_MIX: + case DIR_NEUTRAL: + default: // Do nothing. 
+ break; + } + hocr_str += ">"; + bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); + bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); + bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); + if (bold) hocr_str += ""; + if (italic) hocr_str += ""; + do { + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); + if (grapheme && grapheme[0] != 0) { + hocr_str += HOcrEscape(grapheme.get()); + } + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + if (italic) hocr_str += ""; + if (bold) hocr_str += ""; + // If the lstm choice mode is required it is added here + if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + hocr_str += "\n > timestep = (*confidencemap)[i]; + for (std::pair conf : timestep) { + hocr_str += "lstm_choice_mode == 2 && confidencemap != nullptr) { + for (size_t i = 0; i < confidencemap->size(); i++) { + std::vector> timestep = (*confidencemap)[i]; + if (timestep.size() > 0) { + hocr_str += "\n WordRecognitionLanguage(); - if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) { - hocr_str += " lang='"; - hocr_str += lang; - hocr_str += "'"; - } - switch (res_it->WordDirection()) { - // Only emit direction if different from current paragraph direction - case DIR_LEFT_TO_RIGHT: - if (!para_is_ltr) hocr_str += " dir='ltr'"; - break; - case DIR_RIGHT_TO_LEFT: - if (para_is_ltr) hocr_str += " dir='rtl'"; - break; - case DIR_MIX: - case DIR_NEUTRAL: - default: // Do nothing. 
- break; - } hocr_str += ">"; - bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); - bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); - bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); - if (bold) hocr_str += ""; - if (italic) hocr_str += ""; - do { - const std::unique_ptr grapheme( - res_it->GetUTF8Text(RIL_SYMBOL)); - if (grapheme && grapheme[0] != 0) { - hocr_str += HOcrEscape(grapheme.get()); - } - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - if (italic) hocr_str += ""; - if (bold) hocr_str += ""; - // If the lstm choice mode is required it is added here - if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - hocr_str += "\n > timestep = (*confidencemap)[i]; - for (std::pair conf : timestep) { - hocr_str += "lstm_choice_mode == 2 && confidencemap != nullptr) { - for (size_t i = 0; i < confidencemap->size(); i++) { - std::vector> timestep = (*confidencemap)[i]; - if (timestep.size() > 0) { - hocr_str += "\n Empty(RIL_BLOCK)) { - if (res_it->Empty(RIL_WORD)) { - res_it->Next(RIL_WORD); - continue; - } - - // Add rows for any new block/paragraph/textline. 
- if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - block_num++; - par_num = 0; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block - } - if (res_it->IsAtBeginningOf(RIL_PARA)) { - par_num++; - line_num = 0; - word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para - } - if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - line_num++; - word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line - } - - // Now, process the word... - int left, top, right, bottom; - res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word - tsv_str.add_str_int("\t", block_num); - tsv_str.add_str_int("\t", par_num); - tsv_str.add_str_int("\t", line_num); - tsv_str.add_str_int("\t", word_num); - tsv_str.add_str_int("\t", left); - tsv_str.add_str_int("\t", top); - tsv_str.add_str_int("\t", right - left); - tsv_str.add_str_int("\t", bottom - top); - tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); - tsv_str += "\t"; - - // Increment counts if at end of block/paragraph/textline. 
- if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; - if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; - if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; - - do { - tsv_str += - std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); - res_it->Next(RIL_SYMBOL); - } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row - wcnt++; - } +char* TessBaseAPI::GetTSVText(int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) + return nullptr; + + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; + int page_id = page_number + 1; // we use 1-based page numbers. + + STRING tsv_str(""); + + int page_num = page_id; + int block_num = 0; + int par_num = 0; + int line_num = 0; + int word_num = 0; + + tsv_str.add_str_int("1\t", page_num); // level 1 - page + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", rect_left_); + tsv_str.add_str_int("\t", rect_top_); + tsv_str.add_str_int("\t", rect_width_); + tsv_str.add_str_int("\t", rect_height_); + tsv_str += "\t-1\t\n"; + + ResultIterator* res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + // Add rows for any new block/paragraph/textline. 
+ if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + block_num++; + par_num = 0; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for block + } + if (res_it->IsAtBeginningOf(RIL_PARA)) { + par_num++; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_PARA, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for para + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + line_num++; + word_num = 0; + tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for line + } + + // Now, process the word... + int left, top, right, bottom; + res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + word_num++; + tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", left); + tsv_str.add_str_int("\t", top); + tsv_str.add_str_int("\t", right - left); + tsv_str.add_str_int("\t", bottom - top); + tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); + tsv_str += "\t"; + + // Increment counts if at end of block/paragraph/textline. 
+ if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; + if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; + if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; + + do { + tsv_str += + std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + tsv_str += "\n"; // end of row + wcnt++; + } - char* ret = new char[tsv_str.length() + 1]; - strcpy(ret, tsv_str.string()); - delete res_it; - return ret; - } + char* ret = new char[tsv_str.length() + 1]; + strcpy(ret, tsv_str.string()); + delete res_it; + return ret; +} /** The 5 numbers output for each box (the usual 4 and a page number.) */ - const int kNumbersPerBlob = 5; +const int kNumbersPerBlob = 5; /** * The number of bytes taken by each number. Since we use int16_t for ICOORD, * assume only 5 digits max. */ - const int kBytesPerNumber = 5; +const int kBytesPerNumber = 5; /** * Multiplier for max expected textlength assumes (kBytesPerNumber + space) * * kNumbersPerBlob plus the newline. Add to this the * original UTF8 characters, and one kMaxBytesPerLine for safety. */ - const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; +const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; /** Max bytes in the decimal representation of int64_t. */ - const int kBytesPer64BitNumber = 20; +const int kBytesPer64BitNumber = 20; /** * A maximal single box could occupy kNumbersPerBlob numbers at * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a * space plus the newline and the maximum length of a UNICHAR. * Test against this on each iteration for safety. 
*/ - const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + - UNICHAR_LEN; +const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + + UNICHAR_LEN; /** * The recognized text is returned as a char* which is coded @@ -1841,160 +1841,160 @@ namespace tesseract { * page_number is a 0-base page index that will appear in the box file. * Returned string must be freed with the delete [] operator. */ - char* TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int blob_count; - int utf8_length = TextLength(&blob_count); - int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + - kMaxBytesPerLine; - char* result = new char[total_length]; - result[0] = '\0'; - int output_length = 0; - LTRResultIterator* it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text( - it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. - for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') - text[i] = kTesseractReject; - } - snprintf(result + output_length, total_length - output_length, - "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, - right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... 
- if (output_length + kMaxBytesPerLine > total_length) - break; - } - } while (it->Next(RIL_SYMBOL)); - delete it; - return result; - } +char* TessBaseAPI::GetBoxText(int page_number) { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int blob_count; + int utf8_length = TextLength(&blob_count); + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + + kMaxBytesPerLine; + char* result = new char[total_length]; + result[0] = '\0'; + int output_length = 0; + LTRResultIterator* it = GetLTRIterator(); + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text( + it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') + text[i] = kTesseractReject; + } + snprintf(result + output_length, total_length - output_length, + "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, + right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... + if (output_length + kMaxBytesPerLine > total_length) + break; + } + } while (it->Next(RIL_SYMBOL)); + delete it; + return result; +} /** * Conversion table for non-latin characters. * Maps characters out of the latin set into the latin set. * TODO(rays) incorporate this translation into unicharset. */ - const int kUniChs[] = { - 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 - }; +const int kUniChs[] = { + 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 +}; /** Latin chars corresponding to the unicode chars above. 
*/ - const int kLatinChs[] = { - 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 - }; +const int kLatinChs[] = { + 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 +}; /** * The recognized text is returned as a char* which is coded * as UNLV format Latin-1 with specific reject and suspect codes. * Returned string must be freed with the delete [] operator. */ - char* TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - bool tilde_crunch_written = false; - bool last_char_was_newline = true; - bool last_char_was_tilde = false; - - int total_length = TextLength(nullptr); - PAGE_RES_IT page_res_it(page_res_); - char* result = new char[total_length]; - char* ptr = result; - for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - // Process the current word. - if (word->unlv_crunch_mode != CR_NONE) { - if (word->unlv_crunch_mode != CR_DELETE && - (!tilde_crunch_written || - (word->unlv_crunch_mode == CR_KEEP_SPACE && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)))) { - if (!word->word->flag(W_BOL) && - word->word->space() > 0 && - !word->word->flag(W_FUZZY_NON) && - !word->word->flag(W_FUZZY_SP)) { - /* Write a space to separate from preceding good text */ - *ptr++ = ' '; - last_char_was_tilde = false; - } - if (!last_char_was_tilde) { - // Write a reject char. - last_char_was_tilde = true; - *ptr++ = kUNLVReject; - tilde_crunch_written = true; - last_char_was_newline = false; - } - } - } else { - // NORMAL PROCESSING of non tilde crunched words. 
- tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); - const char* wordstr = word->best_choice->unichar_string().string(); - const STRING& lengths = word->best_choice->unichar_lengths(); - int length = lengths.length(); - int i = 0; - int offset = 0; - - if (last_char_was_tilde && - word->word->space() == 0 && wordstr[offset] == ' ') { - // Prevent adjacent tilde across words - we know that adjacent tildes - // within words have been removed. - // Skip the first character. - offset = lengths[i++]; - } - if (i < length && wordstr[offset] != 0) { - if (!last_char_was_newline) - *ptr++ = ' '; - else - last_char_was_newline = false; - for (; i < length; offset += lengths[i++]) { - if (wordstr[offset] == ' ' || - wordstr[offset] == kTesseractReject) { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } else { - if (word->reject_map[i].rejected()) - *ptr++ = kUNLVSuspect; - UNICHAR ch(wordstr + offset, lengths[i]); - int uni_ch = ch.first_uni(); - for (int j = 0; kUniChs[j] != 0; ++j) { - if (kUniChs[j] == uni_ch) { - uni_ch = kLatinChs[j]; - break; - } - } - if (uni_ch <= 0xff) { - *ptr++ = static_cast(uni_ch); - last_char_was_tilde = false; - } else { - *ptr++ = kUNLVReject; - last_char_was_tilde = true; - } - } - } - } +char* TessBaseAPI::GetUNLVText() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + bool tilde_crunch_written = false; + bool last_char_was_newline = true; + bool last_char_was_tilde = false; + + int total_length = TextLength(nullptr); + PAGE_RES_IT page_res_it(page_res_); + char* result = new char[total_length]; + char* ptr = result; + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + // Process the current word. 
+ if (word->unlv_crunch_mode != CR_NONE) { + if (word->unlv_crunch_mode != CR_DELETE && + (!tilde_crunch_written || + (word->unlv_crunch_mode == CR_KEEP_SPACE && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)))) { + if (!word->word->flag(W_BOL) && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)) { + /* Write a space to separate from preceding good text */ + *ptr++ = ' '; + last_char_was_tilde = false; + } + if (!last_char_was_tilde) { + // Write a reject char. + last_char_was_tilde = true; + *ptr++ = kUNLVReject; + tilde_crunch_written = true; + last_char_was_newline = false; + } + } + } else { + // NORMAL PROCESSING of non tilde crunched words. + tilde_crunch_written = false; + tesseract_->set_unlv_suspects(word); + const char* wordstr = word->best_choice->unichar_string().string(); + const STRING& lengths = word->best_choice->unichar_lengths(); + int length = lengths.length(); + int i = 0; + int offset = 0; + + if (last_char_was_tilde && + word->word->space() == 0 && wordstr[offset] == ' ') { + // Prevent adjacent tilde across words - we know that adjacent tildes + // within words have been removed. + // Skip the first character. 
+ offset = lengths[i++]; + } + if (i < length && wordstr[offset] != 0) { + if (!last_char_was_newline) + *ptr++ = ' '; + else + last_char_was_newline = false; + for (; i < length; offset += lengths[i++]) { + if (wordstr[offset] == ' ' || + wordstr[offset] == kTesseractReject) { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } else { + if (word->reject_map[i].rejected()) + *ptr++ = kUNLVSuspect; + UNICHAR ch(wordstr + offset, lengths[i]); + int uni_ch = ch.first_uni(); + for (int j = 0; kUniChs[j] != 0; ++j) { + if (kUniChs[j] == uni_ch) { + uni_ch = kLatinChs[j]; + break; + } } - if (word->word->flag(W_EOL) && !last_char_was_newline) { - /* Add a new line output */ - *ptr++ = '\n'; - tilde_crunch_written = false; - last_char_was_newline = true; - last_char_was_tilde = false; + if (uni_ch <= 0xff) { + *ptr++ = static_cast(uni_ch); + last_char_was_tilde = false; + } else { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; } + } } - *ptr++ = '\n'; - *ptr = '\0'; - return result; + } + } + if (word->word->flag(W_EOL) && !last_char_was_newline) { + /* Add a new line output */ + *ptr++ = '\n'; + tilde_crunch_written = false; + last_char_was_newline = true; + last_char_was_tilde = false; } + } + *ptr++ = '\n'; + *ptr = '\0'; + return result; +} #ifndef DISABLED_LEGACY_ENGINE @@ -2007,103 +2007,103 @@ namespace tesseract { * script_conf is confidence level in the script * Returns true on success and writes values to each parameter as an output */ - bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, - float* script_conf) { - OSResults osr; - - bool osd = DetectOS(&osr); - if (!osd) { - return false; - } +bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, + float* script_conf) { + OSResults osr; + + bool osd = DetectOS(&osr); + if (!osd) { + return false; + } - int orient_id = osr.best_result.orientation_id; - int script_id = 
osr.get_best_script(orient_id); - if (orient_conf) *orient_conf = osr.best_result.oconfidence; - if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees + int orient_id = osr.best_result.orientation_id; + int script_id = osr.get_best_script(orient_id); + if (orient_conf) *orient_conf = osr.best_result.oconfidence; + if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees - if (script_name) { - const char* script = osr.unicharset->get_script_from_script_id(script_id); + if (script_name) { + const char* script = osr.unicharset->get_script_from_script_id(script_id); - *script_name = script; - } + *script_name = script; + } - if (script_conf) *script_conf = osr.best_result.sconfidence; + if (script_conf) *script_conf = osr.best_result.sconfidence; - return true; - } + return true; +} /** * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. * page_number is a 0-based page index that will appear in the osd file. 
*/ - char* TessBaseAPI::GetOsdText(int page_number) { - int orient_deg; - float orient_conf; - const char* script_name; - float script_conf; - - if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, - &script_conf)) - return nullptr; - - // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_deg / 90); - - const int kOsdBufsize = 255; - char* osd_buf = new char[kOsdBufsize]; - snprintf(osd_buf, kOsdBufsize, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, - script_conf); - - return osd_buf; - } +char* TessBaseAPI::GetOsdText(int page_number) { + int orient_deg; + float orient_conf; + const char* script_name; + float script_conf; + + if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, + &script_conf)) + return nullptr; + + // clockwise rotation needed to make the page upright + int rotate = OrientationIdToValue(orient_deg / 90); + + const int kOsdBufsize = 255; + char* osd_buf = new char[kOsdBufsize]; + snprintf(osd_buf, kOsdBufsize, + "Page number: %d\n" + "Orientation in degrees: %d\n" + "Rotate: %d\n" + "Orientation confidence: %.2f\n" + "Script: %s\n" + "Script confidence: %.2f\n", + page_number, orient_deg, rotate, orient_conf, script_name, + script_conf); + + return osd_buf; +} #endif // ndef DISABLED_LEGACY_ENGINE /** Returns the average word confidence for Tesseract page result. 
*/ - int TessBaseAPI::MeanTextConf() { - int* conf = AllWordConfidences(); - if (!conf) return 0; - int sum = 0; - int *pt = conf; - while (*pt >= 0) sum += *pt++; - if (pt != conf) sum /= pt - conf; - delete [] conf; - return sum; - } +int TessBaseAPI::MeanTextConf() { + int* conf = AllWordConfidences(); + if (!conf) return 0; + int sum = 0; + int *pt = conf; + while (*pt >= 0) sum += *pt++; + if (pt != conf) sum /= pt - conf; + delete [] conf; + return sum; +} /** Returns an array of all word confidences, terminated by -1. */ - int* TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || - (!recognition_done_ && Recognize(nullptr) < 0)) - return nullptr; - int n_word = 0; - PAGE_RES_IT res_it(page_res_); - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) - n_word++; - - int* conf = new int[n_word+1]; - n_word = 0; - for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { - WERD_RES *word = res_it.word(); - WERD_CHOICE* choice = word->best_choice; - int w_conf = static_cast(100 + 5 * choice->certainty()); - // This is the eq for converting Tesseract confidence to 1..100 - if (w_conf < 0) w_conf = 0; - if (w_conf > 100) w_conf = 100; - conf[n_word++] = w_conf; - } - conf[n_word] = -1; - return conf; - } +int* TessBaseAPI::AllWordConfidences() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int n_word = 0; + PAGE_RES_IT res_it(page_res_); + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) + n_word++; + + int* conf = new int[n_word+1]; + n_word = 0; + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { + WERD_RES *word = res_it.word(); + WERD_CHOICE* choice = word->best_choice; + int w_conf = static_cast(100 + 5 * choice->certainty()); + // This is the eq for converting Tesseract confidence to 1..100 + if (w_conf < 0) w_conf = 0; + if (w_conf > 100) w_conf = 100; + conf[n_word++] = w_conf; + } + conf[n_word] = -1; + 
return conf; +} #ifndef DISABLED_LEGACY_ENGINE /** @@ -2116,61 +2116,61 @@ namespace tesseract { * The currently set PageSegMode is preserved. * Returns false if adaption was not possible for some reason. */ - bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { - int debug = 0; - GetIntVariable("applybox_debug", &debug); - bool success = true; - PageSegMode current_psm = GetPageSegMode(); - SetPageSegMode(mode); - SetVariable("classify_enable_learning", "0"); - const std::unique_ptr text(GetUTF8Text()); - if (debug) { - tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); - } - if (text != nullptr) { - PAGE_RES_IT it(page_res_); - WERD_RES* word_res = it.word(); - if (word_res != nullptr) { - word_res->word->set_text(wordstr); - // Check to see if text matches wordstr. - int w = 0; - int t; - for (t = 0; text[t] != '\0'; ++t) { - if (text[t] == '\n' || text[t] == ' ') - continue; - while (wordstr[w] == ' ') ++w; - if (text[t] != wordstr[w]) - break; - ++w; - } - if (text[t] != '\0' || wordstr[w] != '\0') { - // No match. 
- delete page_res_; - GenericVector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); - PAGE_RES_IT pr_it(page_res_); - if (pr_it.word() == nullptr) - success = false; - else - word_res = pr_it.word(); - } else { - word_res->BestChoiceToCorrectText(); - } - if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); - } - } else { - success = false; - } - } else { - success = false; - } - SetPageSegMode(current_psm); - return success; +bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { + int debug = 0; + GetIntVariable("applybox_debug", &debug); + bool success = true; + PageSegMode current_psm = GetPageSegMode(); + SetPageSegMode(mode); + SetVariable("classify_enable_learning", "0"); + const std::unique_ptr text(GetUTF8Text()); + if (debug) { + tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); + } + if (text != nullptr) { + PAGE_RES_IT it(page_res_); + WERD_RES* word_res = it.word(); + if (word_res != nullptr) { + word_res->word->set_text(wordstr); + // Check to see if text matches wordstr. + int w = 0; + int t; + for (t = 0; text[t] != '\0'; ++t) { + if (text[t] == '\n' || text[t] == ' ') + continue; + while (wordstr[w] == ' ') ++w; + if (text[t] != wordstr[w]) + break; + ++w; + } + if (text[t] != '\0' || wordstr[w] != '\0') { + // No match. 
+ delete page_res_; + GenericVector boxes; + page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); + tesseract_->ReSegmentByClassification(page_res_); + tesseract_->TidyUp(page_res_); + PAGE_RES_IT pr_it(page_res_); + if (pr_it.word() == nullptr) + success = false; + else + word_res = pr_it.word(); + } else { + word_res->BestChoiceToCorrectText(); + } + if (success) { + tesseract_->EnableLearning = true; + tesseract_->LearnWord(nullptr, word_res); + } + } else { + success = false; } + } else { + success = false; + } + SetPageSegMode(current_psm); + return success; +} #endif // ndef DISABLED_LEGACY_ENGINE /** @@ -2179,12 +2179,12 @@ namespace tesseract { * Afterwards, you must call SetImage or TesseractRect before doing * any Recognize or Get* operation. */ - void TessBaseAPI::Clear() { - if (thresholder_ != nullptr) - thresholder_->Clear(); - ClearResults(); - if (tesseract_ != nullptr) SetInputImage(nullptr); - } +void TessBaseAPI::Clear() { + if (thresholder_ != nullptr) + thresholder_->Clear(); + ClearResults(); + if (tesseract_ != nullptr) SetInputImage(nullptr); +} /** * Close down tesseract and free up all memory. End() is equivalent to @@ -2192,100 +2192,100 @@ namespace tesseract { * Once End() has been used, none of the other API functions may be used * other than Init and anything declared above it in the class definition. 
*/ - void TessBaseAPI::End() { - Clear(); - delete thresholder_; - thresholder_ = nullptr; - delete page_res_; - page_res_ = nullptr; - delete block_list_; - block_list_ = nullptr; - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; - delete tesseract_; - tesseract_ = nullptr; - delete osd_tesseract_; - osd_tesseract_ = nullptr; - delete equ_detect_; - equ_detect_ = nullptr; - delete input_file_; - input_file_ = nullptr; - delete output_file_; - output_file_ = nullptr; - delete datapath_; - datapath_ = nullptr; - delete language_; - language_ = nullptr; - } +void TessBaseAPI::End() { + Clear(); + delete thresholder_; + thresholder_ = nullptr; + delete page_res_; + page_res_ = nullptr; + delete block_list_; + block_list_ = nullptr; + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; + delete tesseract_; + tesseract_ = nullptr; + delete osd_tesseract_; + osd_tesseract_ = nullptr; + delete equ_detect_; + equ_detect_ = nullptr; + delete input_file_; + input_file_ = nullptr; + delete output_file_; + output_file_ = nullptr; + delete datapath_; + datapath_ = nullptr; + delete language_; + language_ = nullptr; +} // Clear any library-level memory caches. // There are a variety of expensive-to-load constant data structures (mostly // language dictionaries) that are cached globally -- surviving the Init() // and End() of individual TessBaseAPI's. This function allows the clearing // of these caches. 
- void TessBaseAPI::ClearPersistentCache() { - Dict::GlobalDawgCache()->DeleteUnusedDawgs(); - } +void TessBaseAPI::ClearPersistentCache() { + Dict::GlobalDawgCache()->DeleteUnusedDawgs(); +} /** * Check whether a word is valid according to Tesseract's language model * returns 0 if the word is invalid, non-zero if valid */ - int TessBaseAPI::IsValidWord(const char *word) { - return tesseract_->getDict().valid_word(word); - } +int TessBaseAPI::IsValidWord(const char *word) { + return tesseract_->getDict().valid_word(word); +} // Returns true if utf8_character is defined in the UniCharset. - bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { - return tesseract_->unicharset.contains_unichar(utf8_character); - } +bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { + return tesseract_->unicharset.contains_unichar(utf8_character); +} // TODO(rays) Obsolete this function and replace with a more aptly named // function that returns image coordinates rather than tesseract coordinates. - bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { - PageIterator* it = AnalyseLayout(); - if (it == nullptr) { - return false; - } - int x1, x2, y1, y2; - it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); - // Calculate offset and slope (NOTE: Kind of ugly) - if (x2 <= x1) x2 = x1 + 1; - // Convert the point pair to slope/offset of the baseline (in image coords.) - *out_slope = static_cast(y2 - y1) / (x2 - x1); - *out_offset = static_cast(y1 - *out_slope * x1); - // Get the y-coord of the baseline at the left and right edges of the - // textline's bounding box. - int left, top, right, bottom; - if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { - delete it; - return false; - } - int left_y = IntCastRounded(*out_slope * left + *out_offset); - int right_y = IntCastRounded(*out_slope * right + *out_offset); - // Shift the baseline down so it passes through the nearest bottom-corner - // of the textline's bounding box. 
This is the difference between the y - // at the lowest (max) edge of the box and the actual box bottom. - *out_offset += bottom - std::max(left_y, right_y); - // Switch back to bottom-up tesseract coordinates. Requires negation of - // the slope and height - offset for the offset. - *out_slope = -*out_slope; - *out_offset = rect_height_ - *out_offset; - delete it; - - return true; - } +bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { + PageIterator* it = AnalyseLayout(); + if (it == nullptr) { + return false; + } + int x1, x2, y1, y2; + it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); + // Calculate offset and slope (NOTE: Kind of ugly) + if (x2 <= x1) x2 = x1 + 1; + // Convert the point pair to slope/offset of the baseline (in image coords.) + *out_slope = static_cast(y2 - y1) / (x2 - x1); + *out_offset = static_cast(y1 - *out_slope * x1); + // Get the y-coord of the baseline at the left and right edges of the + // textline's bounding box. + int left, top, right, bottom; + if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { + delete it; + return false; + } + int left_y = IntCastRounded(*out_slope * left + *out_offset); + int right_y = IntCastRounded(*out_slope * right + *out_offset); + // Shift the baseline down so it passes through the nearest bottom-corner + // of the textline's bounding box. This is the difference between the y + // at the lowest (max) edge of the box and the actual box bottom. + *out_offset += bottom - std::max(left_y, right_y); + // Switch back to bottom-up tesseract coordinates. Requires negation of + // the slope and height - offset for the offset. + *out_slope = -*out_slope; + *out_offset = rect_height_ - *out_offset; + delete it; + + return true; +} /** Sets Dict::letter_is_okay_ function to point to the given function. 
*/ - void TessBaseAPI::SetDictFunc(DictFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; - } - } +void TessBaseAPI::SetDictFunc(DictFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().letter_is_okay_ = f; + } +} /** * Sets Dict::probability_in_context_ function to point to the given @@ -2295,35 +2295,35 @@ namespace tesseract { * "character" (in general a utf-8 string), given the context of a previous * utf-8 string. */ - void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; - // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; - } - } +void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().probability_in_context_ = f; + // Set it for the sublangs too. + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; } + } +} #ifndef DISABLED_LEGACY_ENGINE /** Sets Wordrec::fill_lattice_ function to point to the given function. */ - void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { - if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; - } +void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { + if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; +} #endif // ndef DISABLED_LEGACY_ENGINE /** Common code for setting the image. 
*/ - bool TessBaseAPI::InternalSetImage() { - if (tesseract_ == nullptr) { - tprintf("Please call Init before attempting to set an image.\n"); - return false; - } - if (thresholder_ == nullptr) - thresholder_ = new ImageThresholder; - ClearResults(); - return true; - } +bool TessBaseAPI::InternalSetImage() { + if (tesseract_ == nullptr) { + tprintf("Please call Init before attempting to set an image.\n"); + return false; + } + if (thresholder_ == nullptr) + thresholder_ = new ImageThresholder; + ClearResults(); + return true; +} /** * Run the thresholder to make the thresholded image, returned in pix, @@ -2331,155 +2331,155 @@ namespace tesseract { * to an existing pixDestroyable Pix. * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ - bool TessBaseAPI::Threshold(Pix** pix) { - ASSERT_HOST(pix != nullptr); - if (*pix != nullptr) - pixDestroy(pix); - // Zero resolution messes up the algorithms, so make sure it is credible. - int user_dpi = 0; - bool a = GetIntVariable("user_defined_dpi", &user_dpi); - int y_res = thresholder_->GetScaledYResolution(); - if (user_dpi && (user_dpi < kMinCredibleResolution || - user_dpi > kMaxCredibleResolution)) { - tprintf("Warning: User defined image dpi is outside of expected range " - "(%d - %d)!\n", - kMinCredibleResolution, kMaxCredibleResolution); - } - // Always use user defined dpi - if (user_dpi) { - thresholder_->SetSourceYResolution(user_dpi); - } else if (y_res < kMinCredibleResolution || - y_res > kMaxCredibleResolution) { - tprintf("Warning: Invalid resolution %d dpi. 
Using %d instead.\n", - y_res, kMinCredibleResolution); - thresholder_->SetSourceYResolution(kMinCredibleResolution); - } - PageSegMode pageseg_mode = - static_cast( - static_cast(tesseract_->tessedit_pageseg_mode)); - if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; - thresholder_->GetImageSizes(&rect_left_, &rect_top_, - &rect_width_, &rect_height_, - &image_width_, &image_height_); - if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); - } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); - } - // Set the internal resolution that is used for layout parameters from the - // estimated resolution, rather than the image resolution, which may be - // fabricated, but we will use the image resolution, if there is one, to - // report output point sizes. - int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), - kMinCredibleResolution, - kMaxCredibleResolution); - if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { - tprintf("Estimated internal resolution %d out of range! " - "Corrected to %d.\n", - thresholder_->GetScaledEstimatedResolution(), estimated_res); - } - tesseract_->set_source_resolution(estimated_res); - SavePixForCrash(estimated_res, *pix); - return true; - } +bool TessBaseAPI::Threshold(Pix** pix) { + ASSERT_HOST(pix != nullptr); + if (*pix != nullptr) + pixDestroy(pix); + // Zero resolution messes up the algorithms, so make sure it is credible. 
+ int user_dpi = 0; + bool a = GetIntVariable("user_defined_dpi", &user_dpi); + int y_res = thresholder_->GetScaledYResolution(); + if (user_dpi && (user_dpi < kMinCredibleResolution || + user_dpi > kMaxCredibleResolution)) { + tprintf("Warning: User defined image dpi is outside of expected range " + "(%d - %d)!\n", + kMinCredibleResolution, kMaxCredibleResolution); + } + // Always use user defined dpi + if (user_dpi) { + thresholder_->SetSourceYResolution(user_dpi); + } else if (y_res < kMinCredibleResolution || + y_res > kMaxCredibleResolution) { + tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", + y_res, kMinCredibleResolution); + thresholder_->SetSourceYResolution(kMinCredibleResolution); + } + PageSegMode pageseg_mode = + static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); + if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; + thresholder_->GetImageSizes(&rect_left_, &rect_top_, + &rect_width_, &rect_height_, + &image_width_, &image_height_); + if (!thresholder_->IsBinary()) { + tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + } else { + tesseract_->set_pix_thresholds(nullptr); + tesseract_->set_pix_grey(nullptr); + } + // Set the internal resolution that is used for layout parameters from the + // estimated resolution, rather than the image resolution, which may be + // fabricated, but we will use the image resolution, if there is one, to + // report output point sizes. + int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), + kMinCredibleResolution, + kMaxCredibleResolution); + if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { + tprintf("Estimated internal resolution %d out of range! 
" + "Corrected to %d.\n", + thresholder_->GetScaledEstimatedResolution(), estimated_res); + } + tesseract_->set_source_resolution(estimated_res); + SavePixForCrash(estimated_res, *pix); + return true; +} /** Find lines from the image making the BLOCK_LIST. */ - int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { - tprintf("Please call SetImage before attempting recognition.\n"); - return -1; - } - if (recognition_done_) - ClearResults(); - if (!block_list_->empty()) { - return 0; - } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract; -#ifndef DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); -#endif - } - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return -1; - } +int TessBaseAPI::FindLines() { + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (recognition_done_) + ClearResults(); + if (!block_list_->empty()) { + return 0; + } + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; + #ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); + #endif + } + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return -1; + } - tesseract_->PrepareForPageseg(); + tesseract_->PrepareForPageseg(); #ifndef DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { - if (equ_detect_ == nullptr && datapath_ != nullptr) { - equ_detect_ = new EquationDetect(datapath_->string(), nullptr); - } - if (equ_detect_ == nullptr) { - tprintf("Warning: Could not set equation detector\n"); - } else { - tesseract_->SetEquationDetect(equ_detect_); - } - } + if (tesseract_->textord_equation_detect) { + if (equ_detect_ == nullptr && datapath_ != nullptr) { + equ_detect_ = new EquationDetect(datapath_->string(), nullptr); + } + if (equ_detect_ == nullptr) { + tprintf("Warning: Could not set equation 
detector\n"); + } else { + tesseract_->SetEquationDetect(equ_detect_); + } + } #endif // ndef DISABLED_LEGACY_ENGINE - Tesseract* osd_tess = osd_tesseract_; - OSResults osr; - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && - osd_tess == nullptr) { - if (strcmp(language_->string(), "osd") == 0) { - osd_tess = tesseract_; - } else { - osd_tesseract_ = new Tesseract; - TessdataManager mgr(reader_); - if (datapath_ == nullptr) { - tprintf("Warning: Auto orientation and script detection requested," - " but data path is undefined\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, - "osd", OEM_TESSERACT_ONLY, - nullptr, 0, nullptr, nullptr, - false, &mgr) == 0) { - osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution( - thresholder_->GetSourceYResolution()); - } else { - tprintf("Warning: Auto orientation and script detection requested," - " but osd language failed to load\n"); - delete osd_tesseract_; - osd_tesseract_ = nullptr; - } - } - } + Tesseract* osd_tess = osd_tesseract_; + OSResults osr; + if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && + osd_tess == nullptr) { + if (strcmp(language_->string(), "osd") == 0) { + osd_tess = tesseract_; + } else { + osd_tesseract_ = new Tesseract; + TessdataManager mgr(reader_); + if (datapath_ == nullptr) { + tprintf("Warning: Auto orientation and script detection requested," + " but data path is undefined\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, + "osd", OEM_TESSERACT_ONLY, + nullptr, 0, nullptr, nullptr, + false, &mgr) == 0) { + osd_tess = osd_tesseract_; + osd_tesseract_->set_source_resolution( + thresholder_->GetSourceYResolution()); + } else { + tprintf("Warning: Auto orientation and script detection requested," + " but osd language failed to load\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } + } + } - if 
(tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) - return -1; + if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) + return -1; - // If Devanagari is being recognized, we use different images for page seg - // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); - return 0; - } + // If Devanagari is being recognized, we use different images for page seg + // and for OCR. + tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); + return 0; +} /** Delete the pageres and clear the block list ready for a new page. */ - void TessBaseAPI::ClearResults() { - if (tesseract_ != nullptr) { - tesseract_->Clear(); - } - delete page_res_; - page_res_ = nullptr; - recognition_done_ = false; - if (block_list_ == nullptr) - block_list_ = new BLOCK_LIST; - else - block_list_->clear(); - if (paragraph_models_ != nullptr) { - paragraph_models_->delete_data_pointers(); - delete paragraph_models_; - paragraph_models_ = nullptr; - } - SavePixForCrash(0, nullptr); - } +void TessBaseAPI::ClearResults() { + if (tesseract_ != nullptr) { + tesseract_->Clear(); + } + delete page_res_; + page_res_ = nullptr; + recognition_done_ = false; + if (block_list_ == nullptr) + block_list_ = new BLOCK_LIST; + else + block_list_->clear(); + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + SavePixForCrash(0, nullptr); +} /** * Return the length of the output text string, as UTF8, assuming @@ -2488,55 +2488,55 @@ namespace tesseract { * character. * Also return the number of recognized blobs in blob_count. */ - int TessBaseAPI::TextLength(int* blob_count) { - if (tesseract_ == nullptr || page_res_ == nullptr) - return 0; - - PAGE_RES_IT page_res_it(page_res_); - int total_length = 2; - int total_blobs = 0; - // Iterate over the data structures to extract the recognition result. 
- for (page_res_it.restart_page(); page_res_it.word () != nullptr; - page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); - WERD_CHOICE* choice = word->best_choice; - if (choice != nullptr) { - total_blobs += choice->length() + 2; - total_length += choice->unichar_string().length() + 2; - for (int i = 0; i < word->reject_map.length(); ++i) { - if (word->reject_map[i].rejected()) - ++total_length; - } - } - } - if (blob_count != nullptr) - *blob_count = total_blobs; - return total_length; +int TessBaseAPI::TextLength(int* blob_count) { + if (tesseract_ == nullptr || page_res_ == nullptr) + return 0; + + PAGE_RES_IT page_res_it(page_res_); + int total_length = 2; + int total_blobs = 0; + // Iterate over the data structures to extract the recognition result. + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + WERD_CHOICE* choice = word->best_choice; + if (choice != nullptr) { + total_blobs += choice->length() + 2; + total_length += choice->unichar_string().length() + 2; + for (int i = 0; i < word->reject_map.length(); ++i) { + if (word->reject_map[i].rejected()) + ++total_length; + } } + } + if (blob_count != nullptr) + *blob_count = total_blobs; + return total_length; +} #ifndef DISABLED_LEGACY_ENGINE /** * Estimates the Orientation And Script of the image. * Returns true if the image was processed successfully. 
*/ - bool TessBaseAPI::DetectOS(OSResults* osr) { - if (tesseract_ == nullptr) - return false; - ClearResults(); - if (tesseract_->pix_binary() == nullptr && - !Threshold(tesseract_->mutable_pix_binary())) { - return false; - } +bool TessBaseAPI::DetectOS(OSResults* osr) { + if (tesseract_ == nullptr) + return false; + ClearResults(); + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return false; + } - if (input_file_ == nullptr) - input_file_ = new STRING(kInputFile); - return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; - } + if (input_file_ == nullptr) + input_file_ = new STRING(kInputFile); + return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; +} #endif // ndef DISABLED_LEGACY_ENGINE - void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); - } +void TessBaseAPI::set_min_orientation_margin(double margin) { + tesseract_->min_orientation_margin.set_value(margin); +} /** * Return text orientation of each block as determined in an earlier page layout @@ -2552,98 +2552,98 @@ namespace tesseract { * be less than the total number of blocks. The ordering is intended to be * consistent with GetTextLines(). 
*/ - void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing) { - delete[] *block_orientation; - *block_orientation = nullptr; - delete[] *vertical_writing; - *vertical_writing = nullptr; - BLOCK_IT block_it(block_list_); - - block_it.move_to_first(); - int num_blocks = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - ++num_blocks; - } - if (!num_blocks) { - tprintf("WARNING: Found no blocks\n"); - return; - } - *block_orientation = new int[num_blocks]; - *vertical_writing = new bool[num_blocks]; - block_it.move_to_first(); - int i = 0; - for (block_it.mark_cycle_pt(); !block_it.cycled_list(); - block_it.forward()) { - if (!block_it.data()->pdblk.poly_block()->IsText()) { - continue; - } - FCOORD re_rotation = block_it.data()->re_rotation(); - float re_theta = re_rotation.angle(); - FCOORD classify_rotation = block_it.data()->classify_rotation(); - float classify_theta = classify_rotation.angle(); - double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; - if (rot_theta < 0) rot_theta += 4; - int num_rotations = static_cast(rot_theta + 0.5); - (*block_orientation)[i] = num_rotations; - // The classify_rotation is non-zero only if the text has vertical - // writing direction. 
- (*vertical_writing)[i] = classify_rotation.y() != 0.0f; - ++i; - } - } - - - void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { - int debug_level = 0; - GetIntVariable("paragraph_debug_level", &debug_level); - if (paragraph_models_ == nullptr) - paragraph_models_ = new GenericVector; - MutableIterator *result_it = GetMutableIterator(); - do { // Detect paragraphs for this block - GenericVector models; - ::tesseract::DetectParagraphs(debug_level, after_text_recognition, - result_it, &models); - *paragraph_models_ += models; - } while (result_it->Next(RIL_BLOCK)); - delete result_it; - } +void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing) { + delete[] *block_orientation; + *block_orientation = nullptr; + delete[] *vertical_writing; + *vertical_writing = nullptr; + BLOCK_IT block_it(block_list_); + + block_it.move_to_first(); + int num_blocks = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + ++num_blocks; + } + if (!num_blocks) { + tprintf("WARNING: Found no blocks\n"); + return; + } + *block_orientation = new int[num_blocks]; + *vertical_writing = new bool[num_blocks]; + block_it.move_to_first(); + int i = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + FCOORD re_rotation = block_it.data()->re_rotation(); + float re_theta = re_rotation.angle(); + FCOORD classify_rotation = block_it.data()->classify_rotation(); + float classify_theta = classify_rotation.angle(); + double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; + if (rot_theta < 0) rot_theta += 4; + int num_rotations = static_cast(rot_theta + 0.5); + (*block_orientation)[i] = num_rotations; + // The classify_rotation is non-zero only if the text has vertical + // writing direction. 
+ (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + ++i; + } +} + + +void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { + int debug_level = 0; + GetIntVariable("paragraph_debug_level", &debug_level); + if (paragraph_models_ == nullptr) + paragraph_models_ = new GenericVector; + MutableIterator *result_it = GetMutableIterator(); + do { // Detect paragraphs for this block + GenericVector models; + ::tesseract::DetectParagraphs(debug_level, after_text_recognition, + result_it, &models); + *paragraph_models_ += models; + } while (result_it->Next(RIL_BLOCK)); + delete result_it; +} /** This method returns the string form of the specified unichar. */ - const char* TessBaseAPI::GetUnichar(int unichar_id) { - return tesseract_->unicharset.id_to_unichar(unichar_id); - } +const char* TessBaseAPI::GetUnichar(int unichar_id) { + return tesseract_->unicharset.id_to_unichar(unichar_id); +} /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; - return tesseract_->getDict().GetDawg(i); - } +const Dawg *TessBaseAPI::GetDawg(int i) const { + if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; + return tesseract_->getDict().GetDawg(i); +} /** Return the number of dawgs loaded into tesseract_ object. */ - int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); - } +int TessBaseAPI::NumDawgs() const { + return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); +} /** Escape a char string - remove <>&"' with HTML codes. 
*/ - STRING HOcrEscape(const char* text) { - STRING ret; - const char *ptr; - for (ptr = text; *ptr; ptr++) { - switch (*ptr) { - case '<': ret += "<"; break; - case '>': ret += ">"; break; - case '&': ret += "&"; break; - case '"': ret += """; break; - case '\'': ret += "'"; break; - default: ret += *ptr; - } - } - return ret; +STRING HOcrEscape(const char* text) { + STRING ret; + const char *ptr; + for (ptr = text; *ptr; ptr++) { + switch (*ptr) { + case '<': ret += "<"; break; + case '>': ret += ">"; break; + case '&': ret += "&"; break; + case '"': ret += """; break; + case '\'': ret += "'"; break; + default: ret += *ptr; } + } + return ret; +} #ifndef DISABLED_LEGACY_ENGINE @@ -2653,271 +2653,271 @@ namespace tesseract { // Ocropus add-ons. /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { - ASSERT_HOST(FindLines() == 0); - BLOCK_LIST* result = block_list_; - block_list_ = nullptr; - return result; - } +BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { + ASSERT_HOST(FindLines() == 0); + BLOCK_LIST* result = block_list_; + block_list_ = nullptr; + return result; +} /** * Delete a block list. * This is to keep BLOCK_LIST pointer opaque * and let go of including the other headers. 
*/ - void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { - delete block_list; - } - - - ROW *TessBaseAPI::MakeTessOCRRow(float baseline, - float xheight, - float descender, - float ascender) { - int32_t xstarts[] = {-32000}; - double quad_coeffs[] = {0, 0, baseline}; - return new ROW(1, - xstarts, - quad_coeffs, - xheight, - ascender - (baseline + xheight), - descender - baseline, - 0, - 0); - } +void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { + delete block_list; +} + + +ROW *TessBaseAPI::MakeTessOCRRow(float baseline, + float xheight, + float descender, + float ascender) { + int32_t xstarts[] = {-32000}; + double quad_coeffs[] = {0, 0, baseline}; + return new ROW(1, + xstarts, + quad_coeffs, + xheight, + ascender - (baseline + xheight), + descender - baseline, + 0, + 0); +} /** Creates a TBLOB* from the whole pix. */ - TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { - int width = pixGetWidth(pix); - int height = pixGetHeight(pix); - BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); - - // Create C_BLOBs from the page - extract_edges(pix, &block); - - // Merge all C_BLOBs - C_BLOB_LIST *list = block.blob_list(); - C_BLOB_IT c_blob_it(list); - if (c_blob_it.empty()) - return nullptr; - // Move all the outlines to the first blob. - C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); - for (c_blob_it.forward(); - !c_blob_it.at_first(); - c_blob_it.forward()) { - C_BLOB *c_blob = c_blob_it.data(); - ol_it.add_list_after(c_blob->out_list()); - } - // Convert the first blob to the output TBLOB. 
- return TBLOB::PolygonalCopy(false, c_blob_it.data()); - } +TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); + + // Create C_BLOBs from the page + extract_edges(pix, &block); + + // Merge all C_BLOBs + C_BLOB_LIST *list = block.blob_list(); + C_BLOB_IT c_blob_it(list); + if (c_blob_it.empty()) + return nullptr; + // Move all the outlines to the first blob. + C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); + for (c_blob_it.forward(); + !c_blob_it.at_first(); + c_blob_it.forward()) { + C_BLOB *c_blob = c_blob_it.data(); + ol_it.add_list_after(c_blob->out_list()); + } + // Convert the first blob to the output TBLOB. + return TBLOB::PolygonalCopy(false, c_blob_it.data()); +} /** * This method baseline normalizes a TBLOB in-place. The input row is used * for normalization. The denorm is an optional parameter in which the * normalization-antidote is returned. */ - void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { - TBOX box = tblob->bounding_box(); - float x_center = (box.left() + box.right()) / 2.0f; - float baseline = row->base_line(x_center); - float scale = kBlnXHeight / row->x_height(); - tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); - } +void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { + TBOX box = tblob->bounding_box(); + float x_center = (box.left() + box.right()) / 2.0f; + float baseline = row->base_line(x_center); + float scale = kBlnXHeight / row->x_height(); + tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, + 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); +} /** * Return a TBLOB * from the whole pix. * To be freed later with delete. 
*/ - static TBLOB *make_tesseract_blob(float baseline, float xheight, - float descender, float ascender, - bool numeric_mode, Pix* pix) { - TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); - - // Normalize TBLOB - ROW *row = - TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); - TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); - delete row; - return tblob; - } +static TBLOB *make_tesseract_blob(float baseline, float xheight, + float descender, float ascender, + bool numeric_mode, Pix* pix) { + TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); + + // Normalize TBLOB + ROW *row = + TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); + delete row; + return tblob; +} /** * Adapt to recognize the current image as the given character. * The image must be preloaded into pix_binary_ and be just an image * of a single character. */ - void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender) { - UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); - TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, - tesseract_->classify_bln_numeric_mode, - tesseract_->pix_binary()); - float threshold; - float best_rating = -100; - - - // Classify to get a raw choice. 
- BLOB_CHOICE_LIST choices; - tesseract_->AdaptiveClassifier(blob, &choices); - BLOB_CHOICE_IT choice_it; - choice_it.set_to_list(&choices); - for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); - choice_it.forward()) { - if (choice_it.data()->rating() > best_rating) { - best_rating = choice_it.data()->rating(); - } - } - - threshold = tesseract_->matcher_good_threshold; - - if (blob->outlines) - tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, - tesseract_->AdaptedTemplates); - delete blob; - } - - - PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { - PAGE_RES *page_res = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); - return page_res; +void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender) { + UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); + TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + tesseract_->classify_bln_numeric_mode, + tesseract_->pix_binary()); + float threshold; + float best_rating = -100; + + + // Classify to get a raw choice. 
+ BLOB_CHOICE_LIST choices; + tesseract_->AdaptiveClassifier(blob, &choices); + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(&choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + if (choice_it.data()->rating() > best_rating) { + best_rating = choice_it.data()->rating(); } + } - PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result) { - if (!pass1_result) - pass1_result = new PAGE_RES(false, block_list, - &(tesseract_->prev_word_best_choice_)); - tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); - return pass1_result; - } - - struct TESS_CHAR : ELIST_LINK { - char *unicode_repr; - int length; // of unicode_repr - float cost; - TBOX box; - - TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { - length = (len == -1 ? strlen(repr) : len); - unicode_repr = new char[length + 1]; - strncpy(unicode_repr, repr, length); - } + threshold = tesseract_->matcher_good_threshold; + + if (blob->outlines) + tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, + tesseract_->AdaptedTemplates); + delete blob; +} + + +PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { + PAGE_RES *page_res = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); + return page_res; +} + +PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result) { + if (!pass1_result) + pass1_result = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); + return pass1_result; +} + +struct TESS_CHAR : ELIST_LINK { + char *unicode_repr; + int length; // of unicode_repr + float cost; + TBOX box; + + TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { + length = (len == -1 ? 
strlen(repr) : len); + unicode_repr = new char[length + 1]; + strncpy(unicode_repr, repr, length); + } - TESS_CHAR() - : unicode_repr(nullptr), - length(0), - cost(0.0f) - { // Satisfies ELISTIZE. - } - ~TESS_CHAR() { - delete [] unicode_repr; - } - }; + TESS_CHAR() + : unicode_repr(nullptr), + length(0), + cost(0.0f) + { // Satisfies ELISTIZE. + } + ~TESS_CHAR() { + delete [] unicode_repr; + } +}; - ELISTIZEH(TESS_CHAR) - ELISTIZE(TESS_CHAR) +ELISTIZEH(TESS_CHAR) +ELISTIZE(TESS_CHAR) - static void add_space(TESS_CHAR_IT* it) { - TESS_CHAR *t = new TESS_CHAR(0, " "); - it->add_after_then_move(t); - } +static void add_space(TESS_CHAR_IT* it) { + TESS_CHAR *t = new TESS_CHAR(0, " "); + it->add_after_then_move(t); +} - static float rating_to_cost(float rating) { - rating = 100 + rating; - // cuddled that to save from coverage profiler - // (I have never seen ratings worse than -100, - // but the check won't hurt) - if (rating < 0) rating = 0; - return rating; - } +static float rating_to_cost(float rating) { + rating = 100 + rating; + // cuddled that to save from coverage profiler + // (I have never seen ratings worse than -100, + // but the check won't hurt) + if (rating < 0) rating = 0; + return rating; +} /** * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. 
*/ - static void extract_result(TESS_CHAR_IT* out, - PAGE_RES* page_res) { - PAGE_RES_IT page_res_it(page_res); - int word_count = 0; - while (page_res_it.word() != nullptr) { - WERD_RES *word = page_res_it.word(); - const char *str = word->best_choice->unichar_string().string(); - const char *len = word->best_choice->unichar_lengths().string(); - TBOX real_rect = word->word->bounding_box(); - - if (word_count) - add_space(out); - int n = strlen(len); - for (int i = 0; i < n; i++) { - TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), - str, *len); - tc->box = real_rect.intersection(word->box_word->BlobBox(i)); - out->add_after_then_move(tc); - str += *len; - len++; - } - page_res_it.forward(); - word_count++; - } - } +static void extract_result(TESS_CHAR_IT* out, + PAGE_RES* page_res) { + PAGE_RES_IT page_res_it(page_res); + int word_count = 0; + while (page_res_it.word() != nullptr) { + WERD_RES *word = page_res_it.word(); + const char *str = word->best_choice->unichar_string().string(); + const char *len = word->best_choice->unichar_lengths().string(); + TBOX real_rect = word->word->bounding_box(); + + if (word_count) + add_space(out); + int n = strlen(len); + for (int i = 0; i < n; i++) { + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); + tc->box = real_rect.intersection(word->box_word->BlobBox(i)); + out->add_after_then_move(tc); + str += *len; + len++; + } + page_res_it.forward(); + word_count++; + } +} /** * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. 
*/ - int TessBaseAPI::TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res) { - TESS_CHAR_LIST tess_chars; - TESS_CHAR_IT tess_chars_it(&tess_chars); - extract_result(&tess_chars_it, page_res); - tess_chars_it.move_to_first(); - int n = tess_chars.length(); - int text_len = 0; - *lengths = new int[n]; - *costs = new float[n]; - *x0 = new int[n]; - *y0 = new int[n]; - *x1 = new int[n]; - *y1 = new int[n]; - int i = 0; - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward(), i++) { - TESS_CHAR *tc = tess_chars_it.data(); - text_len += (*lengths)[i] = tc->length; - (*costs)[i] = tc->cost; - (*x0)[i] = tc->box.left(); - (*y0)[i] = tc->box.bottom(); - (*x1)[i] = tc->box.right(); - (*y1)[i] = tc->box.top(); - } - char *p = *text = new char[text_len]; - - tess_chars_it.move_to_first(); - for (tess_chars_it.mark_cycle_pt(); - !tess_chars_it.cycled_list(); - tess_chars_it.forward()) { - TESS_CHAR *tc = tess_chars_it.data(); - strncpy(p, tc->unicode_repr, tc->length); - p += tc->length; - } - return n; - } +int TessBaseAPI::TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res) { + TESS_CHAR_LIST tess_chars; + TESS_CHAR_IT tess_chars_it(&tess_chars); + extract_result(&tess_chars_it, page_res); + tess_chars_it.move_to_first(); + int n = tess_chars.length(); + int text_len = 0; + *lengths = new int[n]; + *costs = new float[n]; + *x0 = new int[n]; + *y0 = new int[n]; + *x1 = new int[n]; + *y1 = new int[n]; + int i = 0; + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward(), i++) { + TESS_CHAR *tc = tess_chars_it.data(); + text_len += (*lengths)[i] = tc->length; + (*costs)[i] = tc->cost; + (*x0)[i] = tc->box.left(); + (*y0)[i] = tc->box.bottom(); + (*x1)[i] = tc->box.right(); + (*y1)[i] = tc->box.top(); + } + char *p = *text = new 
char[text_len]; + + tess_chars_it.move_to_first(); + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward()) { + TESS_CHAR *tc = tess_chars_it.data(); + strncpy(p, tc->unicode_repr, tc->length); + p += tc->length; + } + return n; +} /** This method returns the features associated with the input blob. */ // The resulting features are returned in int_features, which must be @@ -2926,80 +2926,80 @@ namespace tesseract { // On return feature_outline_index is filled with an index of the outline // corresponding to each feature in int_features. // TODO(rays) Fix the caller to out outline_counts instead. - void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, - INT_FEATURE_STRUCT* int_features, - int* num_features, - int* feature_outline_index) { - GenericVector outline_counts; - GenericVector bl_features; - GenericVector cn_features; - INT_FX_RESULT_STRUCT fx_info; - tesseract_->ExtractFeatures(*blob, false, &bl_features, - &cn_features, &fx_info, &outline_counts); - if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { - *num_features = 0; - return; // Feature extraction failed. - } - *num_features = cn_features.size(); - memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); - // TODO(rays) Pass outline_counts back and simplify the calling code. 
- if (feature_outline_index != nullptr) { - int f = 0; - for (int i = 0; i < outline_counts.size(); ++i) { - while (f < outline_counts[i]) - feature_outline_index[f++] = i; - } - } +void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, + INT_FEATURE_STRUCT* int_features, + int* num_features, + int* feature_outline_index) { + GenericVector outline_counts; + GenericVector bl_features; + GenericVector cn_features; + INT_FX_RESULT_STRUCT fx_info; + tesseract_->ExtractFeatures(*blob, false, &bl_features, + &cn_features, &fx_info, &outline_counts); + if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { + *num_features = 0; + return; // Feature extraction failed. + } + *num_features = cn_features.size(); + memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); + // TODO(rays) Pass outline_counts back and simplify the calling code. + if (feature_outline_index != nullptr) { + int f = 0; + for (int i = 0; i < outline_counts.size(); ++i) { + while (f < outline_counts[i]) + feature_outline_index[f++] = i; } + } +} // This method returns the row to which a box of specified dimensions would // belong. If no good match is found, it returns nullptr. 
- ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, - int left, int top, int right, int bottom) { - TBOX box(left, bottom, right, top); - BLOCK_IT b_it(blocks); - for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { - BLOCK* block = b_it.data(); - if (!box.major_overlap(block->pdblk.bounding_box())) - continue; - ROW_IT r_it(block->row_list()); - for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { - ROW* row = r_it.data(); - if (!box.major_overlap(row->bounding_box())) - continue; - WERD_IT w_it(row->word_list()); - for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { - WERD* word = w_it.data(); - if (box.major_overlap(word->bounding_box())) - return row; - } - } - } - return nullptr; +ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, + int left, int top, int right, int bottom) { + TBOX box(left, bottom, right, top); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->pdblk.bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (box.major_overlap(word->bounding_box())) + return row; + } } + } + return nullptr; +} /** Method to run adaptive classifier on a blob. 
*/ - void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned) { - BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; - tesseract_->AdaptiveClassifier(blob, choices); - BLOB_CHOICE_IT choices_it(choices); - int& index = *num_matches_returned; - index = 0; - for (choices_it.mark_cycle_pt(); - !choices_it.cycled_list() && index < num_max_matches; - choices_it.forward()) { - BLOB_CHOICE* choice = choices_it.data(); - unichar_ids[index] = choice->unichar_id(); - ratings[index] = choice->rating(); - ++index; - } - *num_matches_returned = index; - delete choices; - } +void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned) { + BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; + tesseract_->AdaptiveClassifier(blob, choices); + BLOB_CHOICE_IT choices_it(choices); + int& index = *num_matches_returned; + index = 0; + for (choices_it.mark_cycle_pt(); + !choices_it.cycled_list() && index < num_max_matches; + choices_it.forward()) { + BLOB_CHOICE* choice = choices_it.data(); + unichar_ids[index] = choice->unichar_id(); + ratings[index] = choice->rating(); + ++index; + } + *num_matches_returned = index; + delete choices; +} #endif // ndef DISABLED_LEGACY_ENGINE } // namespace tesseract. 
diff --git a/src/api/baseapi.h b/src/api/baseapi.h index f82dfa2d56..efa97ecd8f 100644 --- a/src/api/baseapi.h +++ b/src/api/baseapi.h @@ -61,34 +61,34 @@ struct TBLOB; namespace tesseract { - class Dawg; - class Dict; - class EquationDetect; - class PageIterator; - class LTRResultIterator; - class ResultIterator; - class MutableIterator; - class TessResultRenderer; - class Tesseract; - class Trie; - class Wordrec; - - typedef int (Dict::*DictFunc)(void* void_dawg_args, - const UNICHARSET& unicharset, - UNICHAR_ID unichar_id, bool word_end) const; - typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, - const char* context, - int context_bytes, - const char* character, - int character_bytes); - typedef float (Dict::*ParamsModelClassifyFunc)( - const char *lang, void *path); - typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, - const WERD_CHOICE_LIST &best_choices, - const UNICHARSET &unicharset, - BlamerBundle *blamer_bundle); - typedef TessCallback4 - TruthCallback; +class Dawg; +class Dict; +class EquationDetect; +class PageIterator; +class LTRResultIterator; +class ResultIterator; +class MutableIterator; +class TessResultRenderer; +class Tesseract; +class Trie; +class Wordrec; + +typedef int (Dict::*DictFunc)(void* void_dawg_args, + const UNICHARSET& unicharset, + UNICHAR_ID unichar_id, bool word_end) const; +typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, + const char* context, + int context_bytes, + const char* character, + int character_bytes); +typedef float (Dict::*ParamsModelClassifyFunc)( + const char *lang, void *path); +typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, + const WERD_CHOICE_LIST &best_choices, + const UNICHARSET &unicharset, + BlamerBundle *blamer_bundle); +typedef TessCallback4 + TruthCallback; /** * Base class for all tesseract APIs. 
@@ -98,842 +98,842 @@ namespace tesseract { * class to hide the data types so that users of this class don't have to * include any other Tesseract headers. */ - class TESS_API TessBaseAPI { - public: - TessBaseAPI(); - virtual ~TessBaseAPI(); - - /** - * Returns the version identifier as a static string. Do not delete. - */ - static const char* Version(); - - /** - * If compiled with OpenCL AND an available OpenCL - * device is deemed faster than serial code, then - * "device" is populated with the cl_device_id - * and returns sizeof(cl_device_id) - * otherwise *device=nullptr and returns 0. - */ - static size_t getOpenCLDevice(void **device); - - /** - * Writes the thresholded image to stderr as a PBM file on receipt of a - * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). - */ - static void CatchSignals(); - - /** - * Set the name of the input file. Needed for training and - * reading a UNLV zone file, and for searchable PDF output. - */ - void SetInputName(const char* name); - /** - * These functions are required for searchable PDF output. - * We need our hands on the input file so that we can include - * it in the PDF without transcoding. If that is not possible, - * we need the original image. Finally, resolution metadata - * is stored in the PDF so we need that as well. - */ - const char* GetInputName(); - // Takes ownership of the input pix. - void SetInputImage(Pix *pix); - Pix* GetInputImage(); - int GetSourceYResolution(); - const char* GetDatapath(); - - /** Set the name of the bonus output files. Needed only for debugging. */ - void SetOutputName(const char* name); - - /** - * Set the value of an internal "parameter." - * Supply the name of the parameter and the value as a string, just as - * you would in a config file. - * Returns false if the name lookup failed. - * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. - * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. 
- * SetVariable may be used before Init, but settings will revert to - * defaults on End(). - * - * Note: Must be called after Init(). Only works for non-init variables - * (init variables should be passed to Init()). - */ - bool SetVariable(const char* name, const char* value); - bool SetDebugVariable(const char* name, const char* value); - - /** - * Returns true if the parameter was found among Tesseract parameters. - * Fills in value with the value of the parameter. - */ - bool GetIntVariable(const char *name, int *value) const; - bool GetBoolVariable(const char *name, bool *value) const; - bool GetDoubleVariable(const char *name, double *value) const; - - /** - * Returns the pointer to the string that represents the value of the - * parameter if it was found among Tesseract parameters. - */ - const char *GetStringVariable(const char *name) const; - - /** - * Print Tesseract parameters to the given file. - */ - void PrintVariables(FILE *fp) const; - - /** - * Get value of named variable as a string, if it exists. - */ - bool GetVariableAsString(const char *name, STRING *val); - - /** - * Instances are now mostly thread-safe and totally independent, - * but some global parameters remain. Basically it is safe to use multiple - * TessBaseAPIs in different threads in parallel, UNLESS: - * you use SetVariable on some of the Params in classify and textord. - * If you do, then the effect will be to change it for all your instances. - * - * Start tesseract. Returns zero on success and -1 on failure. - * NOTE that the only members that may be called before Init are those - * listed above here in the class definition. - * - * The datapath must be the name of the parent directory of tessdata and - * must end in / . Any name after the last / will be stripped. - * The language is (usually) an ISO 639-3 string or nullptr will default to eng. 
- * It is entirely safe (and eventually will be efficient too) to call - * Init multiple times on the same instance to change language, or just - * to reset the classifier. - * The language may be a string of the form [~][+[~]]* indicating - * that multiple languages are to be loaded. Eg hin+eng will load Hindi and - * English. Languages may specify internally that they want to be loaded - * with one or more other languages, so the ~ sign is available to override - * that. Eg if hin were set to load eng by default, then hin+~eng would force - * loading only hin. The number of loaded languages is limited only by - * memory, with the caveat that loading additional languages will impact - * both speed and accuracy, as there is more work to do to decide on the - * applicable language, and there is more chance of hallucinating incorrect - * words. - * WARNING: On changing languages, all Tesseract parameters are reset - * back to their default values. (Which may vary between languages.) - * If you have a rare need to set a Variable that controls - * initialization for a second call to Init you should explicitly - * call End() and then use SetVariable before Init. This is only a very - * rare use case, since there are very few uses that require any parameters - * to be set before Init. - * - * If set_only_non_debug_params is true, only params that do not contain - * "debug" in the name will be set. 
- */ - int Init(const char* datapath, const char* language, OcrEngineMode mode, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, - bool set_only_non_debug_params); - int Init(const char* datapath, const char* language, OcrEngineMode oem) { - return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); - } - int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); - } - // In-memory version reads the traineddata file directly from the given - // data[data_size] array, and/or reads data via a FileReader. - int Init(const char* data, int data_size, const char* language, - OcrEngineMode mode, char** configs, int configs_size, - const GenericVector* vars_vec, - const GenericVector* vars_values, - bool set_only_non_debug_params, FileReader reader); - - /** - * Returns the languages string used in the last valid initialization. - * If the last initialization specified "deu+hin" then that will be - * returned. If hin loaded eng automatically as well, then that will - * not be included in this list. To find the languages actually - * loaded use GetLoadedLanguagesAsVector. - * The returned string should NOT be deleted. - */ - const char* GetInitLanguagesAsString() const; - - /** - * Returns the loaded languages in the vector of STRINGs. - * Includes all languages loaded by the last Init, including those loaded - * as dependencies of other loaded languages. - */ - void GetLoadedLanguagesAsVector(GenericVector* langs) const; - - /** - * Returns the available languages in the sorted vector of STRINGs. - */ - void GetAvailableLanguagesAsVector(GenericVector* langs) const; - - /** - * Init only the lang model component of Tesseract. The only functions - * that work after this init are SetVariable and IsValidWord. - * WARNING: temporary! This function will be removed from here and placed - * in a separate API at some future time. 
- */ - int InitLangMod(const char* datapath, const char* language); - - /** - * Init only for page layout analysis. Use only for calls to SetImage and - * AnalysePage. Calls that attempt recognition will generate an error. - */ - void InitForAnalysePage(); - - /** - * Read a "config" file containing a set of param, value pairs. - * Searches the standard places: tessdata/configs, tessdata/tessconfigs - * and also accepts a relative or absolute path name. - * Note: only non-init params will be set (init params are set by Init()). - */ - void ReadConfigFile(const char* filename); - /** Same as above, but only set debug params from the given config file. */ - void ReadDebugConfigFile(const char* filename); - - /** - * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. - * The mode is stored as an IntParam so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). - */ - void SetPageSegMode(PageSegMode mode); - - /** Return the current page segmentation mode. */ - PageSegMode GetPageSegMode() const; - - /** - * Recognize a rectangle from an image and return the result as a string. - * May be called many times for a single Init. - * Currently has no error checking. - * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. - * Palette color images will not work properly and must be converted to - * 24 bit. - * Binary images of 1 bit per pixel may also be given but they must be - * byte packed with the MSB of the first byte being the first pixel, and a - * 1 represents WHITE. For binary images set bytes_per_pixel=0. - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * - * Note that TesseractRect is the simplified convenience interface. - * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, - * and one or more of the Get*Text functions below. 
- */ - char* TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); - - /** - * Call between pages or documents etc to free up memory and forget - * adaptive data. - */ - void ClearAdaptiveClassifier(); - - /** - * @defgroup AdvancedAPI Advanced API - * The following methods break TesseractRect into pieces, so you can - * get hold of the thresholded image, get the text in different formats, - * get bounding boxes, confidences etc. - */ - /* @{ */ - - /** - * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Copies the image buffer and converts to Pix. - * SetImage clears all recognition results, and sets the rectangle to the - * full image, so it may be followed immediately by a GetUTF8Text, and it - * will automatically perform recognition. - */ - void SetImage(const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line); - - /** - * Provide an image for Tesseract to recognize. As with SetImage above, - * Tesseract takes its own copy of the image, so it need not persist until - * after Recognize. - * Pix vs raw, which to use? - * Use Pix where possible. Tesseract uses Pix as its internal representation - * and it is therefore more efficient to provide a Pix directly. - */ - void SetImage(Pix* pix); - - /** - * Set the resolution of the source image in pixels per inch so font size - * information can be calculated in results. Call this after SetImage(). - */ - void SetSourceResolution(int ppi); - - /** - * Restrict recognition to a sub-rectangle of the image. Call after SetImage. - * Each SetRectangle clears the recogntion results so multiple rectangles - * can be recognized with the same image. - */ - void SetRectangle(int left, int top, int width, int height); - - /** - * In extreme cases only, usually with a subclass of Thresholder, it - * is possible to provide a different Thresholder. 
The Thresholder may - * be preloaded with an image, settings etc, or they may be set after. - * Note that Tesseract takes ownership of the Thresholder and will - * delete it when it it is replaced or the API is destructed. - */ - void SetThresholder(ImageThresholder* thresholder) { - delete thresholder_; - thresholder_ = thresholder; - ClearResults(); - } - - /** - * Get a copy of the internal thresholded image from Tesseract. - * Caller takes ownership of the Pix and must pixDestroy it. - * May be called any time after SetImage, or after TesseractRect. - */ - Pix* GetThresholdedImage(); - - /** - * Get the result of page layout analysis as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetRegions(Pixa** pixa); - - /** - * Get the textlines as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * If raw_image is true, then extract from the original image instead of the - * thresholded image and pad by raw_padding pixels. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. - */ - Boxa* GetTextlines(const bool raw_image, const int raw_padding, +class TESS_API TessBaseAPI { + public: + TessBaseAPI(); + virtual ~TessBaseAPI(); + + /** + * Returns the version identifier as a static string. Do not delete. + */ + static const char* Version(); + + /** + * If compiled with OpenCL AND an available OpenCL + * device is deemed faster than serial code, then + * "device" is populated with the cl_device_id + * and returns sizeof(cl_device_id) + * otherwise *device=nullptr and returns 0. 
+ */ + static size_t getOpenCLDevice(void **device); + + /** + * Writes the thresholded image to stderr as a PBM file on receipt of a + * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). + */ + static void CatchSignals(); + + /** + * Set the name of the input file. Needed for training and + * reading a UNLV zone file, and for searchable PDF output. + */ + void SetInputName(const char* name); + /** + * These functions are required for searchable PDF output. + * We need our hands on the input file so that we can include + * it in the PDF without transcoding. If that is not possible, + * we need the original image. Finally, resolution metadata + * is stored in the PDF so we need that as well. + */ + const char* GetInputName(); + // Takes ownership of the input pix. + void SetInputImage(Pix *pix); + Pix* GetInputImage(); + int GetSourceYResolution(); + const char* GetDatapath(); + + /** Set the name of the bonus output files. Needed only for debugging. */ + void SetOutputName(const char* name); + + /** + * Set the value of an internal "parameter." + * Supply the name of the parameter and the value as a string, just as + * you would in a config file. + * Returns false if the name lookup failed. + * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. + * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. + * SetVariable may be used before Init, but settings will revert to + * defaults on End(). + * + * Note: Must be called after Init(). Only works for non-init variables + * (init variables should be passed to Init()). + */ + bool SetVariable(const char* name, const char* value); + bool SetDebugVariable(const char* name, const char* value); + + /** + * Returns true if the parameter was found among Tesseract parameters. + * Fills in value with the value of the parameter. 
+ */ + bool GetIntVariable(const char *name, int *value) const; + bool GetBoolVariable(const char *name, bool *value) const; + bool GetDoubleVariable(const char *name, double *value) const; + + /** + * Returns the pointer to the string that represents the value of the + * parameter if it was found among Tesseract parameters. + */ + const char *GetStringVariable(const char *name) const; + + /** + * Print Tesseract parameters to the given file. + */ + void PrintVariables(FILE *fp) const; + + /** + * Get value of named variable as a string, if it exists. + */ + bool GetVariableAsString(const char *name, STRING *val); + + /** + * Instances are now mostly thread-safe and totally independent, + * but some global parameters remain. Basically it is safe to use multiple + * TessBaseAPIs in different threads in parallel, UNLESS: + * you use SetVariable on some of the Params in classify and textord. + * If you do, then the effect will be to change it for all your instances. + * + * Start tesseract. Returns zero on success and -1 on failure. + * NOTE that the only members that may be called before Init are those + * listed above here in the class definition. + * + * The datapath must be the name of the parent directory of tessdata and + * must end in / . Any name after the last / will be stripped. + * The language is (usually) an ISO 639-3 string or nullptr will default to eng. + * It is entirely safe (and eventually will be efficient too) to call + * Init multiple times on the same instance to change language, or just + * to reset the classifier. + * The language may be a string of the form [~][+[~]]* indicating + * that multiple languages are to be loaded. Eg hin+eng will load Hindi and + * English. Languages may specify internally that they want to be loaded + * with one or more other languages, so the ~ sign is available to override + * that. Eg if hin were set to load eng by default, then hin+~eng would force + * loading only hin. 
The number of loaded languages is limited only by + * memory, with the caveat that loading additional languages will impact + * both speed and accuracy, as there is more work to do to decide on the + * applicable language, and there is more chance of hallucinating incorrect + * words. + * WARNING: On changing languages, all Tesseract parameters are reset + * back to their default values. (Which may vary between languages.) + * If you have a rare need to set a Variable that controls + * initialization for a second call to Init you should explicitly + * call End() and then use SetVariable before Init. This is only a very + * rare use case, since there are very few uses that require any parameters + * to be set before Init. + * + * If set_only_non_debug_params is true, only params that do not contain + * "debug" in the name will be set. + */ + int Init(const char* datapath, const char* language, OcrEngineMode mode, + char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params); + int Init(const char* datapath, const char* language, OcrEngineMode oem) { + return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); + } + int Init(const char* datapath, const char* language) { + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + } + // In-memory version reads the traineddata file directly from the given + // data[data_size] array, and/or reads data via a FileReader. + int Init(const char* data, int data_size, const char* language, + OcrEngineMode mode, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader); + + /** + * Returns the languages string used in the last valid initialization. + * If the last initialization specified "deu+hin" then that will be + * returned. 
If hin loaded eng automatically as well, then that will + * not be included in this list. To find the languages actually + * loaded use GetLoadedLanguagesAsVector. + * The returned string should NOT be deleted. + */ + const char* GetInitLanguagesAsString() const; + + /** + * Returns the loaded languages in the vector of STRINGs. + * Includes all languages loaded by the last Init, including those loaded + * as dependencies of other loaded languages. + */ + void GetLoadedLanguagesAsVector(GenericVector* langs) const; + + /** + * Returns the available languages in the sorted vector of STRINGs. + */ + void GetAvailableLanguagesAsVector(GenericVector* langs) const; + + /** + * Init only the lang model component of Tesseract. The only functions + * that work after this init are SetVariable and IsValidWord. + * WARNING: temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int InitLangMod(const char* datapath, const char* language); + + /** + * Init only for page layout analysis. Use only for calls to SetImage and + * AnalysePage. Calls that attempt recognition will generate an error. + */ + void InitForAnalysePage(); + + /** + * Read a "config" file containing a set of param, value pairs. + * Searches the standard places: tessdata/configs, tessdata/tessconfigs + * and also accepts a relative or absolute path name. + * Note: only non-init params will be set (init params are set by Init()). + */ + void ReadConfigFile(const char* filename); + /** Same as above, but only set debug params from the given config file. */ + void ReadDebugConfigFile(const char* filename); + + /** + * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. + * The mode is stored as an IntParam so it can also be modified by + * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). + */ + void SetPageSegMode(PageSegMode mode); + + /** Return the current page segmentation mode. 
*/ + PageSegMode GetPageSegMode() const; + + /** + * Recognize a rectangle from an image and return the result as a string. + * May be called many times for a single Init. + * Currently has no error checking. + * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + * Palette color images will not work properly and must be converted to + * 24 bit. + * Binary images of 1 bit per pixel may also be given but they must be + * byte packed with the MSB of the first byte being the first pixel, and a + * 1 represents WHITE. For binary images set bytes_per_pixel=0. + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * + * Note that TesseractRect is the simplified convenience interface. + * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, + * and one or more of the Get*Text functions below. + */ + char* TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height); + + /** + * Call between pages or documents etc to free up memory and forget + * adaptive data. + */ + void ClearAdaptiveClassifier(); + + /** + * @defgroup AdvancedAPI Advanced API + * The following methods break TesseractRect into pieces, so you can + * get hold of the thresholded image, get the text in different formats, + * get bounding boxes, confidences etc. + */ + /* @{ */ + + /** + * Provide an image for Tesseract to recognize. Format is as + * TesseractRect above. Copies the image buffer and converts to Pix. + * SetImage clears all recognition results, and sets the rectangle to the + * full image, so it may be followed immediately by a GetUTF8Text, and it + * will automatically perform recognition. + */ + void SetImage(const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line); + + /** + * Provide an image for Tesseract to recognize. 
As with SetImage above, + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. + * Pix vs raw, which to use? + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. + */ + void SetImage(Pix* pix); + + /** + * Set the resolution of the source image in pixels per inch so font size + * information can be calculated in results. Call this after SetImage(). + */ + void SetSourceResolution(int ppi); + + /** + * Restrict recognition to a sub-rectangle of the image. Call after SetImage. + * Each SetRectangle clears the recogntion results so multiple rectangles + * can be recognized with the same image. + */ + void SetRectangle(int left, int top, int width, int height); + + /** + * In extreme cases only, usually with a subclass of Thresholder, it + * is possible to provide a different Thresholder. The Thresholder may + * be preloaded with an image, settings etc, or they may be set after. + * Note that Tesseract takes ownership of the Thresholder and will + * delete it when it it is replaced or the API is destructed. + */ + void SetThresholder(ImageThresholder* thresholder) { + delete thresholder_; + thresholder_ = thresholder; + ClearResults(); + } + + /** + * Get a copy of the internal thresholded image from Tesseract. + * Caller takes ownership of the Pix and must pixDestroy it. + * May be called any time after SetImage, or after TesseractRect. + */ + Pix* GetThresholdedImage(); + + /** + * Get the result of page layout analysis as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetRegions(Pixa** pixa); + + /** + * Get the textlines as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If raw_image is true, then extract from the original image instead of the + * thresholded image and pad by raw_padding pixels. 
+ * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + * If paraids is not nullptr, the paragraph-id of each line within its block is + * also returned as an array of one element per line. delete [] after use. + */ + Boxa* GetTextlines(const bool raw_image, const int raw_padding, + Pixa** pixa, int** blockids, int** paraids); + /* + Helper method to extract from the thresholded image. (most common usage) + */ + Boxa* GetTextlines(Pixa** pixa, int** blockids) { + return GetTextlines(false, 0, pixa, blockids, nullptr); + } + + /** + * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa + * pair, in reading order. Enables downstream handling of non-rectangular + * regions. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + */ + Boxa* GetStrips(Pixa** pixa, int** blockids); + + /** + * Get the words as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetWords(Pixa** pixa); + + /** + * Gets the individual connected (text) components (created + * after pages segmentation step, but before recognition) + * as a leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * Note: the caller is responsible for calling boxaDestroy() + * on the returned Boxa array and pixaDestroy() on cc array. + */ + Boxa* GetConnectedComponents(Pixa** cc); + + /** + * Get the given level kind of components (block, textline, word etc.) as a + * leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each component is also returned + * as an array of one element per component. delete [] after use. 
+ * If blockids is not nullptr, the paragraph-id of each component with its block + * is also returned as an array of one element per component. delete [] after + * use. + * If raw_image is true, then portions of the original image are extracted + * instead of the thresholded image and padded with raw_padding. + * If text_only is true, then only text components are returned. + */ + Boxa* GetComponentImages(const PageIteratorLevel level, + const bool text_only, const bool raw_image, + const int raw_padding, Pixa** pixa, int** blockids, int** paraids); - /* - Helper method to extract from the thresholded image. (most common usage) - */ - Boxa* GetTextlines(Pixa** pixa, int** blockids) { - return GetTextlines(false, 0, pixa, blockids, nullptr); - } - - /** - * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa - * pair, in reading order. Enables downstream handling of non-rectangular - * regions. - * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - */ - Boxa* GetStrips(Pixa** pixa, int** blockids); - - /** - * Get the words as a leptonica-style - * Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - */ - Boxa* GetWords(Pixa** pixa); - - /** - * Gets the individual connected (text) components (created - * after pages segmentation step, but before recognition) - * as a leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. - * Note: the caller is responsible for calling boxaDestroy() - * on the returned Boxa array and pixaDestroy() on cc array. - */ - Boxa* GetConnectedComponents(Pixa** cc); - - /** - * Get the given level kind of components (block, textline, word etc.) as a - * leptonica-style Boxa, Pixa pair, in reading order. - * Can be called before or after Recognize. 
- * If blockids is not nullptr, the block-id of each component is also returned - * as an array of one element per component. delete [] after use. - * If blockids is not nullptr, the paragraph-id of each component with its block - * is also returned as an array of one element per component. delete [] after - * use. - * If raw_image is true, then portions of the original image are extracted - * instead of the thresholded image and padded with raw_padding. - * If text_only is true, then only text components are returned. - */ - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, const bool raw_image, - const int raw_padding, - Pixa** pixa, int** blockids, int** paraids); - // Helper function to get binary images with no padding (most common usage). - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, - Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); - } - - /** - * Returns the scale factor of the thresholded image that would be returned by - * GetThresholdedImage() and the various GetX() methods that call - * GetComponentImages(). - * Returns 0 if no thresholder has been set. - */ - int GetThresholdedImageScaleFactor() const; - - /** - * Runs page layout analysis in the mode set by SetPageSegMode. - * May optionally be called prior to Recognize to get access to just - * the page layout results. Returns an iterator to the results. - * If merge_similar_words is true, words are combined where suitable for use - * with a line recognizer. Use if you want to use AnalyseLayout to find the - * textlines, and then want to process textline fragments with an external - * line recognizer. - * Returns nullptr on error or an empty page. - * The returned iterator must be deleted after use. - * WARNING! 
This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - PageIterator* AnalyseLayout(); - PageIterator* AnalyseLayout(bool merge_similar_words); - - /** - * Recognize the image from SetAndThresholdImage, generating Tesseract - * internal structures. Returns 0 on success. - * Optional. The Get*Text functions below will call Recognize if needed. - * After Recognize, the output is kept internally until the next SetImage. - */ - int Recognize(ETEXT_DESC* monitor); - - /** - * Methods to retrieve information after SetAndThresholdImage(), - * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) - */ + // Helper function to get binary images with no padding (most common usage). + Boxa* GetComponentImages(const PageIteratorLevel level, + const bool text_only, + Pixa** pixa, int** blockids) { + return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + } + + /** + * Returns the scale factor of the thresholded image that would be returned by + * GetThresholdedImage() and the various GetX() methods that call + * GetComponentImages(). + * Returns 0 if no thresholder has been set. + */ + int GetThresholdedImageScaleFactor() const; + + /** + * Runs page layout analysis in the mode set by SetPageSegMode. + * May optionally be called prior to Recognize to get access to just + * the page layout results. Returns an iterator to the results. + * If merge_similar_words is true, words are combined where suitable for use + * with a line recognizer. Use if you want to use AnalyseLayout to find the + * textlines, and then want to process textline fragments with an external + * line recognizer. + * Returns nullptr on error or an empty page. + * The returned iterator must be deleted after use. + * WARNING! 
This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + PageIterator* AnalyseLayout(); + PageIterator* AnalyseLayout(bool merge_similar_words); + + /** + * Recognize the image from SetAndThresholdImage, generating Tesseract + * internal structures. Returns 0 on success. + * Optional. The Get*Text functions below will call Recognize if needed. + * After Recognize, the output is kept internally until the next SetImage. + */ + int Recognize(ETEXT_DESC* monitor); + + /** + * Methods to retrieve information after SetAndThresholdImage(), + * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) + */ + + #ifndef DISABLED_LEGACY_ENGINE + /** Variant on Recognize used for testing chopper. */ + int RecognizeForChopTest(ETEXT_DESC* monitor); + #endif + + /** + * Turns images into symbolic text. + * + * filename can point to a single image, a multi-page TIFF, + * or a plain text list of image filenames. + * + * retry_config is useful for debugging. If not nullptr, you can fall + * back to an alternate configuration if a page fails for some + * reason. + * + * timeout_millisec terminates processing if any single page + * takes too long. Set to 0 for unlimited time. + * + * renderer is responsible for creating the output. For example, + * use the TessTextRenderer if you want plaintext output, or + * the TessPDFRenderer to produce searchable PDF. + * + * If tessedit_page_number is non-negative, will only process that + * single page. Works for multi-page tiff file, or filelist. + * + * Returns true if successful, false on error. + */ + bool ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + // Does the real work of ProcessPages. 
+ bool ProcessPagesInternal(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + + /** + * Turn a single image into symbolic text. + * + * The pix is the image processed. filename and page_index are + * metadata used by side-effect processes, such as reading a box + * file or formatting as hOCR. + * + * See ProcessPages for descriptions of other parameters. + */ + bool ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer); + + /** + * Get a reading-order iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + ResultIterator* GetIterator(); + + /** + * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + MutableIterator* GetMutableIterator(); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + */ + char* GetUTF8Text(); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. 
+ * monitor can be used to + * cancel the recognition + * receive progress callbacks + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(ETEXT_DESC* monitor, int page_number); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(int page_number); + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(ETEXT_DESC* monitor, int page_number); + + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(int page_number); + + /** + * Make a TSV-formatted string from the internal data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetTSVText(int page_number); + + /** + * The recognized text is returned as a char* which is coded in the same + * format as a box file used in training. + * Constructs coordinates in the original image - not just the rectangle. + * page_number is a 0-based page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. + */ + char* GetBoxText(int page_number); + + /** + * The recognized text is returned as a char* which is coded + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. + */ + char* GetUNLVText(); + + /** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image in degrees + * (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. 
"Latin" + * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output + */ + bool DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ + char* GetOsdText(int page_number); + + /** Returns the (average) confidence value between 0 and 100. */ + int MeanTextConf(); + /** + * Returns all word confidences (between 0 and 100) in an array, terminated + * by -1. The calling function must delete [] after use. + * The number of confidences should correspond to the number of space- + * delimited words in GetUTF8Text. + */ + int* AllWordConfidences(); #ifndef DISABLED_LEGACY_ENGINE - /** Variant on Recognize used for testing chopper. */ - int RecognizeForChopTest(ETEXT_DESC* monitor); -#endif - - /** - * Turns images into symbolic text. - * - * filename can point to a single image, a multi-page TIFF, - * or a plain text list of image filenames. - * - * retry_config is useful for debugging. If not nullptr, you can fall - * back to an alternate configuration if a page fails for some - * reason. - * - * timeout_millisec terminates processing if any single page - * takes too long. Set to 0 for unlimited time. - * - * renderer is responible for creating the output. For example, - * use the TessTextRenderer if you want plaintext output, or - * the TessPDFRender to produce searchable PDF. - * - * If tessedit_page_number is non-negative, will only process that - * single page. Works for multi-page tiff file, or filelist. - * - * Returns true if successful, false on error. - */ - bool ProcessPages(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - // Does the real work of ProcessPages. 
- bool ProcessPagesInternal(const char* filename, const char* retry_config, - int timeout_millisec, TessResultRenderer* renderer); - - /** - * Turn a single image into symbolic text. - * - * The pix is the image processed. filename and page_index are - * metadata used by side-effect processes, such as reading a box - * file or formatting as hOCR. - * - * See ProcessPages for desciptions of other parameters. - */ - bool ProcessPage(Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer); - - /** - * Get a reading-order iterator to the results of LayoutAnalysis and/or - * Recognize. The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - ResultIterator* GetIterator(); - - /** - * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. - * The returned iterator must be deleted after use. - * WARNING! This class points to data held within the TessBaseAPI class, and - * therefore can only be used while the TessBaseAPI class still exists and - * has not been subjected to a call of Init, SetImage, Recognize, Clear, End - * DetectOS, or anything else that changes the internal PAGE_RES. - */ - MutableIterator* GetMutableIterator(); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - */ - char* GetUTF8Text(); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. 
- * monitor can be used to - * cancel the recognition - * receive progress callbacks - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetHOCRText(int page_number); - - /** - * Make an XML-formatted string with Alto markup from the internal - * data structures. - */ - char* GetAltoText(ETEXT_DESC* monitor, int page_number); - - - /** - * Make an XML-formatted string with Alto markup from the internal - * data structures. - */ - char* GetAltoText(int page_number); - - /** - * Make a TSV-formatted string from the internal data structures. - * page_number is 0-based but will appear in the output as 1-based. - * Returned string must be freed with the delete [] operator. - */ - char* GetTSVText(int page_number); - - /** - * The recognized text is returned as a char* which is coded in the same - * format as a box file used in training. - * Constructs coordinates in the original image - not just the rectangle. - * page_number is a 0-based page index that will appear in the box file. - * Returned string must be freed with the delete [] operator. - */ - char* GetBoxText(int page_number); - - /** - * The recognized text is returned as a char* which is coded - * as UNLV format Latin-1 with specific reject and suspect codes. - * Returned string must be freed with the delete [] operator. - */ - char* GetUNLVText(); - - /** - * Detect the orientation of the input image and apparent script (alphabet). - * orient_deg is the detected clockwise rotation of the input image in degrees - * (0, 90, 180, 270) - * orient_conf is the confidence (15.0 is reasonably confident) - * script_name is an ASCII string, the name of the script, e.g. 
"Latin" - * script_conf is confidence level in the script - * Returns true on success and writes values to each parameter as an output - */ - bool DetectOrientationScript(int* orient_deg, float* orient_conf, - const char** script_name, float* script_conf); - - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ - char* GetOsdText(int page_number); - - /** Returns the (average) confidence value between 0 and 100. */ - int MeanTextConf(); - /** - * Returns all word confidences (between 0 and 100) in an array, terminated - * by -1. The calling function must delete [] after use. - * The number of confidences should correspond to the number of space- - * delimited words in GetUTF8Text. - */ - int* AllWordConfidences(); - -#ifndef DISABLED_LEGACY_ENGINE - /** - * Applies the given word to the adaptive classifier if possible. - * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can - * tell the boundaries of the graphemes. - * Assumes that SetImage/SetRectangle have been used to set the image - * to the given word. The mode arg should be PSM_SINGLE_WORD or - * PSM_CIRCLE_WORD, as that will be used to control layout analysis. - * The currently set PageSegMode is preserved. - * Returns false if adaption was not possible for some reason. - */ - bool AdaptToWordStr(PageSegMode mode, const char* wordstr); + /** + * Applies the given word to the adaptive classifier if possible. + * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can + * tell the boundaries of the graphemes. + * Assumes that SetImage/SetRectangle have been used to set the image + * to the given word. The mode arg should be PSM_SINGLE_WORD or + * PSM_CIRCLE_WORD, as that will be used to control layout analysis. + * The currently set PageSegMode is preserved. + * Returns false if adaption was not possible for some reason. 
+ */ + bool AdaptToWordStr(PageSegMode mode, const char* wordstr); #endif // ndef DISABLED_LEGACY_ENGINE - /** - * Free up recognition results and any stored image data, without actually - * freeing any recognition data that would be time-consuming to reload. - * Afterwards, you must call SetImage or TesseractRect before doing - * any Recognize or Get* operation. - */ - void Clear(); - - /** - * Close down tesseract and free up all memory. End() is equivalent to - * destructing and reconstructing your TessBaseAPI. - * Once End() has been used, none of the other API functions may be used - * other than Init and anything declared above it in the class definition. - */ - void End(); - - /** - * Clear any library-level memory caches. - * There are a variety of expensive-to-load constant data structures (mostly - * language dictionaries) that are cached globally -- surviving the Init() - * and End() of individual TessBaseAPI's. This function allows the clearing - * of these caches. - **/ - static void ClearPersistentCache(); - - /** - * Check whether a word is valid according to Tesseract's language model - * @return 0 if the word is invalid, non-zero if valid. - * @warning temporary! This function will be removed from here and placed - * in a separate API at some future time. - */ - int IsValidWord(const char *word); - // Returns true if utf8_character is defined in the UniCharset. - bool IsValidCharacter(const char *utf8_character); - - - bool GetTextDirection(int* out_offset, float* out_slope); - - /** Sets Dict::letter_is_okay_ function to point to the given function. */ - void SetDictFunc(DictFunc f); - - /** Sets Dict::probability_in_context_ function to point to the given - * function. - */ - void SetProbabilityInContextFunc(ProbabilityInContextFunc f); - - /** - * Estimates the Orientation And Script of the image. - * @return true if the image was processed successfully. 
- */ - bool DetectOS(OSResults*); - - /** - * Return text orientation of each block as determined by an earlier run - * of layout analysis. - */ - void GetBlockTextOrientations(int** block_orientation, - bool** vertical_writing); - - -#ifndef DISABLED_LEGACY_ENGINE - - /** Sets Wordrec::fill_lattice_ function to point to the given function. */ - void SetFillLatticeFunc(FillLatticeFunc f); - - /** Find lines from the image making the BLOCK_LIST. */ - BLOCK_LIST* FindLinesCreateBlockList(); - - /** - * Delete a block list. - * This is to keep BLOCK_LIST pointer opaque - * and let go of including the other headers. - */ - static void DeleteBlockList(BLOCK_LIST* block_list); - - /** Returns a ROW object created from the input row specification. */ - static ROW *MakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); - - /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB *MakeTBLOB(Pix *pix); - - /** - * This method baseline normalizes a TBLOB in-place. The input row is used - * for normalization. The denorm is an optional parameter in which the - * normalization-antidote is returned. - */ - static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); - - /** This method returns the features associated with the input image. */ - void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* feature_outline_index); - - /** - * This method returns the row to which a box of specified dimensions would - * belong. If no good match is found, it returns nullptr. - */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); - - /** - * Method to run adaptive classifier on a blob. - * It returns at max num_max_matches results. 
- */ - void RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned); + /** + * Free up recognition results and any stored image data, without actually + * freeing any recognition data that would be time-consuming to reload. + * Afterwards, you must call SetImage or TesseractRect before doing + * any Recognize or Get* operation. + */ + void Clear(); + + /** + * Close down tesseract and free up all memory. End() is equivalent to + * destructing and reconstructing your TessBaseAPI. + * Once End() has been used, none of the other API functions may be used + * other than Init and anything declared above it in the class definition. + */ + void End(); + + /** + * Clear any library-level memory caches. + * There are a variety of expensive-to-load constant data structures (mostly + * language dictionaries) that are cached globally -- surviving the Init() + * and End() of individual TessBaseAPI's. This function allows the clearing + * of these caches. + **/ + static void ClearPersistentCache(); + + /** + * Check whether a word is valid according to Tesseract's language model + * @return 0 if the word is invalid, non-zero if valid. + * @warning temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int IsValidWord(const char *word); + // Returns true if utf8_character is defined in the UniCharset. + bool IsValidCharacter(const char *utf8_character); + + + bool GetTextDirection(int* out_offset, float* out_slope); + + /** Sets Dict::letter_is_okay_ function to point to the given function. */ + void SetDictFunc(DictFunc f); + + /** Sets Dict::probability_in_context_ function to point to the given + * function. + */ + void SetProbabilityInContextFunc(ProbabilityInContextFunc f); + + /** + * Estimates the Orientation And Script of the image. + * @return true if the image was processed successfully. 
+ */ + bool DetectOS(OSResults*); + + /** + * Return text orientation of each block as determined by an earlier run + * of layout analysis. + */ + void GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing); + + + #ifndef DISABLED_LEGACY_ENGINE + + /** Sets Wordrec::fill_lattice_ function to point to the given function. */ + void SetFillLatticeFunc(FillLatticeFunc f); + + /** Find lines from the image making the BLOCK_LIST. */ + BLOCK_LIST* FindLinesCreateBlockList(); + + /** + * Delete a block list. + * This is to keep BLOCK_LIST pointer opaque + * and let go of including the other headers. + */ + static void DeleteBlockList(BLOCK_LIST* block_list); + + /** Returns a ROW object created from the input row specification. */ + static ROW *MakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); + + /** Returns a TBLOB corresponding to the entire input image. */ + static TBLOB *MakeTBLOB(Pix *pix); + + /** + * This method baseline normalizes a TBLOB in-place. The input row is used + * for normalization. The denorm is an optional parameter in which the + * normalization-antidote is returned. + */ + static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); + + /** This method returns the features associated with the input image. */ + void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* feature_outline_index); + + /** + * This method returns the row to which a box of specified dimensions would + * belong. If no good match is found, it returns nullptr. + */ + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); + + /** + * Method to run adaptive classifier on a blob. + * It returns at max num_max_matches results. 
+ */ + void RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned); #endif // ndef DISABLED_LEGACY_ENGINE - /** This method returns the string form of the specified unichar. */ - const char* GetUnichar(int unichar_id); - - /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *GetDawg(int i) const; - - /** Return the number of dawgs loaded into tesseract_ object. */ - int NumDawgs() const; - - Tesseract* tesseract() const { return tesseract_; } - - OcrEngineMode oem() const { return last_oem_requested_; } - - void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } - - void set_min_orientation_margin(double margin); - /* @} */ - - protected: - - /** Common code for setting the image. Returns true if Init has been called. */ - TESS_LOCAL bool InternalSetImage(); - - /** - * Run the thresholder to make the thresholded image. If pix is not nullptr, - * the source is thresholded to pix instead of the internal IMAGE. - */ - TESS_LOCAL virtual bool Threshold(Pix** pix); - - /** - * Find lines from the image making the BLOCK_LIST. - * @return 0 on success. - */ - TESS_LOCAL int FindLines(); - - /** Delete the pageres and block list ready for a new page. */ - void ClearResults(); - - /** - * Return an LTR Result Iterator -- used only for training, as we really want - * to ignore all BiDi smarts at that point. - * delete once you're done with it. - */ - TESS_LOCAL LTRResultIterator* GetLTRIterator(); - - /** - * Return the length of the output text string, as UTF8, assuming - * one newline per line and one per block, with a terminator, - * and assuming a single character reject marker for each rejected character. - * Also return the number of recognized blobs in blob_count. 
- */ - TESS_LOCAL int TextLength(int* blob_count); - - //// paragraphs.cpp //////////////////////////////////////////////////// - TESS_LOCAL void DetectParagraphs(bool after_text_recognition); - -#ifndef DISABLED_LEGACY_ENGINE - - /** @defgroup ocropusAddOns ocropus add-ons */ - /* @{ */ - - /** - * Adapt to recognize the current image as the given character. - * The image must be preloaded and be just an image of a single character. - */ - TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender); - - /** Recognize text doing one pass only, using settings for a given pass. */ - TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); - - TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, - PAGE_RES* pass1_result); - - /** - * Extract the OCR results, costs (penalty points for uncertainty), - * and the bounding boxes of the characters. - */ - TESS_LOCAL static int TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res); - - TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } - /* @} */ + /** This method returns the string form of the specified unichar. */ + const char* GetUnichar(int unichar_id); + + /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ + const Dawg *GetDawg(int i) const; + + /** Return the number of dawgs loaded into tesseract_ object. */ + int NumDawgs() const; + + Tesseract* tesseract() const { return tesseract_; } + + OcrEngineMode oem() const { return last_oem_requested_; } + + void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } + + void set_min_orientation_margin(double margin); + /* @} */ + + protected: + + /** Common code for setting the image. Returns true if Init has been called. */ + TESS_LOCAL bool InternalSetImage(); + + /** + * Run the thresholder to make the thresholded image. 
If pix is not nullptr, + * the source is thresholded to pix instead of the internal IMAGE. + */ + TESS_LOCAL virtual bool Threshold(Pix** pix); + + /** + * Find lines from the image making the BLOCK_LIST. + * @return 0 on success. + */ + TESS_LOCAL int FindLines(); + + /** Delete the pageres and block list ready for a new page. */ + void ClearResults(); + + /** + * Return an LTR Result Iterator -- used only for training, as we really want + * to ignore all BiDi smarts at that point. + * delete once you're done with it. + */ + TESS_LOCAL LTRResultIterator* GetLTRIterator(); + + /** + * Return the length of the output text string, as UTF8, assuming + * one newline per line and one per block, with a terminator, + * and assuming a single character reject marker for each rejected character. + * Also return the number of recognized blobs in blob_count. + */ + TESS_LOCAL int TextLength(int* blob_count); + + //// paragraphs.cpp //////////////////////////////////////////////////// + TESS_LOCAL void DetectParagraphs(bool after_text_recognition); + + #ifndef DISABLED_LEGACY_ENGINE + + /** @defgroup ocropusAddOns ocropus add-ons */ + /* @{ */ + + /** + * Adapt to recognize the current image as the given character. + * The image must be preloaded and be just an image of a single character. + */ + TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender); + + /** Recognize text doing one pass only, using settings for a given pass. */ + TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); + + TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result); + + /** + * Extract the OCR results, costs (penalty points for uncertainty), + * and the bounding boxes of the characters. 
+ */ + TESS_LOCAL static int TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res); + + TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } + /* @} */ #endif // ndef DISABLED_LEGACY_ENGINE - protected: - Tesseract* tesseract_; ///< The underlying data object. - Tesseract* osd_tesseract_; ///< For orientation & script detection. - EquationDetect* equ_detect_; ///<The equation detector. - FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder* thresholder_; ///< Image thresholding module. - GenericVector<ParagraphModel *>* paragraph_models_; - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. - OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. - TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES - - /** - * @defgroup ThresholderParams Thresholder Parameters - * Parameters saved from the Thresholder. Needed to rebuild coordinates. - */ - /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; - /* @} */ - - private: - // A list of image filenames gets special consideration - bool ProcessPagesFileList(FILE *fp, - STRING *buf, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // TIFF supports multipage so gets special consideration. - bool ProcessPagesMultipageTiff(const unsigned char *data, - size_t size, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer, - int tessedit_page_number); - // There's currently no way to pass a document title from the - // Tesseract command line, and we have multiple places that choose - // to set the title to an empty string. 
Using a single named - // variable will hopefully reduce confusion if the situation changes - // in the future. - const char *unknown_title_ = ""; - }; // class TessBaseAPI. + protected: + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + EquationDetect* equ_detect_; ///* paragraph_models_; + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. + STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. + OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. + bool recognition_done_; ///< page_res_ contains recognition data. + TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES + + /** + * @defgroup ThresholderParams Thresholder Parameters + * Parameters saved from the Thresholder. Needed to rebuild coordinates. + */ + /* @{ */ + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; + int image_width_; + int image_height_; + /* @} */ + + private: + // A list of image filenames gets special consideration + bool ProcessPagesFileList(FILE *fp, + STRING *buf, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // TIFF supports multipage so gets special consideration. + bool ProcessPagesMultipageTiff(const unsigned char *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // There's currently no way to pass a document title from the + // Tesseract command line, and we have multiple places that choose + // to set the title to an empty string. Using a single named + // variable will hopefully reduce confusion if the situation changes + // in the future. 
+ const char *unknown_title_ = ""; +}; // class TessBaseAPI. /** Escape a char string - remove &<>"' with HTML codes. */ - STRING HOcrEscape(const char* text); +STRING HOcrEscape(const char* text); } // namespace tesseract. #endif // TESSERACT_API_BASEAPI_H_ diff --git a/src/api/capi.cpp b/src/api/capi.cpp index 1bbf621c25..333bbcd3fe 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -244,9 +244,9 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, cons } TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params) + TessOcrEngineMode mode, char** configs, int configs_size, + char** vars_vec, char** vars_values, size_t vars_vec_size, + BOOL set_only_non_debug_params) { GenericVector varNames; GenericVector varValues; @@ -337,8 +337,8 @@ TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* } TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height) + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height) { return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height); } @@ -351,7 +351,7 @@ TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) #endif TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, - int bytes_per_pixel, int bytes_per_line) + int bytes_per_pixel, int bytes_per_line) { handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); } @@ -392,7 +392,7 @@ TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, str } TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* 
handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids) + struct Pixa** pixa, int** blockids, int** paraids) { return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids); } @@ -550,7 +550,7 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand #ifndef DISABLED_LEGACY_ENGINE TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, - int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) + int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) { bool success; success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); @@ -558,7 +558,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, } TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex) + int* num_features, int* FeatureOutlineIndex) { handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex); } @@ -569,7 +569,7 @@ TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, } TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches, - int* unichar_ids, float* ratings, int* num_matches_returned) + int* unichar_ids, float* ratings, int* num_matches_returned) { handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned); } @@ -661,13 +661,13 @@ TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* } TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element) + TessPageIteratorLevel element) { return handle->IsAtFinalElement(level, element) ? 
TRUE : FALSE; } TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, - int* left, int* top, int* right, int* bottom) + int* left, int* top, int* right, int* bottom) { return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; } @@ -689,14 +689,14 @@ TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* } TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) + int* x1, int* y1, int* x2, int* y2) { return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE; } TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation, - TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, - float* deskew_angle) + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle) { handle->Orientation(orientation, writing_direction, textline_order, deskew_angle); } @@ -759,8 +759,8 @@ TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const T } TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id) + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id) { bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps; const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif, diff --git a/src/api/capi.h b/src/api/capi.h index 8f999e8536..5101fdcf77 100644 --- a/src/api/capi.h +++ b/src/api/capi.h @@ -118,7 +118,7 @@ struct Pixa; /* General free functions */ TESS_API const char* -TESS_CALL TessVersion(); + 
TESS_CALL TessVersion(); TESS_API void TESS_CALL TessDeleteText(char* text); TESS_API void TESS_CALL TessDeleteTextArray(char** arr); TESS_API void TESS_CALL TessDeleteIntArray(int* arr); @@ -136,7 +136,7 @@ TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* out TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer); TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next); TESS_API TessResultRenderer* -TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); + TESS_CALL TessResultRendererNext(TessResultRenderer* renderer); TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title); TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api); TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer); @@ -148,7 +148,7 @@ TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer); /* Base API */ TESS_API TessBaseAPI* -TESS_CALL TessBaseAPICreate(); + TESS_CALL TessBaseAPICreate(); TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device); @@ -171,7 +171,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, co TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value); TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value); TESS_API const char* -TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); + TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp); TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename); @@ 
-193,16 +193,16 @@ TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datap TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language); TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode, - char** configs, int configs_size, - char** vars_vec, char** vars_values, size_t vars_vec_size, - BOOL set_only_non_debug_params); + char** configs, int configs_size, + char** vars_vec, char** vars_values, size_t vars_vec_size, + BOOL set_only_non_debug_params); TESS_API const char* -TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); TESS_API char** -TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); TESS_API char** -TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language); TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); @@ -212,7 +212,7 @@ TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, con TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode); TESS_API TessPageSegMode -TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, int bytes_per_pixel, int bytes_per_line, @@ -233,32 +233,32 @@ TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImag #endif TESS_API struct Pix* -TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); + TESS_CALL 
TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa); + TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); + TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); + TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); + TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa); + TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); + TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - struct Pixa** pixa, int** blockids); + TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, + struct Pixa** pixa, int** blockids); TESS_API struct Boxa* -TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only, - const BOOL raw_image, const int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); + TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const 
TessPageIteratorLevel level, const BOOL text_only, + const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); TESS_API TessPageIterator* -TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); + TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); @@ -269,12 +269,12 @@ TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ET TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename, - const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); + const char* retry_config, int timeout_millisec, TessResultRenderer* renderer); TESS_API TessResultIterator* -TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); TESS_API TessMutableIterator* -TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); + TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); @@ -314,19 +314,19 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, #endif // def TESS_CAPI_INCLUDE_BASEAPI TESS_API const char* -TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); + TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); #ifdef TESS_CAPI_INCLUDE_BASEAPI TESS_API const TessDawg* -TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* 
handle, int i); + TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); TESS_API TessOcrEngineMode -TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); + TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb); @@ -339,7 +339,7 @@ TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, i TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); TESS_API TessPageIterator* -TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); + TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); @@ -354,14 +354,14 @@ TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* han int* left, int* top, int* right, int* bottom); TESS_API TessPolyBlockType -TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); + TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); TESS_API struct Pix* -TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); + TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); TESS_API struct Pix* -TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top); + TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top); TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, int* x1, int* y1, int* x2, int* y2); @@ -377,23 +377,23 @@ TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TESS_API void TESS_CALL 
TessResultIteratorDelete(TessResultIterator* handle); TESS_API TessResultIterator* -TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); + TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); TESS_API TessPageIterator* -TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); + TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); TESS_API const TessPageIterator* -TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); + TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); TESS_API TessChoiceIterator* -TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); + TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level); TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level); TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level); TESS_API const char* -TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); + TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); TESS_API const char* -TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, - BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, - BOOL* is_smallcaps, int* pointsize, int* font_id); + TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id); TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); 
@@ -434,12 +434,12 @@ TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, T TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender); TESS_API TBLOB* -TESS_CALL TessMakeTBLOB(Pix* pix); + TESS_CALL TessMakeTBLOB(Pix* pix); TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); TESS_API BLOCK_LIST* -TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); + TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp index c4c24e032f..af31be8e59 100644 --- a/src/api/renderer.cpp +++ b/src/api/renderer.cpp @@ -30,250 +30,250 @@ namespace tesseract { /********************************************************************** * Base Renderer interface implementation **********************************************************************/ - TessResultRenderer::TessResultRenderer(const char *outputbase, - const char* extension) - : file_extension_(extension), - title_(""), imagenum_(-1), - fout_(stdout), - next_(nullptr), - happy_(true) { - if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { - STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); - fout_ = fopen(outfile.string(), "wb"); - if (fout_ == nullptr) { - happy_ = false; - } - } +TessResultRenderer::TessResultRenderer(const char *outputbase, + const char* extension) + : file_extension_(extension), + title_(""), imagenum_(-1), + fout_(stdout), + next_(nullptr), + happy_(true) { + if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { + STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_); + fout_ = fopen(outfile.string(), "wb"); + if (fout_ == nullptr) { + happy_ = false; } - - TessResultRenderer::~TessResultRenderer() { - if (fout_ != nullptr) { - if (fout_ != stdout) - fclose(fout_); - else - 
clearerr(fout_); - } - delete next_; - } - - void TessResultRenderer::insert(TessResultRenderer* next) { - if (next == nullptr) return; - - TessResultRenderer* remainder = next_; - next_ = next; - if (remainder) { - while (next->next_ != nullptr) { - next = next->next_; - } - next->next_ = remainder; - } - } - - bool TessResultRenderer::BeginDocument(const char* title) { - if (!happy_) return false; - title_ = title; - imagenum_ = -1; - bool ok = BeginDocumentHandler(); - if (next_) { - ok = next_->BeginDocument(title) && ok; - } - return ok; - } - - bool TessResultRenderer::AddImage(TessBaseAPI* api) { - if (!happy_) return false; - ++imagenum_; - bool ok = AddImageHandler(api); - if (next_) { - ok = next_->AddImage(api) && ok; - } - return ok; - } - - bool TessResultRenderer::EndDocument() { - if (!happy_) return false; - bool ok = EndDocumentHandler(); - if (next_) { - ok = next_->EndDocument() && ok; - } - return ok; - } - - void TessResultRenderer::AppendString(const char* s) { - AppendData(s, strlen(s)); - } - - void TessResultRenderer::AppendData(const char* s, int len) { - if (!tesseract::Serialize(fout_, s, len)) happy_ = false; - } - - bool TessResultRenderer::BeginDocumentHandler() { - return happy_; - } - - bool TessResultRenderer::EndDocumentHandler() { - return happy_; + } +} + +TessResultRenderer::~TessResultRenderer() { + if (fout_ != nullptr) { + if (fout_ != stdout) + fclose(fout_); + else + clearerr(fout_); + } + delete next_; +} + +void TessResultRenderer::insert(TessResultRenderer* next) { + if (next == nullptr) return; + + TessResultRenderer* remainder = next_; + next_ = next; + if (remainder) { + while (next->next_ != nullptr) { + next = next->next_; } + next->next_ = remainder; + } +} + +bool TessResultRenderer::BeginDocument(const char* title) { + if (!happy_) return false; + title_ = title; + imagenum_ = -1; + bool ok = BeginDocumentHandler(); + if (next_) { + ok = next_->BeginDocument(title) && ok; + } + return ok; +} + +bool 
TessResultRenderer::AddImage(TessBaseAPI* api) { + if (!happy_) return false; + ++imagenum_; + bool ok = AddImageHandler(api); + if (next_) { + ok = next_->AddImage(api) && ok; + } + return ok; +} + +bool TessResultRenderer::EndDocument() { + if (!happy_) return false; + bool ok = EndDocumentHandler(); + if (next_) { + ok = next_->EndDocument() && ok; + } + return ok; +} + +void TessResultRenderer::AppendString(const char* s) { + AppendData(s, strlen(s)); +} + +void TessResultRenderer::AppendData(const char* s, int len) { + if (!tesseract::Serialize(fout_, s, len)) happy_ = false; +} + +bool TessResultRenderer::BeginDocumentHandler() { + return happy_; +} + +bool TessResultRenderer::EndDocumentHandler() { + return happy_; +} /********************************************************************** * UTF8 Text Renderer interface implementation **********************************************************************/ - TessTextRenderer::TessTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "txt") { - } +TessTextRenderer::TessTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "txt") { +} - bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr utf8(api->GetUTF8Text()); - if (utf8 == nullptr) { - return false; - } +bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr utf8(api->GetUTF8Text()); + if (utf8 == nullptr) { + return false; + } - AppendString(utf8.get()); + AppendString(utf8.get()); - const char* pageSeparator = api->GetStringVariable("page_separator"); - if (pageSeparator != nullptr && *pageSeparator != '\0') { - AppendString(pageSeparator); - } + const char* pageSeparator = api->GetStringVariable("page_separator"); + if (pageSeparator != nullptr && *pageSeparator != '\0') { + AppendString(pageSeparator); + } - return true; - } + return true; +} /********************************************************************** * HOcr Text Renderer interface implementation 
**********************************************************************/ - TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = false; - } - - TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) - : TessResultRenderer(outputbase, "hocr") { - font_info_ = font_info; - } - - bool TessHOcrRenderer::BeginDocumentHandler() { - AppendString( - "\n" - "\n" - "\n \n "); - AppendString(title()); - AppendString( - "\n" - "\n" - " \n" - " \n" - "\n\n"); - - return true; - } - - bool TessHOcrRenderer::EndDocumentHandler() { - AppendString(" \n\n"); - - return true; - } - - bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr hocr(api->GetHOCRText(imagenum())); - if (hocr == nullptr) return false; - - AppendString(hocr.get()); - - return true; - } +TessHOcrRenderer::TessHOcrRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "hocr") { + font_info_ = false; +} + +TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) + : TessResultRenderer(outputbase, "hocr") { + font_info_ = font_info; +} + +bool TessHOcrRenderer::BeginDocumentHandler() { + AppendString( + "\n" + "\n" + "\n \n "); + AppendString(title()); + AppendString( + "\n" + "\n" + " \n" + " \n" + "\n\n"); + + return true; +} + +bool TessHOcrRenderer::EndDocumentHandler() { + AppendString(" \n\n"); + + return true; +} + +bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr hocr(api->GetHOCRText(imagenum())); + if (hocr == nullptr) return false; + + AppendString(hocr.get()); + + return true; +} /********************************************************************** * TSV Text Renderer interface implementation **********************************************************************/ - TessTsvRenderer::TessTsvRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; - } 
+TessTsvRenderer::TessTsvRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "tsv") { + font_info_ = false; +} - TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) - : TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; - } +TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) + : TessResultRenderer(outputbase, "tsv") { + font_info_ = font_info; +} - bool TessTsvRenderer::BeginDocumentHandler() { - // Output TSV column headings - AppendString( - "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" - "num\tleft\ttop\twidth\theight\tconf\ttext\n"); - return true; - } +bool TessTsvRenderer::BeginDocumentHandler() { + // Output TSV column headings + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + "num\tleft\ttop\twidth\theight\tconf\ttext\n"); + return true; +} - bool TessTsvRenderer::EndDocumentHandler() { return true; } +bool TessTsvRenderer::EndDocumentHandler() { return true; } - bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr tsv(api->GetTSVText(imagenum())); - if (tsv == nullptr) return false; +bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr tsv(api->GetTSVText(imagenum())); + if (tsv == nullptr) return false; - AppendString(tsv.get()); + AppendString(tsv.get()); - return true; - } + return true; +} /********************************************************************** * UNLV Text Renderer interface implementation **********************************************************************/ - TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "unlv") { - } +TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "unlv") { +} - bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr unlv(api->GetUNLVText()); - if (unlv == nullptr) return false; +bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* 
api) { + const std::unique_ptr unlv(api->GetUNLVText()); + if (unlv == nullptr) return false; - AppendString(unlv.get()); + AppendString(unlv.get()); - return true; - } + return true; +} /********************************************************************** * BoxText Renderer interface implementation **********************************************************************/ - TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "box") { - } +TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "box") { +} - bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { - const std::unique_ptr text(api->GetBoxText(imagenum())); - if (text == nullptr) return false; +bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { + const std::unique_ptr text(api->GetBoxText(imagenum())); + if (text == nullptr) return false; - AppendString(text.get()); + AppendString(text.get()); - return true; - } + return true; +} #ifndef DISABLED_LEGACY_ENGINE /********************************************************************** * Osd Text Renderer interface implementation **********************************************************************/ - TessOsdRenderer::TessOsdRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "osd") {} +TessOsdRenderer::TessOsdRenderer(const char* outputbase) + : TessResultRenderer(outputbase, "osd") {} - bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { - char* osd = api->GetOsdText(imagenum()); - if (osd == nullptr) return false; +bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { + char* osd = api->GetOsdText(imagenum()); + if (osd == nullptr) return false; - AppendString(osd); - delete[] osd; + AppendString(osd); + delete[] osd; - return true; - } + return true; +} #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/renderer.h b/src/api/renderer.h index cb91f3e005..5c31e4b848 100644 --- a/src/api/renderer.h +++ 
b/src/api/renderer.h @@ -27,7 +27,7 @@ namespace tesseract { - class TessBaseAPI; +class TessBaseAPI; /** * Interface for rendering tesseract results into a document, such as text, @@ -42,129 +42,129 @@ namespace tesseract { * renderers can manage the associated state needed for the specific formats * in addition to the heuristics for producing it. */ - class TESS_API TessResultRenderer { - public: - virtual ~TessResultRenderer(); - - // Takes ownership of pointer so must be new'd instance. - // Renderers aren't ordered, but appends the sequences of next parameter - // and existing next(). The renderers should be unique across both lists. - void insert(TessResultRenderer* next); - - // Returns the next renderer or nullptr. - TessResultRenderer* next() { return next_; } - - /** - * Starts a new document with the given title. - * This clears the contents of the output data. - * Title should use UTF-8 encoding. - */ - bool BeginDocument(const char* title); - - /** - * Adds the recognized text from the source image to the current document. - * Invalid if BeginDocument not yet called. - * - * Note that this API is a bit weird but is designed to fit into the - * current TessBaseAPI implementation where the api has lots of state - * information that we might want to add in. - */ - bool AddImage(TessBaseAPI* api); - - /** - * Finishes the document and finalizes the output data - * Invalid if BeginDocument not yet called. - */ - bool EndDocument(); - - const char* file_extension() const { return file_extension_; } - const char* title() const { return title_.c_str(); } - - // Is everything fine? Otherwise something went wrong. - bool happy() { return happy_; } - - /** - * Returns the index of the last image given to AddImage - * (i.e. images are incremented whether the image succeeded or not) - * - * This is always defined. 
It means either the number of the - * current image, the last image ended, or in the completed document - * depending on when in the document lifecycle you are looking at it. - * Will return -1 if a document was never started. - */ - int imagenum() const { return imagenum_; } - - protected: - /** - * Called by concrete classes. - * - * outputbase is the name of the output file excluding - * extension. For example, "/path/to/chocolate-chip-cookie-recipe" - * - * extension indicates the file extension to be used for output - * files. For example "pdf" will produce a .pdf file, and "hocr" - * will produce .hocr files. - */ - TessResultRenderer(const char *outputbase, - const char* extension); - - // Hook for specialized handling in BeginDocument() - virtual bool BeginDocumentHandler(); - - // This must be overridden to render the OCR'd results - virtual bool AddImageHandler(TessBaseAPI* api) = 0; - - // Hook for specialized handling in EndDocument() - virtual bool EndDocumentHandler(); - - // Renderers can call this to append '\0' terminated strings into - // the output string returned by GetOutput. - // This method will grow the output buffer if needed. - void AppendString(const char* s); - - // Renderers can call this to append binary byte sequences into - // the output string returned by GetOutput. Note that s is not necessarily - // '\0' terminated (and can contain '\0' within it). - // This method will grow the output buffer if needed. - void AppendData(const char* s, int len); - - private: - const char* file_extension_; // standard extension for generated output - STRING title_; // title of document being renderered - int imagenum_; // index of last image added - - FILE* fout_; // output file pointer - TessResultRenderer* next_; // Can link multiple renderers together - bool happy_; // I get grumpy when the disk fills up, etc. 
- }; +class TESS_API TessResultRenderer { + public: + virtual ~TessResultRenderer(); + + // Takes ownership of pointer so must be new'd instance. + // Renderers aren't ordered, but appends the sequences of next parameter + // and existing next(). The renderers should be unique across both lists. + void insert(TessResultRenderer* next); + + // Returns the next renderer or nullptr. + TessResultRenderer* next() { return next_; } + + /** + * Starts a new document with the given title. + * This clears the contents of the output data. + * Title should use UTF-8 encoding. + */ + bool BeginDocument(const char* title); + + /** + * Adds the recognized text from the source image to the current document. + * Invalid if BeginDocument not yet called. + * + * Note that this API is a bit weird but is designed to fit into the + * current TessBaseAPI implementation where the api has lots of state + * information that we might want to add in. + */ + bool AddImage(TessBaseAPI* api); + + /** + * Finishes the document and finalizes the output data + * Invalid if BeginDocument not yet called. + */ + bool EndDocument(); + + const char* file_extension() const { return file_extension_; } + const char* title() const { return title_.c_str(); } + + // Is everything fine? Otherwise something went wrong. + bool happy() { return happy_; } + + /** + * Returns the index of the last image given to AddImage + * (i.e. images are incremented whether the image succeeded or not) + * + * This is always defined. It means either the number of the + * current image, the last image ended, or in the completed document + * depending on when in the document lifecycle you are looking at it. + * Will return -1 if a document was never started. + */ + int imagenum() const { return imagenum_; } + + protected: + /** + * Called by concrete classes. + * + * outputbase is the name of the output file excluding + * extension. 
For example, "/path/to/chocolate-chip-cookie-recipe" + * + * extension indicates the file extension to be used for output + * files. For example "pdf" will produce a .pdf file, and "hocr" + * will produce .hocr files. + */ + TessResultRenderer(const char *outputbase, + const char* extension); + + // Hook for specialized handling in BeginDocument() + virtual bool BeginDocumentHandler(); + + // This must be overridden to render the OCR'd results + virtual bool AddImageHandler(TessBaseAPI* api) = 0; + + // Hook for specialized handling in EndDocument() + virtual bool EndDocumentHandler(); + + // Renderers can call this to append '\0' terminated strings into + // the output string returned by GetOutput. + // This method will grow the output buffer if needed. + void AppendString(const char* s); + + // Renderers can call this to append binary byte sequences into + // the output string returned by GetOutput. Note that s is not necessarily + // '\0' terminated (and can contain '\0' within it). + // This method will grow the output buffer if needed. + void AppendData(const char* s, int len); + + private: + const char* file_extension_; // standard extension for generated output + STRING title_; // title of document being renderered + int imagenum_; // index of last image added + + FILE* fout_; // output file pointer + TessResultRenderer* next_; // Can link multiple renderers together + bool happy_; // I get grumpy when the disk fills up, etc. 
+}; /** * Renders tesseract output into a plain UTF-8 text string */ - class TESS_API TessTextRenderer : public TessResultRenderer { - public: - explicit TessTextRenderer(const char *outputbase); +class TESS_API TessTextRenderer : public TessResultRenderer { + public: + explicit TessTextRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); - }; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); +}; /** * Renders tesseract output into an hocr text string */ - class TESS_API TessHOcrRenderer : public TessResultRenderer { - public: - explicit TessHOcrRenderer(const char *outputbase, bool font_info); - explicit TessHOcrRenderer(const char *outputbase); +class TESS_API TessHOcrRenderer : public TessResultRenderer { + public: + explicit TessHOcrRenderer(const char *outputbase, bool font_info); + explicit TessHOcrRenderer(const char *outputbase); - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); - private: - bool font_info_; // whether to print font information - }; + private: + bool font_info_; // whether to print font information +}; /** * Renders tesseract output into an alto text string @@ -183,91 +183,91 @@ namespace tesseract { /** * Renders Tesseract output into a TSV string */ - class TESS_API TessTsvRenderer : public TessResultRenderer { - public: - explicit TessTsvRenderer(const char* outputbase, bool font_info); - explicit TessTsvRenderer(const char* outputbase); +class TESS_API TessTsvRenderer : public TessResultRenderer { + public: + explicit TessTsvRenderer(const char* outputbase, bool font_info); + explicit TessTsvRenderer(const char* outputbase); - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool 
EndDocumentHandler(); + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); - private: - bool font_info_; // whether to print font information - }; + private: + bool font_info_; // whether to print font information +}; /** * Renders tesseract output into searchable PDF */ - class TESS_API TessPDFRenderer : public TessResultRenderer { - public: - // datadir is the location of the TESSDATA. We need it because - // we load a custom PDF font from this location. - TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); - - protected: - virtual bool BeginDocumentHandler(); - virtual bool AddImageHandler(TessBaseAPI* api); - virtual bool EndDocumentHandler(); - - private: - // We don't want to have every image in memory at once, - // so we store some metadata as we go along producing - // PDFs one page at a time. At the end, that metadata is - // used to make everything that isn't easily handled in a - // streaming fashion. - long int obj_; // counter for PDF objects - GenericVector offsets_; // offset of every PDF object in bytes - GenericVector pages_; // object number for every /Page object - std::string datadir_; // where to find the custom font - bool textonly_; // skip images if set - // Bookkeeping only. DIY = Do It Yourself. - void AppendPDFObjectDIY(size_t objectsize); - // Bookkeeping + emit data. - void AppendPDFObject(const char *data); - // Create the /Contents object for an entire page. - char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); - // Turn an image into a PDF object. Only transcode if we have to. - static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum, - char** pdf_object, long int* pdf_object_size, const int jpg_quality); - }; +class TESS_API TessPDFRenderer : public TessResultRenderer { + public: + // datadir is the location of the TESSDATA. 
We need it because + // we load a custom PDF font from this location. + TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false); + + protected: + virtual bool BeginDocumentHandler(); + virtual bool AddImageHandler(TessBaseAPI* api); + virtual bool EndDocumentHandler(); + + private: + // We don't want to have every image in memory at once, + // so we store some metadata as we go along producing + // PDFs one page at a time. At the end, that metadata is + // used to make everything that isn't easily handled in a + // streaming fashion. + long int obj_; // counter for PDF objects + GenericVector offsets_; // offset of every PDF object in bytes + GenericVector pages_; // object number for every /Page object + std::string datadir_; // where to find the custom font + bool textonly_; // skip images if set + // Bookkeeping only. DIY = Do It Yourself. + void AppendPDFObjectDIY(size_t objectsize); + // Bookkeeping + emit data. + void AppendPDFObject(const char *data); + // Create the /Contents object for an entire page. + char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); + // Turn an image into a PDF object. Only transcode if we have to. 
+ static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum, + char** pdf_object, long int* pdf_object_size, const int jpg_quality); +}; /** * Renders tesseract output into a plain UTF-8 text string */ - class TESS_API TessUnlvRenderer : public TessResultRenderer { - public: - explicit TessUnlvRenderer(const char *outputbase); +class TESS_API TessUnlvRenderer : public TessResultRenderer { + public: + explicit TessUnlvRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); - }; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); +}; /** * Renders tesseract output into a plain UTF-8 text string */ - class TESS_API TessBoxTextRenderer : public TessResultRenderer { - public: - explicit TessBoxTextRenderer(const char *outputbase); +class TESS_API TessBoxTextRenderer : public TessResultRenderer { + public: + explicit TessBoxTextRenderer(const char *outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); - }; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); +}; #ifndef DISABLED_LEGACY_ENGINE /** * Renders tesseract output into an osd text string */ - class TESS_API TessOsdRenderer : public TessResultRenderer { - public: - explicit TessOsdRenderer(const char* outputbase); +class TESS_API TessOsdRenderer : public TessResultRenderer { + public: + explicit TessOsdRenderer(const char* outputbase); - protected: - virtual bool AddImageHandler(TessBaseAPI* api); - }; + protected: + virtual bool AddImageHandler(TessBaseAPI* api); +}; #endif // ndef DISABLED_LEGACY_ENGINE diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp index 8ec5e7b75f..816dfe4faf 100644 --- a/src/api/tesseractmain.cpp +++ b/src/api/tesseractmain.cpp @@ -66,20 +66,20 @@ static void Win32WarningHandler(const char* module, const char* fmt, #endif // _WIN32 static void PrintVersionInfo() { - char* versionStrP; + char* versionStrP; - printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); + 
printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); - versionStrP = getLeptonicaVersion(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getLeptonicaVersion(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); - versionStrP = getImagelibVersions(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getImagelibVersions(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); #ifdef USE_OPENCL - cl_platform_id platform[4]; + cl_platform_id platform[4]; cl_uint num_platforms; printf(" OpenCL info:\n"); @@ -118,155 +118,155 @@ static void PrintVersionInfo() { } static void PrintHelpForPSM() { - const char* msg = - "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR.\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - " 11 Sparse text. Find as much text as possible in no" - " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. Treat the image as a single text line,\n" - " bypassing hacks that are Tesseract-specific.\n"; + const char* msg = + "Page segmentation modes:\n" + " 0 Orientation and script detection (OSD) only.\n" + " 1 Automatic page segmentation with OSD.\n" + " 2 Automatic page segmentation, but no OSD, or OCR.\n" + " 3 Fully automatic page segmentation, but no OSD. 
(Default)\n" + " 4 Assume a single column of text of variable sizes.\n" + " 5 Assume a single uniform block of vertically aligned text.\n" + " 6 Assume a single uniform block of text.\n" + " 7 Treat the image as a single text line.\n" + " 8 Treat the image as a single word.\n" + " 9 Treat the image as a single word in a circle.\n" + " 10 Treat the image as a single character.\n" + " 11 Sparse text. Find as much text as possible in no" + " particular order.\n" + " 12 Sparse text with OSD.\n" + " 13 Raw line. Treat the image as a single text line,\n" + " bypassing hacks that are Tesseract-specific.\n"; #ifdef DISABLED_LEGACY_ENGINE - const char* disabled_osd_msg = + const char* disabled_osd_msg = "\nNOTE: The OSD modes are currently disabled.\n"; printf("%s%s", msg, disabled_osd_msg); #else - printf("%s", msg); + printf("%s", msg); #endif } #ifndef DISABLED_LEGACY_ENGINE static void PrintHelpForOEM() { - const char* msg = - "OCR Engine modes:\n" - " 0 Legacy engine only.\n" - " 1 Neural nets LSTM engine only.\n" - " 2 Legacy + LSTM engines.\n" - " 3 Default, based on what is available.\n"; - - printf("%s", msg); + const char* msg = + "OCR Engine modes:\n" + " 0 Legacy engine only.\n" + " 1 Neural nets LSTM engine only.\n" + " 2 Legacy + LSTM engines.\n" + " 3 Default, based on what is available.\n"; + + printf("%s", msg); } #endif // ndef DISABLED_LEGACY_ENGINE static void PrintHelpExtra(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --help-psm | " - #ifndef DISABLED_LEGACY_ENGINE - "--help-oem | " - #endif - "--version\n" - " %s --list-langs [--tessdata-dir PATH]\n" - " %s --print-parameters [options...] [configfile...]\n" - " %s imagename|imagelist|stdin outputbase|stdout [options...] 
[configfile...]\n" - "\n" - "OCR options:\n" - " --tessdata-dir PATH Specify the location of tessdata path.\n" - " --user-words PATH Specify the location of user words file.\n" - " --user-patterns PATH Specify the location of user patterns file.\n" - " --dpi VALUE Specify DPI for input image.\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - " -c VAR=VALUE Set value for config variables.\n" - " Multiple -c arguments are allowed.\n" - " --psm NUM Specify page segmentation mode.\n" - #ifndef DISABLED_LEGACY_ENGINE - " --oem NUM Specify OCR Engine mode.\n" - #endif - "NOTE: These options must occur before any configfile.\n" - "\n", - program, program, program, program - ); - - PrintHelpForPSM(); + printf( + "Usage:\n" + " %s --help | --help-extra | --help-psm | " +#ifndef DISABLED_LEGACY_ENGINE + "--help-oem | " +#endif + "--version\n" + " %s --list-langs [--tessdata-dir PATH]\n" + " %s --print-parameters [options...] [configfile...]\n" + " %s imagename|imagelist|stdin outputbase|stdout [options...] 
[configfile...]\n" + "\n" + "OCR options:\n" + " --tessdata-dir PATH Specify the location of tessdata path.\n" + " --user-words PATH Specify the location of user words file.\n" + " --user-patterns PATH Specify the location of user patterns file.\n" + " --dpi VALUE Specify DPI for input image.\n" + " -l LANG[+LANG] Specify language(s) used for OCR.\n" + " -c VAR=VALUE Set value for config variables.\n" + " Multiple -c arguments are allowed.\n" + " --psm NUM Specify page segmentation mode.\n" +#ifndef DISABLED_LEGACY_ENGINE + " --oem NUM Specify OCR Engine mode.\n" +#endif + "NOTE: These options must occur before any configfile.\n" + "\n", + program, program, program, program + ); + + PrintHelpForPSM(); #ifndef DISABLED_LEGACY_ENGINE - printf("\n"); - PrintHelpForOEM(); + printf("\n"); + PrintHelpForOEM(); #endif - printf( - "\n" - "Single options:\n" - " -h, --help Show minimal help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --help-psm Show page segmentation modes.\n" - #ifndef DISABLED_LEGACY_ENGINE - " --help-oem Show OCR Engine modes.\n" - #endif - " -v, --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters.\n" - ); + printf( + "\n" + "Single options:\n" + " -h, --help Show minimal help message.\n" + " --help-extra Show extra help for advanced users.\n" + " --help-psm Show page segmentation modes.\n" +#ifndef DISABLED_LEGACY_ENGINE + " --help-oem Show OCR Engine modes.\n" +#endif + " -v, --version Show version information.\n" + " --list-langs List available languages for tesseract engine.\n" + " --print-parameters Print tesseract parameters.\n" + ); } static void PrintHelpMessage(const char* program) { - printf( - "Usage:\n" - " %s --help | --help-extra | --version\n" - " %s --list-langs\n" - " %s imagename outputbase [options...] 
[configfile...]\n" - "\n" - "OCR options:\n" - " -l LANG[+LANG] Specify language(s) used for OCR.\n" - "NOTE: These options must occur before any configfile.\n" - "\n" - "Single options:\n" - " --help Show this help message.\n" - " --help-extra Show extra help for advanced users.\n" - " --version Show version information.\n" - " --list-langs List available languages for tesseract engine.\n", - program, program, program - ); + printf( + "Usage:\n" + " %s --help | --help-extra | --version\n" + " %s --list-langs\n" + " %s imagename outputbase [options...] [configfile...]\n" + "\n" + "OCR options:\n" + " -l LANG[+LANG] Specify language(s) used for OCR.\n" + "NOTE: These options must occur before any configfile.\n" + "\n" + "Single options:\n" + " --help Show this help message.\n" + " --help-extra Show extra help for advanced users.\n" + " --version Show version information.\n" + " --list-langs List available languages for tesseract engine.\n", + program, program, program + ); } static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { - char opt1[256], opt2[255]; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - strncpy(opt1, argv[i + 1], 255); - opt1[255] = '\0'; - char* p = strchr(opt1, '='); - if (!p) { - fprintf(stderr, "Missing = in configvar assignment\n"); - exit(EXIT_FAILURE); - } - *p = 0; - strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); - opt2[254] = 0; - ++i; - - if (!api->SetVariable(opt1, opt2)) { - fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); - } - } + char opt1[256], opt2[255]; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { + strncpy(opt1, argv[i + 1], 255); + opt1[255] = '\0'; + char* p = strchr(opt1, '='); + if (!p) { + fprintf(stderr, "Missing = in configvar assignment\n"); + exit(EXIT_FAILURE); + } + *p = 0; + strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255); + opt2[254] = 0; + ++i; + + if (!api->SetVariable(opt1, opt2)) { 
+ fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); + } } + } } static void PrintLangsList(tesseract::TessBaseAPI* api) { - GenericVector languages; - api->GetAvailableLanguagesAsVector(&languages); - printf("List of available languages (%d):\n", languages.size()); - for (int index = 0; index < languages.size(); ++index) { - STRING& string = languages[index]; - printf("%s\n", string.string()); - } - api->End(); + GenericVector languages; + api->GetAvailableLanguagesAsVector(&languages); + printf("List of available languages (%d):\n", languages.size()); + for (int index = 0; index < languages.size(); ++index) { + STRING& string = languages[index]; + printf("%s\n", string.string()); + } + api->End(); } static void PrintBanner() { - tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", - tesseract::TessBaseAPI::Version()); + tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", + tesseract::TessBaseAPI::Version()); } /** @@ -285,15 +285,15 @@ static void PrintBanner() { */ static void FixPageSegMode(tesseract::TessBaseAPI* api, tesseract::PageSegMode pagesegmode) { - if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api->SetPageSegMode(pagesegmode); + if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) + api->SetPageSegMode(pagesegmode); } static void checkArgValues(int arg, const char* mode, int count) { - if (arg >= count || arg < 0) { - printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); - exit(EXIT_SUCCESS); - } + if (arg >= count || arg < 0) { + printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); + exit(EXIT_SUCCESS); + } } // NOTE: arg_i is used here to avoid ugly *i so many times in this function @@ -304,218 +304,218 @@ static void ParseArgs(const int argc, char** argv, const char** lang, GenericVector* vars_values, l_int32* arg_i, tesseract::PageSegMode* pagesegmode, tesseract::OcrEngineMode* enginemode) { - bool noocr = false; - int i; - for (i = 1; 
i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) { - if (*image != nullptr && *outputbase == nullptr) { - // outputbase follows image, don't allow options at that position. - *outputbase = argv[i]; - } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { - PrintHelpMessage(argv[0]); - noocr = true; - } else if (strcmp(argv[i], "--help-extra") == 0) { - PrintHelpExtra(argv[0]); - noocr = true; - } else if ((strcmp(argv[i], "--help-psm") == 0)) { - PrintHelpForPSM(); - noocr = true; + bool noocr = false; + int i; + for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) { + if (*image != nullptr && *outputbase == nullptr) { + // outputbase follows image, don't allow options at that position. + *outputbase = argv[i]; + } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { + PrintHelpMessage(argv[0]); + noocr = true; + } else if (strcmp(argv[i], "--help-extra") == 0) { + PrintHelpExtra(argv[0]); + noocr = true; + } else if ((strcmp(argv[i], "--help-psm") == 0)) { + PrintHelpForPSM(); + noocr = true; #ifndef DISABLED_LEGACY_ENGINE - } else if ((strcmp(argv[i], "--help-oem") == 0)) { - PrintHelpForOEM(); - noocr = true; + } else if ((strcmp(argv[i], "--help-oem") == 0)) { + PrintHelpForOEM(); + noocr = true; #endif - } else if ((strcmp(argv[i], "-v") == 0) || - (strcmp(argv[i], "--version") == 0)) { - PrintVersionInfo(); - noocr = true; - } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { - *lang = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) { - *datapath = argv[i + 1]; - ++i; - } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) { - *dpi = atoi(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { - vars_vec->push_back("user_words_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { - 
vars_vec->push_back("user_patterns_file"); - vars_values->push_back(argv[i + 1]); - ++i; - } else if (strcmp(argv[i], "--list-langs") == 0) { - noocr = true; - *list_langs = true; - } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { - checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); - *pagesegmode = static_cast(atoi(argv[i + 1])); - ++i; - } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { + } else if ((strcmp(argv[i], "-v") == 0) || + (strcmp(argv[i], "--version") == 0)) { + PrintVersionInfo(); + noocr = true; + } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) { + *lang = argv[i + 1]; + ++i; + } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) { + *datapath = argv[i + 1]; + ++i; + } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) { + *dpi = atoi(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) { + vars_vec->push_back("user_words_file"); + vars_values->push_back(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) { + vars_vec->push_back("user_patterns_file"); + vars_values->push_back(argv[i + 1]); + ++i; + } else if (strcmp(argv[i], "--list-langs") == 0) { + noocr = true; + *list_langs = true; + } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { + checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT); + *pagesegmode = static_cast(atoi(argv[i + 1])); + ++i; + } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { #ifndef DISABLED_LEGACY_ENGINE - int oem = atoi(argv[i + 1]); - checkArgValues(oem, "OEM", tesseract::OEM_COUNT); - *enginemode = static_cast(oem); + int oem = atoi(argv[i + 1]); + checkArgValues(oem, "OEM", tesseract::OEM_COUNT); + *enginemode = static_cast(oem); #endif - ++i; - } else if (strcmp(argv[i], "--print-parameters") == 0) { - noocr = true; - *print_parameters = true; - } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - // handled properly after api init - ++i; - } else if (*image 
== nullptr) { - *image = argv[i]; - } else { - // Unexpected argument. - fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]); - exit(EXIT_FAILURE); - } + ++i; + } else if (strcmp(argv[i], "--print-parameters") == 0) { + noocr = true; + *print_parameters = true; + } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { + // handled properly after api init + ++i; + } else if (*image == nullptr) { + *image = argv[i]; + } else { + // Unexpected argument. + fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]); + exit(EXIT_FAILURE); } + } - *arg_i = i; + *arg_i = i; - if (*pagesegmode == tesseract::PSM_OSD_ONLY) { - // OSD = orientation and script detection. - if (*lang != nullptr && strcmp(*lang, "osd")) { - // If the user explicitly specifies a language (other than osd) - // or a script, only orientation can be detected. - fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang); - } else { - // That mode requires osd.traineddata to detect orientation and script. - *lang = "osd"; - } + if (*pagesegmode == tesseract::PSM_OSD_ONLY) { + // OSD = orientation and script detection. + if (*lang != nullptr && strcmp(*lang, "osd")) { + // If the user explicitly specifies a language (other than osd) + // or a script, only orientation can be detected. + fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang); + } else { + // That mode requires osd.traineddata to detect orientation and script. 
+ *lang = "osd"; } + } - if (*outputbase == nullptr && noocr == false) { - PrintHelpMessage(argv[0]); - exit(EXIT_FAILURE); - } + if (*outputbase == nullptr && noocr == false) { + PrintHelpMessage(argv[0]); + exit(EXIT_FAILURE); + } } static void PreloadRenderers( - tesseract::TessBaseAPI* api, - tesseract::PointerVector* renderers, - tesseract::PageSegMode pagesegmode, const char* outputbase) { - if (pagesegmode == tesseract::PSM_OSD_ONLY) { + tesseract::TessBaseAPI* api, + tesseract::PointerVector* renderers, + tesseract::PageSegMode pagesegmode, const char* outputbase) { + if (pagesegmode == tesseract::PSM_OSD_ONLY) { #ifndef DISABLED_LEGACY_ENGINE - renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); + renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); #endif // ndef DISABLED_LEGACY_ENGINE - } else { - bool b; - api->GetBoolVariable("tessedit_create_hocr", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessHOcrRenderer* renderer = - new tesseract::TessHOcrRenderer(outputbase, font_info); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create hOCR output file: %s\n", - strerror(errno)); - } - } + } else { + bool b; + api->GetBoolVariable("tessedit_create_hocr", &b); + if (b) { + bool font_info; + api->GetBoolVariable("hocr_font_info", &font_info); + tesseract::TessHOcrRenderer* renderer = + new tesseract::TessHOcrRenderer(outputbase, font_info); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create hOCR output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_alto", &b); - if (b) { - tesseract::TessAltoRenderer* renderer = - new tesseract::TessAltoRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create ALTO output file: 
%s\n", - strerror(errno)); - } + api->GetBoolVariable("tessedit_create_alto", &b); + if (b) { + tesseract::TessAltoRenderer* renderer = + new tesseract::TessAltoRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create ALTO output file: %s\n", + strerror(errno)); } + } - api->GetBoolVariable("tessedit_create_tsv", &b); - if (b) { - bool font_info; - api->GetBoolVariable("hocr_font_info", &font_info); - tesseract::TessTsvRenderer* renderer = - new tesseract::TessTsvRenderer(outputbase, font_info); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TSV output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_create_tsv", &b); + if (b) { + bool font_info; + api->GetBoolVariable("hocr_font_info", &font_info); + tesseract::TessTsvRenderer* renderer = + new tesseract::TessTsvRenderer(outputbase, font_info); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create TSV output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_pdf", &b); - if (b) { -#ifdef WIN32 - if (_setmode(_fileno(stdout), _O_BINARY) == -1) + api->GetBoolVariable("tessedit_create_pdf", &b); + if (b) { + #ifdef WIN32 + if (_setmode(_fileno(stdout), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); -#endif // WIN32 - bool textonly; - api->GetBoolVariable("textonly_pdf", &textonly); - tesseract::TessPDFRenderer* renderer = - new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), - textonly); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create PDF output file: %s\n", - strerror(errno)); - } - } + #endif // WIN32 + bool textonly; + api->GetBoolVariable("textonly_pdf", &textonly); + tesseract::TessPDFRenderer* renderer = + 
new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), + textonly); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create PDF output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_write_unlv", &b); - if (b) { - api->SetVariable("unlv_tilde_crunching", "true"); - tesseract::TessUnlvRenderer* renderer = - new tesseract::TessUnlvRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create UNLV output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_write_unlv", &b); + if (b) { + api->SetVariable("unlv_tilde_crunching", "true"); + tesseract::TessUnlvRenderer* renderer = + new tesseract::TessUnlvRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create UNLV output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_boxfile", &b); - if (b) { - tesseract::TessBoxTextRenderer* renderer = - new tesseract::TessBoxTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create BOX output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_create_boxfile", &b); + if (b) { + tesseract::TessBoxTextRenderer* renderer = + new tesseract::TessBoxTextRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create BOX output file: %s\n", + strerror(errno)); + } + } - api->GetBoolVariable("tessedit_create_txt", &b); - if (b || renderers->empty()) { - tesseract::TessTextRenderer* renderer = - new tesseract::TessTextRenderer(outputbase); - if (renderer->happy()) { - renderers->push_back(renderer); - } else { - delete renderer; - tprintf("Error, could not create TXT 
output file: %s\n", - strerror(errno)); - } - } + api->GetBoolVariable("tessedit_create_txt", &b); + if (b || renderers->empty()) { + tesseract::TessTextRenderer* renderer = + new tesseract::TessTextRenderer(outputbase); + if (renderer->happy()) { + renderers->push_back(renderer); + } else { + delete renderer; + tprintf("Error, could not create TXT output file: %s\n", + strerror(errno)); + } } + } - if (!renderers->empty()) { - // Since the PointerVector auto-deletes, null-out the renderers that are - // added to the root, and leave the root in the vector. - for (int r = 1; r < renderers->size(); ++r) { - (*renderers)[0]->insert((*renderers)[r]); - (*renderers)[r] = nullptr; - } + if (!renderers->empty()) { + // Since the PointerVector auto-deletes, null-out the renderers that are + // added to the root, and leave the root in the vector. + for (int r = 1; r < renderers->size(); ++r) { + (*renderers)[0]->insert((*renderers)[r]); + (*renderers)[r] = nullptr; } + } } @@ -525,135 +525,135 @@ static void PreloadRenderers( **********************************************************************/ int main(int argc, char** argv) { - const char* lang = nullptr; - const char* image = nullptr; - const char* outputbase = nullptr; - const char* datapath = nullptr; - bool list_langs = false; - bool print_parameters = false; - l_int32 dpi = 0; - int arg_i = 1; - tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; + const char* lang = nullptr; + const char* image = nullptr; + const char* outputbase = nullptr; + const char* datapath = nullptr; + bool list_langs = false; + bool print_parameters = false; + l_int32 dpi = 0; + int arg_i = 1; + tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; #ifdef DISABLED_LEGACY_ENGINE - auto enginemode = tesseract::OEM_LSTM_ONLY; + auto enginemode = tesseract::OEM_LSTM_ONLY; #else - tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; + tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; #endif - /* main() calls 
functions like ParseArgs which call exit(). - * This results in memory leaks if vars_vec and vars_values are - * declared as auto variables (destructor is not called then). */ - static GenericVector vars_vec; - static GenericVector vars_values; + /* main() calls functions like ParseArgs which call exit(). + * This results in memory leaks if vars_vec and vars_values are + * declared as auto variables (destructor is not called then). */ + static GenericVector vars_vec; + static GenericVector vars_values; #if !defined(DEBUG) - // Disable debugging and informational messages from Leptonica. - setMsgSeverity(L_SEVERITY_ERROR); + // Disable debugging and informational messages from Leptonica. + setMsgSeverity(L_SEVERITY_ERROR); #endif #if defined(HAVE_TIFFIO_H) && defined(_WIN32) - /* Show libtiff errors and warnings on console (not in GUI). */ + /* Show libtiff errors and warnings on console (not in GUI). */ TIFFSetErrorHandler(Win32ErrorHandler); TIFFSetWarningHandler(Win32WarningHandler); #endif // HAVE_TIFFIO_H && _WIN32 - ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, - &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i, - &pagesegmode, &enginemode); + ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, + &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i, + &pagesegmode, &enginemode); - if (lang == nullptr) { - // Set default language if none was given. - lang = "eng"; - } + if (lang == nullptr) { + // Set default language if none was given. + lang = "eng"; + } - if (image == nullptr && !list_langs && !print_parameters) - return EXIT_SUCCESS; + if (image == nullptr && !list_langs && !print_parameters) + return EXIT_SUCCESS; - PERF_COUNT_START("Tesseract:main") + PERF_COUNT_START("Tesseract:main") - // Call GlobalDawgCache here to create the global DawgCache object before - // the TessBaseAPI object. 
This fixes the order of destructor calls: - // first TessBaseAPI must be destructed, DawgCache must be the last object. - tesseract::Dict::GlobalDawgCache(); + // Call GlobalDawgCache here to create the global DawgCache object before + // the TessBaseAPI object. This fixes the order of destructor calls: + // first TessBaseAPI must be destructed, DawgCache must be the last object. + tesseract::Dict::GlobalDawgCache(); - // Avoid memory leak caused by auto variable when return is called. - static tesseract::TessBaseAPI api; + // Avoid memory leak caused by auto variable when return is called. + static tesseract::TessBaseAPI api; - api.SetOutputName(outputbase); + api.SetOutputName(outputbase); - const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), - argc - arg_i, &vars_vec, &vars_values, false); + const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), + argc - arg_i, &vars_vec, &vars_values, false); - SetVariablesFromCLArgs(&api, argc, argv); + SetVariablesFromCLArgs(&api, argc, argv); - if (list_langs) { - PrintLangsList(&api); - return EXIT_SUCCESS; - } + if (list_langs) { + PrintLangsList(&api); + return EXIT_SUCCESS; + } - if (init_failed) { - fprintf(stderr, "Could not initialize tesseract.\n"); - return EXIT_FAILURE; - } + if (init_failed) { + fprintf(stderr, "Could not initialize tesseract.\n"); + return EXIT_FAILURE; + } - if (print_parameters) { - FILE* fout = stdout; - fprintf(stdout, "Tesseract parameters:\n"); - api.PrintVariables(fout); - api.End(); - return EXIT_SUCCESS; - } + if (print_parameters) { + FILE* fout = stdout; + fprintf(stdout, "Tesseract parameters:\n"); + api.PrintVariables(fout); + api.End(); + return EXIT_SUCCESS; + } - FixPageSegMode(&api, pagesegmode); + FixPageSegMode(&api, pagesegmode); - if (dpi) { - char dpi_string[255]; - snprintf(dpi_string, 254, "%d", dpi); - api.SetVariable("user_defined_dpi", dpi_string); - } + if (dpi) { + char dpi_string[255]; + snprintf(dpi_string, 254, "%d", 
dpi); + api.SetVariable("user_defined_dpi", dpi_string); + } - if (pagesegmode == tesseract::PSM_AUTO_ONLY) { - int ret_val = EXIT_SUCCESS; + if (pagesegmode == tesseract::PSM_AUTO_ONLY) { + int ret_val = EXIT_SUCCESS; - Pix* pixs = pixRead(image); - if (!pixs) { - fprintf(stderr, "Leptonica can't process input file: %s\n", image); - return 2; - } + Pix* pixs = pixRead(image); + if (!pixs) { + fprintf(stderr, "Leptonica can't process input file: %s\n", image); + return 2; + } - api.SetImage(pixs); + api.SetImage(pixs); - tesseract::Orientation orientation; - tesseract::WritingDirection direction; - tesseract::TextlineOrder order; - float deskew_angle; + tesseract::Orientation orientation; + tesseract::WritingDirection direction; + tesseract::TextlineOrder order; + float deskew_angle; - const tesseract::PageIterator* it = api.AnalyseLayout(); - if (it) { - it->Orientation(&orientation, &direction, &order, &deskew_angle); - tprintf( - "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" - "Deskew angle: %.4f\n", - orientation, direction, order, deskew_angle); - } else { - ret_val = EXIT_FAILURE; - } + const tesseract::PageIterator* it = api.AnalyseLayout(); + if (it) { + it->Orientation(&orientation, &direction, &order, &deskew_angle); + tprintf( + "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" + "Deskew angle: %.4f\n", + orientation, direction, order, deskew_angle); + } else { + ret_val = EXIT_FAILURE; + } - delete it; + delete it; - pixDestroy(&pixs); - return ret_val; - } + pixDestroy(&pixs); + return ret_val; + } - // set in_training_mode to true when using one of these configs: - // ambigs.train, box.train, box.train.stderr, linebox, rebox - bool b = false; - bool in_training_mode = - (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || - (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + // set in_training_mode to true when using one of these 
configs: + // ambigs.train, box.train, box.train.stderr, linebox, rebox + bool b = false; + bool in_training_mode = + (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || + (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); #ifdef DISABLED_LEGACY_ENGINE - auto cur_psm = api.GetPageSegMode(); + auto cur_psm = api.GetPageSegMode(); auto osd_warning = std::string(""); if (cur_psm == tesseract::PSM_OSD_ONLY) { const char* disabled_osd_msg = @@ -673,37 +673,37 @@ int main(int argc, char** argv) { } #endif // def DISABLED_LEGACY_ENGINE - // Avoid memory leak caused by auto variable when exit() is called. - static tesseract::PointerVector renderers; + // Avoid memory leak caused by auto variable when exit() is called. + static tesseract::PointerVector renderers; - if (in_training_mode) { - renderers.push_back(nullptr); - } else { - PreloadRenderers(&api, &renderers, pagesegmode, outputbase); - } + if (in_training_mode) { + renderers.push_back(nullptr); + } else { + PreloadRenderers(&api, &renderers, pagesegmode, outputbase); + } - bool banner = false; - if (outputbase != nullptr && strcmp(outputbase, "-") && - strcmp(outputbase, "stdout")) { - banner = true; - } + bool banner = false; + if (outputbase != nullptr && strcmp(outputbase, "-") && + strcmp(outputbase, "stdout")) { + banner = true; + } - if (!renderers.empty()) { - if (banner) PrintBanner(); + if (!renderers.empty()) { + if (banner) PrintBanner(); #ifdef DISABLED_LEGACY_ENGINE - if (!osd_warning.empty()) { + if (!osd_warning.empty()) { fprintf(stderr, "%s",osd_warning.c_str()); } #endif - bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]); - if (!succeed) { - fprintf(stderr, "Error during processing.\n"); - return EXIT_FAILURE; - } + bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]); + if (!succeed) { + fprintf(stderr, "Error during processing.\n"); + return EXIT_FAILURE; } + } - 
PERF_COUNT_END + PERF_COUNT_END - return EXIT_SUCCESS; + return EXIT_SUCCESS; } From 664f7eabcd321b397092dafb505d45b26e988be5 Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Thu, 29 Nov 2018 21:11:53 -0500 Subject: [PATCH 5/9] Add #include to altorenderer.cpp --- src/api/altorenderer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index a654ad818d..037de3c999 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -17,6 +17,7 @@ **********************************************************************/ #include "baseapi.h" +#include #include "renderer.h" namespace tesseract { From a74b6b104982006092bfa09626800e59c6bd7828 Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Thu, 29 Nov 2018 21:49:29 -0500 Subject: [PATCH 6/9] Small style fixes --- src/api/altorenderer.cpp | 86 +++++++++++++++++++--------------------- src/api/capi.cpp | 2 +- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index 037de3c999..89c0e463d7 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -1,20 +1,17 @@ -/********************************************************************** - * File: altorenderer.cpp - * Description: ALTO rendering interface - * Author: Jake Sebright - * - * (C) Copyright 2018 - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ +// File: altorenderer.cpp +// Description: ALTO rendering interface +// Author: Jake Sebright + +// (C) Copyright 2018 +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "baseapi.h" #include @@ -22,16 +19,13 @@ namespace tesseract { -/********************************************************************** - * Alto Text Renderer interface implementation - **********************************************************************/ TessAltoRenderer::TessAltoRenderer(const char *outputbase) : TessResultRenderer(outputbase, "xml") { } - /** - * Append the ALTO XML for the beginning of the document - */ + /// + /// Append the ALTO XML for the beginning of the document + /// bool TessAltoRenderer::BeginDocumentHandler() { AppendString( "\n" @@ -58,19 +52,19 @@ namespace tesseract { return true; } - /** - * Append the ALTO XML for the end of the document - */ + /// + /// Append the ALTO XML for the end of the document + /// bool TessAltoRenderer::EndDocumentHandler() { AppendString("\t\n\n"); return true; } - /** - * Append the ALTO XML for the layout of the image - */ - bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) { + /// + /// Append the ALTO XML for the layout of the image + /// + bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) { const std::unique_ptr hocr(api->GetAltoText(imagenum())); if (hocr == nullptr) return false; @@ -79,9 +73,9 @@ namespace tesseract { return true; } 
- /** - * Add a unique ID to an ALTO element - */ + /// + /// Add a unique ID to an ALTO element + /// static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) { const size_t BUFSIZE = 64; char id_buffer[BUFSIZE]; @@ -92,10 +86,10 @@ namespace tesseract { *alto_str += "\""; } - /** - * Add coordinates to specified TextBlock, TextLine, or String bounding box - * Add word confidence if adding to a String bounding box - */ + /// + /// Add coordinates to specified TextBlock, TextLine, or String bounding box + /// Add word confidence if adding to a String bounding box + /// static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, STRING *alto_str) { int left, top, right, bottom; @@ -131,18 +125,18 @@ namespace tesseract { } } - /** - * Make an XML-formatted string with ALTO markup from the internal - * data structures. - */ + /// + /// Make an XML-formatted string with ALTO markup from the internal + /// data structures. + /// char *TessBaseAPI::GetAltoText(int page_number) { return GetAltoText(nullptr, page_number); } - /** - * Make an XML-formatted string with ALTO markup from the internal - * data structures. - */ + /// + /// Make an XML-formatted string with ALTO markup from the internal + /// data structures. 
+ /// char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) return nullptr; @@ -253,4 +247,4 @@ namespace tesseract { return ret; } - } \ No newline at end of file + } diff --git a/src/api/capi.cpp b/src/api/capi.cpp index 333bbcd3fe..cffdf613ea 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -68,7 +68,7 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase) { - return new TessHOcrRenderer(outputbase); + return new TessAltoRenderer(outputbase); } TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, From 7da328ef831fc08e39bfb5ec2a273cd1c3caa5f1 Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Thu, 29 Nov 2018 22:01:26 -0500 Subject: [PATCH 7/9] Dynamically generate tesseract version value --- src/api/altorenderer.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index 89c0e463d7..5895b84b54 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -28,26 +28,28 @@ namespace tesseract { /// bool TessAltoRenderer::BeginDocumentHandler() { AppendString( - "\n" - "\n" - "\t\n" - "\t\tpixel\n" - "\t\t\n" - "\t\t\t"); + "\n" + "\n" + "\t\n" + "\t\tpixel\n" + "\t\t\n" + "\t\t\t"); AppendString(title()); AppendString("\t\t\t\n" - "\t\t\n" - "\t\t\n" - "\t\t\t\n" - "\t\t\t\t\n" - "\t\t\t\t\ttesseract 4.0.0\n" - "\t\t\t\t\n" - "\t\t\t\n" - "\t\t\n" - "\t\n" - "\t\n"); + "\t\t\n" + "\t\t\n" + "\t\t\t\n" + "\t\t\t\t\n" + "\t\t\t\t\ttesseract "); + AppendString(TessBaseAPI::Version()); + AppendString("\n" + "\t\t\t\t\n" + "\t\t\t\n" + "\t\t\n" + "\t\n" + "\t\n"); return true; } From 5c8cf13254729a668b8493bf4eb615a487495e47 Mon Sep 17 00:00:00 2001 From: Jake Sebright Date: Thu, 29 Nov 2018 
22:15:20 -0500 Subject: [PATCH 8/9] Re-organize functions alphabetically --- src/api/altorenderer.cpp | 180 +++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 90 deletions(-) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index 5895b84b54..fe7c3f99ea 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -19,8 +19,56 @@ namespace tesseract { - TessAltoRenderer::TessAltoRenderer(const char *outputbase) - : TessResultRenderer(outputbase, "xml") { + /// + /// Add coordinates to specified TextBlock, TextLine, or String bounding box + /// Add word confidence if adding to a String bounding box + /// + static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, + STRING *alto_str) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + + int hpos = left; + int vpos = top; + int height = bottom - top; + int width = right - left; + + *alto_str += " HPOS=\""; + alto_str->add_str_int("", hpos); + *alto_str += "\""; + *alto_str += " VPOS=\""; + alto_str->add_str_int("", vpos); + *alto_str += "\""; + *alto_str += " WIDTH=\""; + alto_str->add_str_int("", width); + *alto_str += "\""; + *alto_str += " HEIGHT=\""; + alto_str->add_str_int("", height); + *alto_str += "\""; + + if (level == RIL_WORD) { + int wc = it->Confidence(RIL_WORD); + *alto_str += " WC=\"0."; + alto_str->add_str_int("", wc); + *alto_str += "\""; + } + if (level != RIL_WORD) { + + *alto_str += ">"; + } + } + + /// + /// Add a unique ID to an ALTO element + /// + static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) { + const size_t BUFSIZE = 64; + char id_buffer[BUFSIZE]; + snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); + id_buffer[BUFSIZE - 1] = '\0'; + *alto_str += " ID=\""; + *alto_str += id_buffer; + *alto_str += "\""; } /// @@ -54,15 +102,6 @@ namespace tesseract { return true; } - /// - /// Append the ALTO XML for the end of the document - /// - bool 
TessAltoRenderer::EndDocumentHandler() { - AppendString("\t\n\n"); - - return true; - } - /// /// Append the ALTO XML for the layout of the image /// @@ -76,97 +115,58 @@ namespace tesseract { } /// - /// Add a unique ID to an ALTO element - /// - static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) { - const size_t BUFSIZE = 64; - char id_buffer[BUFSIZE]; - snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1); - id_buffer[BUFSIZE - 1] = '\0'; - *alto_str += " ID=\""; - *alto_str += id_buffer; - *alto_str += "\""; - } - - /// - /// Add coordinates to specified TextBlock, TextLine, or String bounding box - /// Add word confidence if adding to a String bounding box + /// Append the ALTO XML for the end of the document /// - static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, - STRING *alto_str) { - int left, top, right, bottom; - it->BoundingBox(level, &left, &top, &right, &bottom); - - int hpos = left; - int vpos = top; - int height = bottom - top; - int width = right - left; - - *alto_str += " HPOS=\""; - alto_str->add_str_int("", hpos); - *alto_str += "\""; - *alto_str += " VPOS=\""; - alto_str->add_str_int("", vpos); - *alto_str += "\""; - *alto_str += " WIDTH=\""; - alto_str->add_str_int("", width); - *alto_str += "\""; - *alto_str += " HEIGHT=\""; - alto_str->add_str_int("", height); - *alto_str += "\""; + bool TessAltoRenderer::EndDocumentHandler() { + AppendString("\t\n\n"); - if (level == RIL_WORD) { - int wc = it->Confidence(RIL_WORD); - *alto_str += " WC=\"0."; - alto_str->add_str_int("", wc); - *alto_str += "\""; - } - if (level != RIL_WORD) { + return true; + } - *alto_str += ">"; - } + TessAltoRenderer::TessAltoRenderer(const char *outputbase) + : TessResultRenderer(outputbase, "xml") { } /// /// Make an XML-formatted string with ALTO markup from the internal /// data structures. 
/// - char *TessBaseAPI::GetAltoText(int page_number) { - return GetAltoText(nullptr, page_number); - } + char *TessBaseAPI::GetAltoText(int page_number) { + return GetAltoText(nullptr, page_number); + } /// /// Make an XML-formatted string with ALTO markup from the internal /// data structures. /// - char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) - return nullptr; - - int lcnt = 0, bcnt = 0, wcnt = 0; - int page_id = page_number; - - STRING alto_str(""); - - if (input_file_ == nullptr) - SetInputName(nullptr); - - #ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = - MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, - uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, - nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, - utf8_len, nullptr, nullptr); - *input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; - #endif + char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) + return nullptr; + + int lcnt = 0, bcnt = 0, wcnt = 0; + int page_id = page_number; + + STRING alto_str(""); + + if (input_file_ == nullptr) + SetInputName(nullptr); + + #ifdef _WIN32 + // convert input name from ANSI encoding to utf-8 + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0); + wchar_t *uni16_str = new WCHAR[str16_len]; + str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, + uni16_str, str16_len); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, + nullptr, nullptr); + char *utf8_str = new 
char[utf8_len]; + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, + utf8_len, nullptr, nullptr); + *input_file_ = utf8_str; + delete[] uni16_str; + delete[] utf8_str; + #endif alto_str += "\t\t Date: Fri, 30 Nov 2018 05:53:10 +0100 Subject: [PATCH 9/9] Fix include statement --- src/api/altorenderer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index fe7c3f99ea..11af90f692 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -14,7 +14,7 @@ // limitations under the License. #include "baseapi.h" -#include +#include #include "renderer.h" namespace tesseract {