From e38b98468d87d2a57484c8d062c4015c0f076dab Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Mon, 19 Nov 2018 20:21:41 -0500
Subject: [PATCH 1/9] Add support for ALTO output

---
 src/api/baseapi.cpp       | 4537 +++++++++++++++++++------------------
 src/api/baseapi.h         | 1692 +++++++-------
 src/api/capi.cpp          |   44 +-
 src/api/capi.h            |   96 +-
 src/api/renderer.cpp      |  426 ++--
 src/api/renderer.h        |  362 +--
 src/api/tesseractmain.cpp |  868 +++----
 tessdata/configs/alto     |    1 +
 8 files changed, 4148 insertions(+), 3878 deletions(-)
 create mode 100644 tessdata/configs/alto
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 4caf4428f8..05f04a443c 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -94,34 +94,34 @@ BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin");
 namespace tesseract {
 
 /** Minimum sensible image size to be worth running tesseract. */
-const int kMinRectSize = 10;
+    const int kMinRectSize = 10;
 /** Character returned when Tesseract couldn't recognize as anything. */
-const char kTesseractReject = '~';
+    const char kTesseractReject = '~';
 /** Character used by UNLV error counter as a reject. */
-const char kUNLVReject = '~';
+    const char kUNLVReject = '~';
 /** Character used by UNLV as a suspect marker. */
-const char kUNLVSuspect = '^';
+    const char kUNLVSuspect = '^';
 /**
  * Filename used for input image file, from which to derive a name to search
  * for a possible UNLV zone file, if none is specified by SetInputName.
  */
-const char* kInputFile = "noname.tif";
+    const char* kInputFile = "noname.tif";
 /**
  * Temp file used for storing current parameters before applying retry values.
  */
-const char* kOldVarsFile = "failed_vars.txt";
+    const char* kOldVarsFile = "failed_vars.txt";
 /** Max string length of an int.  */
-const int kMaxIntSize = 22;
+    const int kMaxIntSize = 22;
 
 /* Add all available languages recursively.
 */
-static void addAvailableLanguages(const STRING &datadir, const STRING &base,
-                                  GenericVector<STRING>* langs)
-{
-  const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
-  const size_t extlen = sizeof(kTrainedDataSuffix);
+    static void addAvailableLanguages(const STRING &datadir, const STRING &base,
+                                      GenericVector<STRING>* langs)
+    {
+      const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
+      const size_t extlen = sizeof(kTrainedDataSuffix);
 #ifdef _WIN32
-    WIN32_FIND_DATA data;
+      WIN32_FIND_DATA data;
     HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data);
     if (handle != INVALID_HANDLE_VALUE) {
       BOOL result = TRUE;
@@ -146,83 +146,83 @@ static void addAvailableLanguages(const STRING &datadir, const STRING &base,
       FindClose(handle);
     }
 #else  // _WIN32
-  DIR* dir = opendir((datadir + base).string());
-  if (dir != nullptr) {
-    dirent *de;
-    while ((de = readdir(dir))) {
-      char *name = de->d_name;
-      // Skip '.', '..', and hidden files
-      if (name[0] != '.') {
-        struct stat st;
-        if (stat((datadir + base2 + name).string(), &st) == 0 &&
-            (st.st_mode & S_IFDIR) == S_IFDIR) {
-          addAvailableLanguages(datadir, base2 + name, langs);
-        } else {
-          size_t len = strlen(name);
-          if (len > extlen && name[len - extlen] == '.' &&
-              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
-            name[len - extlen] = '\0';
-            langs->push_back(base2 + name);
+      DIR* dir = opendir((datadir + base).string());
+      if (dir != nullptr) {
+        dirent *de;
+        while ((de = readdir(dir))) {
+          char *name = de->d_name;
+          // Skip '.', '..', and hidden files
+          if (name[0] != '.') {
+            struct stat st;
+            if (stat((datadir + base2 + name).string(), &st) == 0 &&
+                (st.st_mode & S_IFDIR) == S_IFDIR) {
+              addAvailableLanguages(datadir, base2 + name, langs);
+            } else {
+              size_t len = strlen(name);
+              if (len > extlen && name[len - extlen] == '.' &&
+                  strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
+                name[len - extlen] = '\0';
+                langs->push_back(base2 + name);
+              }
+            }
           }
         }
+        closedir(dir);
       }
-    }
-    closedir(dir);
-  }
 #endif
-}
+    }
 
 // Compare two STRING values (used for sorting).
-static int CompareSTRING(const void* p1, const void* p2) {
-  const STRING* s1 = static_cast<const STRING*>(p1);
-  const STRING* s2 = static_cast<const STRING*>(p2);
-  return strcmp(s1->c_str(), s2->c_str());
-}
-
-TessBaseAPI::TessBaseAPI()
-    : tesseract_(nullptr),
-      osd_tesseract_(nullptr),
-      equ_detect_(nullptr),
-      reader_(nullptr),
-      // Thresholder is initialized to nullptr here, but will be set before use by:
-      // A constructor of a derived API,  SetThresholder(), or
-      // created implicitly when used in InternalSetImage.
-      thresholder_(nullptr),
-      paragraph_models_(nullptr),
-      block_list_(nullptr),
-      page_res_(nullptr),
-      input_file_(nullptr),
-      output_file_(nullptr),
-      datapath_(nullptr),
-      language_(nullptr),
-      last_oem_requested_(OEM_DEFAULT),
-      recognition_done_(false),
-      truth_cb_(nullptr),
-      rect_left_(0),
-      rect_top_(0),
-      rect_width_(0),
-      rect_height_(0),
-      image_width_(0),
-      image_height_(0) {
-  const char *locale;
-  locale = std::setlocale(LC_ALL, nullptr);
-  ASSERT_HOST(!strcmp(locale, "C"));
-  locale = std::setlocale(LC_CTYPE, nullptr);
-  ASSERT_HOST(!strcmp(locale, "C"));
-  locale = std::setlocale(LC_NUMERIC, nullptr);
-  ASSERT_HOST(!strcmp(locale, "C"));
-}
-
-TessBaseAPI::~TessBaseAPI() {
-  End();
-}
+    static int CompareSTRING(const void* p1, const void* p2) {  // comparator handed to langs->sort()
+      const STRING* s1 = static_cast<const STRING*>(p1);  // both arguments point at STRING elements
+      const STRING* s2 = static_cast<const STRING*>(p2);
+      return strcmp(s1->c_str(), s2->c_str());  // <0, 0 or >0 by byte-wise comparison
+    }
+
+    TessBaseAPI::TessBaseAPI()  // all listed members start null/zero/default; nothing is allocated here
+            : tesseract_(nullptr),
+              osd_tesseract_(nullptr),
+              equ_detect_(nullptr),
+              reader_(nullptr),
+            // Thresholder is initialized to nullptr here, but will be set before use by:
+            // A constructor of a derived API,  SetThresholder(), or
+            // created implicitly when used in InternalSetImage.
+              thresholder_(nullptr),
+              paragraph_models_(nullptr),
+              block_list_(nullptr),
+              page_res_(nullptr),
+              input_file_(nullptr),
+              output_file_(nullptr),
+              datapath_(nullptr),
+              language_(nullptr),
+              last_oem_requested_(OEM_DEFAULT),
+              recognition_done_(false),
+              truth_cb_(nullptr),
+              rect_left_(0),
+              rect_top_(0),
+              rect_width_(0),
+              rect_height_(0),
+              image_width_(0),
+              image_height_(0) {
+      const char *locale;  // reused for each locale category checked below
+      locale = std::setlocale(LC_ALL, nullptr);  // nullptr only queries the current setting
+      ASSERT_HOST(!strcmp(locale, "C"));  // the whole locale must be "C"
+      locale = std::setlocale(LC_CTYPE, nullptr);
+      ASSERT_HOST(!strcmp(locale, "C"));  // character classification must be "C"
+      locale = std::setlocale(LC_NUMERIC, nullptr);
+      ASSERT_HOST(!strcmp(locale, "C"));  // number formatting must be "C"
+    }
+
+    TessBaseAPI::~TessBaseAPI() {
+      End();  // all teardown is delegated to End()
+    }
 
 /**
  * Returns the version identifier as a static string. Do not delete.
  */
-const char* TessBaseAPI::Version() {
-  return PACKAGE_VERSION;
-}
+    const char* TessBaseAPI::Version() {
+      return PACKAGE_VERSION;  // macro defined outside this hunk (presumably by the build configuration)
+    }
 
 /**
  * If compiled with OpenCL AND an available OpenCL
@@ -232,13 +232,13 @@ const char* TessBaseAPI::Version() {
  * otherwise *device=nullptr and returns 0.
  */
 #ifdef USE_OPENCL
-#ifdef USE_DEVICE_SELECTION
+    #ifdef USE_DEVICE_SELECTION
 #include "opencl_device_selection.h"
 #endif
 #endif
-size_t TessBaseAPI::getOpenCLDevice(void **data) {
+    size_t TessBaseAPI::getOpenCLDevice(void **data) {
 #ifdef USE_OPENCL
-#ifdef USE_DEVICE_SELECTION
+      #ifdef USE_DEVICE_SELECTION
   ds_device device = OpenclDevice::getDeviceSelection();
   if (device.type == DS_DEVICE_OPENCL_DEVICE) {
     *data = new cl_device_id;
@@ -248,17 +248,17 @@ size_t TessBaseAPI::getOpenCLDevice(void **data) {
 #endif
 #endif
 
-  *data = nullptr;
-  return 0;
-}
+      *data = nullptr;
+      return 0;
+    }
 
 /**
  * Writes the thresholded image to stderr as a PBM file on receipt of a
  * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
  */
-void TessBaseAPI::CatchSignals() {
+    void TessBaseAPI::CatchSignals() {
 #ifdef __linux__
-  struct sigaction action;
+      struct sigaction action;
   memset(&action, 0, sizeof(action));
   action.sa_handler = &signal_exit;
   action.sa_flags = SA_RESETHAND;
@@ -266,81 +266,81 @@ void TessBaseAPI::CatchSignals() {
   sigaction(SIGFPE, &action, nullptr);
   sigaction(SIGBUS, &action, nullptr);
 #else
-  // Warn API users that an implementation is needed.
-  tprintf("CatchSignals has no non-linux implementation!\n");
+      // Warn API users that an implementation is needed.
+      tprintf("CatchSignals has no non-linux implementation!\n");
 #endif
-}
+    }
 
 /**
  * Set the name of the input file. Needed only for training and
  * loading a UNLV zone file.
  */
-void TessBaseAPI::SetInputName(const char* name) {
-  if (input_file_ == nullptr)
-    input_file_ = new STRING(name);
-  else
-    *input_file_ = name;
-}
+    void TessBaseAPI::SetInputName(const char* name) {
+      if (input_file_ == nullptr)  // no STRING allocated yet
+        input_file_ = new STRING(name);
+      else  // otherwise overwrite the existing STRING in place
+        *input_file_ = name;
+    }
 
 /** Set the name of the output files. Needed only for debugging. */
-void TessBaseAPI::SetOutputName(const char* name) {
-  if (output_file_ == nullptr)
-    output_file_ = new STRING(name);
-  else
-    *output_file_ = name;
-}
-
-bool TessBaseAPI::SetVariable(const char* name, const char* value) {
-  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-  return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
-                              tesseract_->params());
-}
-
-bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
-  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-  return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
-                              tesseract_->params());
-}
-
-bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
-  IntParam *p = ParamUtils::FindParam<IntParam>(
-      name, GlobalParams()->int_params, tesseract_->params()->int_params);
-  if (p == nullptr) return false;
-  *value = (int32_t)(*p);
-  return true;
-}
-
-bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
-  BoolParam *p = ParamUtils::FindParam<BoolParam>(
-      name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
-  if (p == nullptr) return false;
-  *value = (BOOL8)(*p);
-  return true;
-}
-
-const char *TessBaseAPI::GetStringVariable(const char *name) const {
-  StringParam *p = ParamUtils::FindParam<StringParam>(
-      name, GlobalParams()->string_params, tesseract_->params()->string_params);
-  return (p != nullptr) ? p->string() : nullptr;
-}
-
-bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
-  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
-      name, GlobalParams()->double_params, tesseract_->params()->double_params);
-  if (p == nullptr) return false;
-  *value = (double)(*p);
-  return true;
-}
+    void TessBaseAPI::SetOutputName(const char* name) {
+      if (output_file_ == nullptr)  // no STRING allocated yet
+        output_file_ = new STRING(name);
+      else  // otherwise overwrite the existing STRING in place
+        *output_file_ = name;
+    }
+
+    bool TessBaseAPI::SetVariable(const char* name, const char* value) {
+      if (tesseract_ == nullptr) tesseract_ = new Tesseract;  // create on demand so params() exists
+      return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,  // result passed through
+                                  tesseract_->params());
+    }
+
+    bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
+      if (tesseract_ == nullptr) tesseract_ = new Tesseract;  // create on demand so params() exists
+      return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,  // same as SetVariable but DEBUG_ONLY
+                                  tesseract_->params());
+    }
+
+    bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
+      IntParam *p = ParamUtils::FindParam<IntParam>(  // searches global and per-instance int params
+              name, GlobalParams()->int_params, tesseract_->params()->int_params);  // NOTE(review): tesseract_ is not null-checked
+      if (p == nullptr) return false;  // unknown name: *value is left untouched
+      *value = (int32_t)(*p);
+      return true;
+    }
+
+    bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
+      BoolParam *p = ParamUtils::FindParam<BoolParam>(  // searches global and per-instance bool params
+              name, GlobalParams()->bool_params, tesseract_->params()->bool_params);  // NOTE(review): tesseract_ is not null-checked
+      if (p == nullptr) return false;  // unknown name: *value is left untouched
+      *value = (BOOL8)(*p);
+      return true;
+    }
+
+    const char *TessBaseAPI::GetStringVariable(const char *name) const {
+      StringParam *p = ParamUtils::FindParam<StringParam>(  // searches global and per-instance string params
+              name, GlobalParams()->string_params, tesseract_->params()->string_params);  // NOTE(review): tesseract_ is not null-checked
+      return (p != nullptr) ? p->string() : nullptr;  // nullptr signals an unknown name
+    }
+
+    bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
+      DoubleParam *p = ParamUtils::FindParam<DoubleParam>(  // searches global and per-instance double params
+              name, GlobalParams()->double_params, tesseract_->params()->double_params);  // NOTE(review): tesseract_ is not null-checked
+      if (p == nullptr) return false;  // unknown name: *value is left untouched
+      *value = (double)(*p);
+      return true;
+    }
 
 /** Get value of named variable as a string, if it exists. */
-bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
-  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
-}
+    bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
+      return ParamUtils::GetParamAsString(name, tesseract_->params(), val);  // NOTE(review): tesseract_ is not null-checked
+    }
 
 /** Print Tesseract parameters to the given file. */
-void TessBaseAPI::PrintVariables(FILE *fp) const {
-  ParamUtils::PrintParams(fp, tesseract_->params());
-}
+    void TessBaseAPI::PrintVariables(FILE *fp) const {
+      ParamUtils::PrintParams(fp, tesseract_->params());  // NOTE(review): tesseract_ is not null-checked
+    }
 
 /**
  * The datapath must be the name of the data directory (no ending /) or
@@ -350,90 +350,90 @@ void TessBaseAPI::PrintVariables(FILE *fp) const {
  * be returned.
  * @return: 0 on success and -1 on initialization failure.
  */
-int TessBaseAPI::Init(const char* datapath, const char* language,
-                      OcrEngineMode oem, char **configs, int configs_size,
-                      const GenericVector<STRING> *vars_vec,
-                      const GenericVector<STRING> *vars_values,
-                      bool set_only_non_debug_params) {
-  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
-              vars_values, set_only_non_debug_params, nullptr);
-}
+    int TessBaseAPI::Init(const char* datapath, const char* language,
+                          OcrEngineMode oem, char **configs, int configs_size,
+                          const GenericVector<STRING> *vars_vec,
+                          const GenericVector<STRING> *vars_values,
+                          bool set_only_non_debug_params) {
+      return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,  // data_size 0: first argument is a datapath
+                  vars_values, set_only_non_debug_params, nullptr);  // nullptr: no FileReader supplied
+    }
 
 // In-memory version reads the traineddata file directly from the given
 // data[data_size] array. Also implements the version with a datapath in data,
 // flagged by data_size = 0.
-int TessBaseAPI::Init(const char* data, int data_size, const char* language,
-                      OcrEngineMode oem, char** configs, int configs_size,
-                      const GenericVector<STRING>* vars_vec,
-                      const GenericVector<STRING>* vars_values,
-                      bool set_only_non_debug_params, FileReader reader) {
-  PERF_COUNT_START("TessBaseAPI::Init")
-  // Default language is "eng".
-  if (language == nullptr) language = "eng";
-  STRING datapath = data_size == 0 ? data : language;
-  // If the datapath, OcrEngineMode or the language have changed - start again.
-  // Note that the language_ field stores the last requested language that was
-  // initialized successfully, while tesseract_->lang stores the language
-  // actually used. They differ only if the requested language was nullptr, in
-  // which case tesseract_->lang is set to the Tesseract default ("eng").
-  if (tesseract_ != nullptr &&
-      (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
-       last_oem_requested_ != oem ||
-       (*language_ != language && tesseract_->lang != language))) {
-    delete tesseract_;
-    tesseract_ = nullptr;
-  }
-  // PERF_COUNT_SUB("delete tesseract_")
+    int TessBaseAPI::Init(const char* data, int data_size, const char* language,
+                          OcrEngineMode oem, char** configs, int configs_size,
+                          const GenericVector<STRING>* vars_vec,
+                          const GenericVector<STRING>* vars_values,
+                          bool set_only_non_debug_params, FileReader reader) {
+      PERF_COUNT_START("TessBaseAPI::Init")
+      // Default language is "eng".
+      if (language == nullptr) language = "eng";
+      STRING datapath = data_size == 0 ? data : language;
+      // If the datapath, OcrEngineMode or the language have changed - start again.
+      // Note that the language_ field stores the last requested language that was
+      // initialized successfully, while tesseract_->lang stores the language
+      // actually used. They differ only if the requested language was nullptr, in
+      // which case tesseract_->lang is set to the Tesseract default ("eng").
+      if (tesseract_ != nullptr &&
+          (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
+           last_oem_requested_ != oem ||
+           (*language_ != language && tesseract_->lang != language))) {
+        delete tesseract_;
+        tesseract_ = nullptr;
+      }
+        // PERF_COUNT_SUB("delete tesseract_")
 #ifdef USE_OPENCL
-  OpenclDevice od;
+        OpenclDevice od;
   od.InitEnv();
 #endif
-  PERF_COUNT_SUB("OD::InitEnv()")
-  bool reset_classifier = true;
-  if (tesseract_ == nullptr) {
-    reset_classifier = false;
-    tesseract_ = new Tesseract;
-    if (reader != nullptr) reader_ = reader;
-    TessdataManager mgr(reader_);
-    if (data_size != 0) {
-      mgr.LoadMemBuffer(language, data, data_size);
-    }
-    if (tesseract_->init_tesseract(
-            datapath.string(),
-            output_file_ != nullptr ? output_file_->string() : nullptr,
-            language, oem, configs, configs_size, vars_vec, vars_values,
-            set_only_non_debug_params, &mgr) != 0) {
-      return -1;
-    }
-  }
+      PERF_COUNT_SUB("OD::InitEnv()")
+      bool reset_classifier = true;
+      if (tesseract_ == nullptr) {
+        reset_classifier = false;
+        tesseract_ = new Tesseract;
+        if (reader != nullptr) reader_ = reader;
+        TessdataManager mgr(reader_);
+        if (data_size != 0) {
+          mgr.LoadMemBuffer(language, data, data_size);
+        }
+        if (tesseract_->init_tesseract(
+                datapath.string(),
+                output_file_ != nullptr ? output_file_->string() : nullptr,
+                language, oem, configs, configs_size, vars_vec, vars_values,
+                set_only_non_debug_params, &mgr) != 0) {
+          return -1;
+        }
+      }
 
-  PERF_COUNT_SUB("update tesseract_")
-  // Update datapath and language requested for the last valid initialization.
-  if (datapath_ == nullptr)
-    datapath_ = new STRING(datapath);
-  else
-    *datapath_ = datapath;
-  if ((strcmp(datapath_->string(), "") == 0) &&
-      (strcmp(tesseract_->datadir.string(), "") != 0))
-     *datapath_ = tesseract_->datadir;
-
-  if (language_ == nullptr)
-    language_ = new STRING(language);
-  else
-    *language_ = language;
-  last_oem_requested_ = oem;
+      PERF_COUNT_SUB("update tesseract_")
+      // Update datapath and language requested for the last valid initialization.
+      if (datapath_ == nullptr)
+        datapath_ = new STRING(datapath);
+      else
+        *datapath_ = datapath;
+      if ((strcmp(datapath_->string(), "") == 0) &&
+          (strcmp(tesseract_->datadir.string(), "") != 0))
+        *datapath_ = tesseract_->datadir;
+
+      if (language_ == nullptr)
+        language_ = new STRING(language);
+      else
+        *language_ = language;
+      last_oem_requested_ = oem;
 
 #ifndef DISABLED_LEGACY_ENGINE
-  // PERF_COUNT_SUB("update last_oem_requested_")
-  // For same language and datapath, just reset the adaptive classifier.
-  if (reset_classifier) {
-    tesseract_->ResetAdaptiveClassifier();
-    PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
-  }
+      // PERF_COUNT_SUB("update last_oem_requested_")
+      // For same language and datapath, just reset the adaptive classifier.
+      if (reset_classifier) {
+        tesseract_->ResetAdaptiveClassifier();
+        PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
+      }
 #endif  // ndef DISABLED_LEGACY_ENGINE
-  PERF_COUNT_END
-  return 0;
-}
+      PERF_COUNT_END
+      return 0;
+    }
 
 /**
  * Returns the languages string used in the last valid initialization.
@@ -443,38 +443,38 @@ int TessBaseAPI::Init(const char* data, int data_size, const char* language,
  * loaded use GetLoadedLanguagesAsVector.
  * The returned string should NOT be deleted.
  */
-const char* TessBaseAPI::GetInitLanguagesAsString() const {
-  return (language_ == nullptr || language_->string() == nullptr) ?
-      "" : language_->string();
-}
+    const char* TessBaseAPI::GetInitLanguagesAsString() const {
+      return (language_ == nullptr || language_->string() == nullptr) ?  // never returns nullptr
+             "" : language_->string();  // empty literal when no language has been recorded
+    }
 
 /**
  * Returns the loaded languages in the vector of STRINGs.
  * Includes all languages loaded by the last Init, including those loaded
  * as dependencies of other loaded languages.
  */
-void TessBaseAPI::GetLoadedLanguagesAsVector(
-    GenericVector<STRING>* langs) const {
-  langs->clear();
-  if (tesseract_ != nullptr) {
-    langs->push_back(tesseract_->lang);
-    int num_subs = tesseract_->num_sub_langs();
-    for (int i = 0; i < num_subs; ++i)
-      langs->push_back(tesseract_->get_sub_lang(i)->lang);
-  }
-}
+    void TessBaseAPI::GetLoadedLanguagesAsVector(
+            GenericVector<STRING>* langs) const {
+      langs->clear();  // output is emptied even when there is no engine
+      if (tesseract_ != nullptr) {
+        langs->push_back(tesseract_->lang);  // main instance's language comes first
+        int num_subs = tesseract_->num_sub_langs();
+        for (int i = 0; i < num_subs; ++i)  // then each sub-language in index order
+          langs->push_back(tesseract_->get_sub_lang(i)->lang);
+      }
+    }
 
 /**
  * Returns the available languages in the sorted vector of STRINGs.
  */
-void TessBaseAPI::GetAvailableLanguagesAsVector(
-    GenericVector<STRING>* langs) const {
-  langs->clear();
-  if (tesseract_ != nullptr) {
-    addAvailableLanguages(tesseract_->datadir, "", langs);
-    langs->sort(CompareSTRING);
-  }
-}
+    void TessBaseAPI::GetAvailableLanguagesAsVector(
+            GenericVector<STRING>* langs) const {
+      langs->clear();  // output is emptied even when there is no engine
+      if (tesseract_ != nullptr) {
+        addAvailableLanguages(tesseract_->datadir, "", langs);  // scan datadir from an empty relative base
+        langs->sort(CompareSTRING);  // order the names with the strcmp-based comparator
+      }
+    }
 
 //TODO(amit): Adapt to lstm
 #ifndef DISABLED_LEGACY_ENGINE
@@ -484,61 +484,61 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(
  * WARNING: temporary! This function will be removed from here and placed
  * in a separate API at some future time.
  */
-int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
-  if (tesseract_ == nullptr)
-    tesseract_ = new Tesseract;
-  else
-    ParamUtils::ResetToDefaults(tesseract_->params());
-  TessdataManager mgr;
-  return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
-}
+    int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
+      if (tesseract_ == nullptr)  // no engine yet: create one
+        tesseract_ = new Tesseract;
+      else  // existing engine: put its parameters back to defaults first
+        ParamUtils::ResetToDefaults(tesseract_->params());
+      TessdataManager mgr;  // local manager; it goes out of scope when this call returns
+      return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);  // result passed through
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
  * Init only for page layout analysis. Use only for calls to SetImage and
  * AnalysePage. Calls that attempt recognition will generate an error.
  */
-void TessBaseAPI::InitForAnalysePage() {
-  if (tesseract_ == nullptr) {
-    tesseract_ = new Tesseract;
-    #ifndef DISABLED_LEGACY_ENGINE
-    tesseract_->InitAdaptiveClassifier(nullptr);
-    #endif
-  }
-}
+    void TessBaseAPI::InitForAnalysePage() {
+      if (tesseract_ == nullptr) {  // does nothing when an engine already exists
+        tesseract_ = new Tesseract;
+#ifndef DISABLED_LEGACY_ENGINE
+        tesseract_->InitAdaptiveClassifier(nullptr);  // only compiled when the legacy engine is enabled
+#endif
+      }
+    }
 
 /**
  * Read a "config" file containing a set of parameter name, value pairs.
  * Searches the standard places: tessdata/configs, tessdata/tessconfigs
  * and also accepts a relative or absolute path name.
  */
-void TessBaseAPI::ReadConfigFile(const char* filename) {
-  tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
-}
+    void TessBaseAPI::ReadConfigFile(const char* filename) {
+      tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);  // NOTE(review): tesseract_ is not null-checked
+    }
 
 /** Same as above, but only set debug params from the given config file. */
-void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
-  tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
-}
+    void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
+      tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);  // NOTE(review): tesseract_ is not null-checked
+    }
 
 /**
  * Set the current page segmentation mode. Defaults to PSM_AUTO.
  * The mode is stored as an IntParam so it can also be modified by
  * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
  */
-void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
-  if (tesseract_ == nullptr)
-    tesseract_ = new Tesseract;
-  tesseract_->tessedit_pageseg_mode.set_value(mode);
-}
+    void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
+      if (tesseract_ == nullptr)  // create the engine on demand so the parameter exists
+        tesseract_ = new Tesseract;
+      tesseract_->tessedit_pageseg_mode.set_value(mode);  // the mode lives in the tessedit_pageseg_mode param
+    }
 
 /** Return the current page segmentation mode. */
-PageSegMode TessBaseAPI::GetPageSegMode() const {
-  if (tesseract_ == nullptr)
-    return PSM_SINGLE_BLOCK;
-  return static_cast<PageSegMode>(
-    static_cast<int>(tesseract_->tessedit_pageseg_mode));
-}
+    PageSegMode TessBaseAPI::GetPageSegMode() const {
+      if (tesseract_ == nullptr)
+        return PSM_SINGLE_BLOCK;  // value reported while no engine exists
+      return static_cast<PageSegMode>(  // param -> int -> enum
+              static_cast<int>(tesseract_->tessedit_pageseg_mode));
+    }
 
 /**
  * Recognize a rectangle from an image and return the result as a string.
@@ -553,35 +553,35 @@ PageSegMode TessBaseAPI::GetPageSegMode() const {
  * The recognized text is returned as a char* which is coded
  * as UTF8 and must be freed with the delete [] operator.
  */
-char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
-                                 int bytes_per_pixel,
-                                 int bytes_per_line,
-                                 int left, int top,
-                                 int width, int height) {
-  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
-    return nullptr;  // Nothing worth doing.
-
-  // Since this original api didn't give the exact size of the image,
-  // we have to invent a reasonable value.
-  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
-  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
-           bytes_per_pixel, bytes_per_line);
-  SetRectangle(left, top, width, height);
-
-  return GetUTF8Text();
-}
+    char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
+                                     int bytes_per_pixel,
+                                     int bytes_per_line,
+                                     int left, int top,
+                                     int width, int height) {
+      if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
+        return nullptr;  // No engine, or rectangle smaller than kMinRectSize: nothing worth doing.
+
+      // This entry point is not told the full image size, so one is derived:
+      // width from the line stride, height as top + height.
+      int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;  // 0 bytes/pixel means 1 bit/pixel
+      SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
+               bytes_per_pixel, bytes_per_line);
+      SetRectangle(left, top, width, height);  // restrict recognition to the requested rectangle
+
+      return GetUTF8Text();  // result of GetUTF8Text() is returned unchanged
+    }
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
  * Call between pages or documents etc to free up memory and forget
  * adaptive data.
  */
-void TessBaseAPI::ClearAdaptiveClassifier() {
-  if (tesseract_ == nullptr)
-    return;
-  tesseract_->ResetAdaptiveClassifier();
-  tesseract_->ResetDocumentDictionary();
-}
+    void TessBaseAPI::ClearAdaptiveClassifier() {
+      if (tesseract_ == nullptr)
+        return;  // nothing to reset without an engine
+      tesseract_->ResetAdaptiveClassifier();  // resets the adaptive classifier...
+      tesseract_->ResetDocumentDictionary();  // ...and the document dictionary
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
@@ -591,22 +591,22 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
  * full image, so it may be followed immediately by a GetUTF8Text, and it
  * will automatically perform recognition.
  */
-void TessBaseAPI::SetImage(const unsigned char* imagedata,
-                           int width, int height,
-                           int bytes_per_pixel, int bytes_per_line) {
-  if (InternalSetImage()) {
-    thresholder_->SetImage(imagedata, width, height,
-                           bytes_per_pixel, bytes_per_line);
-    SetInputImage(thresholder_->GetPixRect());
-  }
-}
+    void TessBaseAPI::SetImage(const unsigned char* imagedata,
+                               int width, int height,
+                               int bytes_per_pixel, int bytes_per_line) {
+      if (InternalSetImage()) {  // proceed only when InternalSetImage() returns a truthy value
+        thresholder_->SetImage(imagedata, width, height,  // hand the raw buffer to the thresholder
+                               bytes_per_pixel, bytes_per_line);
+        SetInputImage(thresholder_->GetPixRect());  // record the thresholder's Pix as the input image
+      }
+    }
 
-void TessBaseAPI::SetSourceResolution(int ppi) {
-  if (thresholder_)
-    thresholder_->SetSourceYResolution(ppi);
-  else
-    tprintf("Please call SetImage before SetSourceResolution.\n");
-}
+    void TessBaseAPI::SetSourceResolution(int ppi) {
+      if (thresholder_)  // the resolution is stored on the thresholder
+        thresholder_->SetSourceYResolution(ppi);
+      else  // no thresholder yet: only print a hint, ppi is discarded
+        tprintf("Please call SetImage before SetSourceResolution.\n");
+    }
 
 /**
  * Provide an image for Tesseract to recognize. As with SetImage above,
@@ -616,53 +616,53 @@ void TessBaseAPI::SetSourceResolution(int ppi) {
  * Use Pix where possible. Tesseract uses Pix as its internal representation
  * and it is therefore more efficient to provide a Pix directly.
  */
-void TessBaseAPI::SetImage(Pix* pix) {
-  if (InternalSetImage()) {
-    if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
-      // remove alpha channel from png
-      PIX* p1 = pixRemoveAlpha(pix);
-      pixSetSpp(p1, 3);
-      pix = pixCopy(nullptr, p1);
-      pixDestroy(&p1);
-    }
-    thresholder_->SetImage(pix);
-    SetInputImage(thresholder_->GetPixRect());
-  }
-}
+    void TessBaseAPI::SetImage(Pix* pix) {  // caller keeps ownership of pix
+      if (InternalSetImage()) {
+        PIX* rgb = nullptr;  // alpha-free temporary owned by this call
+        if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
+          rgb = pixRemoveAlpha(pix);  // remove alpha channel from png
+          pixSetSpp(rgb, 3);
+          pix = rgb;  // only the local pointer is redirected
+        }
+        thresholder_->SetImage(pix);  // NOTE(review): assumed to keep its own copy
+        pixDestroy(&rgb);  // no-op when null; this temporary used to leak
+        SetInputImage(thresholder_->GetPixRect());
+      }
+    }
 
 /**
  * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
  * Each SetRectangle clears the recogntion results so multiple rectangles
  * can be recognized with the same image.
  */
-void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
-  if (thresholder_ == nullptr)
-    return;
-  thresholder_->SetRectangle(left, top, width, height);
-  ClearResults();
-}
+    void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
+      if (thresholder_ == nullptr)
+        return;
+      thresholder_->SetRectangle(left, top, width, height);
+      ClearResults();
+    }
 
 /**
  * ONLY available after SetImage if you have Leptonica installed.
  * Get a copy of the internal thresholded image from Tesseract.
  */
-Pix* TessBaseAPI::GetThresholdedImage() {
-  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
-  if (tesseract_->pix_binary() == nullptr &&
-      !Threshold(tesseract_->mutable_pix_binary())) {
-    return nullptr;
-  }
-  return pixClone(tesseract_->pix_binary());
-}
+    Pix* TessBaseAPI::GetThresholdedImage() {
+      if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
+      if (tesseract_->pix_binary() == nullptr &&
+          !Threshold(tesseract_->mutable_pix_binary())) {
+        return nullptr;
+      }
+      return pixClone(tesseract_->pix_binary());
+    }
 
 /**
  * Get the result of page layout analysis as a leptonica-style
  * Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
-  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
-}
+    Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
+      return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
+    }
 
 /**
  * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order.
@@ -672,11 +672,11 @@ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
  * If paraids is not nullptr, the paragraph-id of each line within its block is
  * also returned as an array of one element per line. delete [] after use.
  */
-Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
-                                Pixa** pixa, int** blockids, int** paraids) {
-  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
-                            pixa, blockids, paraids);
-}
+    Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
+                                    Pixa** pixa, int** blockids, int** paraids) {
+      return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
+                                pixa, blockids, paraids);
+    }
 
 /**
  * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
@@ -686,18 +686,18 @@ Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
  * If blockids is not nullptr, the block-id of each line is also returned as an
  * array of one element per line. delete [] after use.
  */
-Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
-  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
-}
+    Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
+      return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
+    }
 
 /**
  * Get the words as a leptonica-style
  * Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
-  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
-}
+    Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
+      return GetComponentImages(RIL_WORD, true, pixa, nullptr);
+    }
 
 /**
  * Gets the individual connected (text) components (created
@@ -705,9 +705,9 @@ Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
  * as a leptonica-style Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
-  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
-}
+    Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
+      return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
+    }
 
 /**
  * Get the given level kind of components (block, textline, word etc.) as a
@@ -717,94 +717,94 @@ Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
  * as an array of one element per component. delete [] after use.
  * If text_only is true, then only text components are returned.
  */
-Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
-                                      bool text_only, bool raw_image,
-                                      const int raw_padding,
-                                      Pixa** pixa, int** blockids,
-                                      int** paraids) {
-  PageIterator* page_it = GetIterator();
-  if (page_it == nullptr)
-    page_it = AnalyseLayout();
-  if (page_it == nullptr)
-    return nullptr;  // Failed.
-
-  // Count the components to get a size for the arrays.
-  int component_count = 0;
-  int left, top, right, bottom;
-
-  TessResultCallback<bool>* get_bbox = nullptr;
-  if (raw_image) {
-    // Get bounding box in original raw image with padding.
-    get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
-                                        level, raw_padding,
-                                        &left, &top, &right, &bottom);
-  } else {
-    // Get bounding box from binarized imaged. Note that this could be
-    // differently scaled from the original image.
-    get_bbox = NewPermanentTessCallback(page_it,
-                                        &PageIterator::BoundingBoxInternal,
-                                        level, &left, &top, &right, &bottom);
-  }
-  do {
-    if (get_bbox->Run() &&
-        (!text_only || PTIsTextType(page_it->BlockType())))
-      ++component_count;
-  } while (page_it->Next(level));
-
-  Boxa* boxa = boxaCreate(component_count);
-  if (pixa != nullptr)
-    *pixa = pixaCreate(component_count);
-  if (blockids != nullptr)
-    *blockids = new int[component_count];
-  if (paraids != nullptr)
-    *paraids = new int[component_count];
-
-  int blockid = 0;
-  int paraid = 0;
-  int component_index = 0;
-  page_it->Begin();
-  do {
-    if (get_bbox->Run() &&
-        (!text_only || PTIsTextType(page_it->BlockType()))) {
-      Box* lbox = boxCreate(left, top, right - left, bottom - top);
-      boxaAddBox(boxa, lbox, L_INSERT);
-      if (pixa != nullptr) {
-        Pix* pix = nullptr;
-        if (raw_image) {
-          pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
-                                  &top);
-        } else {
-          pix = page_it->GetBinaryImage(level);
-        }
-        pixaAddPix(*pixa, pix, L_INSERT);
-        pixaAddBox(*pixa, lbox, L_CLONE);
-      }
-      if (paraids != nullptr) {
-        (*paraids)[component_index] = paraid;
-        if (page_it->IsAtFinalElement(RIL_PARA, level))
-          ++paraid;
-      }
-      if (blockids != nullptr) {
-        (*blockids)[component_index] = blockid;
-        if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
-          ++blockid;
-          paraid = 0;
-        }
+    Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
+                                          bool text_only, bool raw_image,
+                                          const int raw_padding,
+                                          Pixa** pixa, int** blockids,
+                                          int** paraids) {
+      PageIterator* page_it = GetIterator();
+      if (page_it == nullptr)
+        page_it = AnalyseLayout();
+      if (page_it == nullptr)
+        return nullptr;  // Failed.
+
+      // Count the components to get a size for the arrays.
+      int component_count = 0;
+      int left, top, right, bottom;
+
+      TessResultCallback<bool>* get_bbox = nullptr;
+      if (raw_image) {
+        // Get bounding box in original raw image with padding.
+        get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
+                                            level, raw_padding,
+                                            &left, &top, &right, &bottom);
+      } else {
+        // Get bounding box from binarized image. Note that this could be
+        // differently scaled from the original image.
+        get_bbox = NewPermanentTessCallback(page_it,
+                                            &PageIterator::BoundingBoxInternal,
+                                            level, &left, &top, &right, &bottom);
       }
-      ++component_index;
+      do {
+        if (get_bbox->Run() &&
+            (!text_only || PTIsTextType(page_it->BlockType())))
+          ++component_count;
+      } while (page_it->Next(level));
+
+      Boxa* boxa = boxaCreate(component_count);
+      if (pixa != nullptr)
+        *pixa = pixaCreate(component_count);
+      if (blockids != nullptr)
+        *blockids = new int[component_count];
+      if (paraids != nullptr)
+        *paraids = new int[component_count];
+
+      int blockid = 0;
+      int paraid = 0;
+      int component_index = 0;
+      page_it->Begin();
+      do {
+        if (get_bbox->Run() &&
+            (!text_only || PTIsTextType(page_it->BlockType()))) {
+          Box* lbox = boxCreate(left, top, right - left, bottom - top);
+          boxaAddBox(boxa, lbox, L_INSERT);
+          if (pixa != nullptr) {
+            Pix* pix = nullptr;
+            if (raw_image) {
+              pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
+                                      &top);
+            } else {
+              pix = page_it->GetBinaryImage(level);
+            }
+            pixaAddPix(*pixa, pix, L_INSERT);
+            pixaAddBox(*pixa, lbox, L_CLONE);
+          }
+          if (paraids != nullptr) {
+            (*paraids)[component_index] = paraid;
+            if (page_it->IsAtFinalElement(RIL_PARA, level))
+              ++paraid;
+          }
+          if (blockids != nullptr) {
+            (*blockids)[component_index] = blockid;
+            if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
+              ++blockid;
+              paraid = 0;
+            }
+          }
+          ++component_index;
+        }
+      } while (page_it->Next(level));
+      delete page_it;
+      delete get_bbox;
+      return boxa;
     }
-  } while (page_it->Next(level));
-  delete page_it;
-  delete get_bbox;
-  return boxa;
-}
 
-int TessBaseAPI::GetThresholdedImageScaleFactor() const {
-  if (thresholder_ == nullptr) {
-    return 0;
-  }
-  return thresholder_->GetScaleFactor();
-}
+    int TessBaseAPI::GetThresholdedImageScaleFactor() const {
+      if (thresholder_ == nullptr) {
+        return 0;
+      }
+      return thresholder_->GetScaleFactor();
+    }
 
 /**
  * Runs page layout analysis in the mode set by SetPageSegMode.
@@ -821,282 +821,282 @@ int TessBaseAPI::GetThresholdedImageScaleFactor() const {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
-
-PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
-  if (FindLines() == 0) {
-    if (block_list_->empty())
-      return nullptr;  // The page was empty.
-    page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
-    DetectParagraphs(false);
-    return new PageIterator(
-        page_res_, tesseract_, thresholder_->GetScaleFactor(),
-        thresholder_->GetScaledYResolution(),
-        rect_left_, rect_top_, rect_width_, rect_height_);
-  }
-  return nullptr;
-}
+    PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
+
+    PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
+      if (FindLines() == 0) {
+        if (block_list_->empty())
+          return nullptr;  // The page was empty.
+        page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
+        DetectParagraphs(false);
+        return new PageIterator(
+                page_res_, tesseract_, thresholder_->GetScaleFactor(),
+                thresholder_->GetScaledYResolution(),
+                rect_left_, rect_top_, rect_width_, rect_height_);
+      }
+      return nullptr;
+    }
 
 /**
  * Recognize the tesseract global image and return the result as Tesseract
  * internal structures.
  */
-int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
-  if (tesseract_ == nullptr)
-    return -1;
-  if (FindLines() != 0)
-    return -1;
-  delete page_res_;
-  if (block_list_->empty()) {
-    page_res_ = new PAGE_RES(false, block_list_,
-                             &tesseract_->prev_word_best_choice_);
-    return 0; // Empty page.
-  }
+    int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
+      if (tesseract_ == nullptr)
+        return -1;
+      if (FindLines() != 0)
+        return -1;
+      delete page_res_;
+      if (block_list_->empty()) {
+        page_res_ = new PAGE_RES(false, block_list_,
+                                 &tesseract_->prev_word_best_choice_);
+        return 0; // Empty page.
+      }
 
-  tesseract_->SetBlackAndWhitelist();
-  recognition_done_ = true;
+      tesseract_->SetBlackAndWhitelist();
+      recognition_done_ = true;
 #ifndef DISABLED_LEGACY_ENGINE
-  if (tesseract_->tessedit_resegment_from_line_boxes) {
-    page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
-  } else if (tesseract_->tessedit_resegment_from_boxes) {
-    page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
-  } else
+      if (tesseract_->tessedit_resegment_from_line_boxes) {
+        page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
+      } else if (tesseract_->tessedit_resegment_from_boxes) {
+        page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
+      } else
 #endif  // ndef DISABLED_LEGACY_ENGINE
-  {
-    page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
-                             block_list_, &tesseract_->prev_word_best_choice_);
-  }
+      {
+        page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
+                                 block_list_, &tesseract_->prev_word_best_choice_);
+      }
 
-  if (page_res_ == nullptr) {
-    return -1;
-  }
+      if (page_res_ == nullptr) {
+        return -1;
+      }
 
-  if (tesseract_->tessedit_train_line_recognizer) {
-    tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
-    tesseract_->CorrectClassifyWords(page_res_);
-    return 0;
-  }
+      if (tesseract_->tessedit_train_line_recognizer) {
+        tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
+        tesseract_->CorrectClassifyWords(page_res_);
+        return 0;
+      }
 #ifndef DISABLED_LEGACY_ENGINE
-  if (tesseract_->tessedit_make_boxes_from_boxes) {
-    tesseract_->CorrectClassifyWords(page_res_);
-    return 0;
-  }
+      if (tesseract_->tessedit_make_boxes_from_boxes) {
+        tesseract_->CorrectClassifyWords(page_res_);
+        return 0;
+      }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-  if (truth_cb_ != nullptr) {
-    tesseract_->wordrec_run_blamer.set_value(true);
-    PageIterator *page_it = new PageIterator(
-            page_res_, tesseract_, thresholder_->GetScaleFactor(),
-            thresholder_->GetScaledYResolution(),
-            rect_left_, rect_top_, rect_width_, rect_height_);
-    truth_cb_->Run(tesseract_->getDict().getUnicharset(),
-                   image_height_, page_it, this->tesseract()->pix_grey());
-    delete page_it;
-  }
+      if (truth_cb_ != nullptr) {
+        tesseract_->wordrec_run_blamer.set_value(true);
+        PageIterator *page_it = new PageIterator(
+                page_res_, tesseract_, thresholder_->GetScaleFactor(),
+                thresholder_->GetScaledYResolution(),
+                rect_left_, rect_top_, rect_width_, rect_height_);
+        truth_cb_->Run(tesseract_->getDict().getUnicharset(),
+                       image_height_, page_it, this->tesseract()->pix_grey());
+        delete page_it;
+      }
 
-  int result = 0;
-  if (tesseract_->interactive_display_mode) {
-    #ifndef GRAPHICS_DISABLED
-    tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
-    #endif  // GRAPHICS_DISABLED
-    // The page_res is invalid after an interactive session, so cleanup
-    // in a way that lets us continue to the next page without crashing.
-    delete page_res_;
-    page_res_ = nullptr;
-    return -1;
-  #ifndef DISABLED_LEGACY_ENGINE
-  } else if (tesseract_->tessedit_train_from_boxes) {
-    STRING fontname;
-    ExtractFontName(*output_file_, &fontname);
-    tesseract_->ApplyBoxTraining(fontname, page_res_);
-  } else if (tesseract_->tessedit_ambigs_training) {
-    FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
-    // OCR the page segmented into words by tesseract.
-    tesseract_->recog_training_segmented(
-        *input_file_, page_res_, monitor, training_output_file);
-    fclose(training_output_file);
-  #endif  // ndef DISABLED_LEGACY_ENGINE
-  } else {
-    // Now run the main recognition.
-    bool wait_for_text = true;
-    GetBoolVariable("paragraph_text_based", &wait_for_text);
-    if (!wait_for_text) DetectParagraphs(false);
-    if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
-      if (wait_for_text) DetectParagraphs(true);
-    } else {
-      result = -1;
+      int result = 0;
+      if (tesseract_->interactive_display_mode) {
+#ifndef GRAPHICS_DISABLED
+        tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
+#endif  // GRAPHICS_DISABLED
+        // The page_res is invalid after an interactive session, so cleanup
+        // in a way that lets us continue to the next page without crashing.
+        delete page_res_;
+        page_res_ = nullptr;
+        return -1;
+#ifndef DISABLED_LEGACY_ENGINE
+      } else if (tesseract_->tessedit_train_from_boxes) {
+        STRING fontname;
+        ExtractFontName(*output_file_, &fontname);
+        tesseract_->ApplyBoxTraining(fontname, page_res_);
+      } else if (tesseract_->tessedit_ambigs_training) {
+        FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
+        // OCR the page segmented into words by tesseract.
+        tesseract_->recog_training_segmented(
+                *input_file_, page_res_, monitor, training_output_file);
+        fclose(training_output_file);
+#endif  // ndef DISABLED_LEGACY_ENGINE
+      } else {
+        // Now run the main recognition.
+        bool wait_for_text = true;
+        GetBoolVariable("paragraph_text_based", &wait_for_text);
+        if (!wait_for_text) DetectParagraphs(false);
+        if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
+          if (wait_for_text) DetectParagraphs(true);
+        } else {
+          result = -1;
+        }
+      }
+      return result;
     }
-  }
-  return result;
-}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Tests the chopper by exhaustively running chop_one_blob. */
-int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
-  if (tesseract_ == nullptr)
-    return -1;
-  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-    tprintf("Please call SetImage before attempting recognition.\n");
-    return -1;
-  }
-  if (page_res_ != nullptr)
-    ClearResults();
-  if (FindLines() != 0)
-    return -1;
-  // Additional conditions under which chopper test cannot be run
-  if (tesseract_->interactive_display_mode) return -1;
-
-  recognition_done_ = true;
-
-  page_res_ = new PAGE_RES(false, block_list_,
-                           &(tesseract_->prev_word_best_choice_));
-
-  PAGE_RES_IT page_res_it(page_res_);
-
-  while (page_res_it.word() != nullptr) {
-    WERD_RES *word_res = page_res_it.word();
-    GenericVector<TBOX> boxes;
-    tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
-                                  page_res_it.row()->row, word_res);
-    page_res_it.forward();
-  }
-  return 0;
-}
+    int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
+      if (tesseract_ == nullptr)
+        return -1;
+      if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+        tprintf("Please call SetImage before attempting recognition.\n");
+        return -1;
+      }
+      if (page_res_ != nullptr)
+        ClearResults();
+      if (FindLines() != 0)
+        return -1;
+      // Additional conditions under which chopper test cannot be run
+      if (tesseract_->interactive_display_mode) return -1;
+
+      recognition_done_ = true;
+
+      page_res_ = new PAGE_RES(false, block_list_,
+                               &(tesseract_->prev_word_best_choice_));
+
+      PAGE_RES_IT page_res_it(page_res_);
+
+      while (page_res_it.word() != nullptr) {
+        WERD_RES *word_res = page_res_it.word();
+        GenericVector<TBOX> boxes;
+        tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
+                                      page_res_it.row()->row, word_res);
+        page_res_it.forward();
+      }
+      return 0;
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 // Takes ownership of the input pix.
-void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
+    void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
 
-Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
+    Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
 
-const char * TessBaseAPI::GetInputName() {
-  if (input_file_)
-    return input_file_->c_str();
-  return nullptr;
-}
+    const char * TessBaseAPI::GetInputName() {
+      if (input_file_)
+        return input_file_->c_str();
+      return nullptr;
+    }
 
-const char *  TessBaseAPI::GetDatapath() {
-  return tesseract_->datadir.c_str();
-}
+    const char *  TessBaseAPI::GetDatapath() {
+      return tesseract_->datadir.c_str();
+    }
 
-int TessBaseAPI::GetSourceYResolution() {
-  return thresholder_->GetSourceYResolution();
-}
+    int TessBaseAPI::GetSourceYResolution() {  // 0 when no image has been set
+      return thresholder_ ? thresholder_->GetSourceYResolution() : 0;
+    }
 
 // If flist exists, get data from there. Otherwise get data from buf.
 // Seems convoluted, but is the easiest way I know of to meet multiple
 // goals. Support streaming from stdin, and also work on platforms
 // lacking fmemopen.
-bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
-                                       STRING *buf,
-                                       const char* retry_config,
-                                       int timeout_millisec,
-                                       TessResultRenderer* renderer,
-                                       int tessedit_page_number) {
-  if (!flist && !buf) return false;
-  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-  char pagename[MAX_PATH];
-
-  GenericVector<STRING> lines;
-  if (!flist) {
-    buf->split('\n', &lines);
-    if (lines.empty()) return false;
-  }
+    bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
+                                           STRING *buf,
+                                           const char* retry_config,
+                                           int timeout_millisec,
+                                           TessResultRenderer* renderer,
+                                           int tessedit_page_number) {
+      if (!flist && !buf) return false;
+      int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+      char pagename[MAX_PATH];
+
+      GenericVector<STRING> lines;
+      if (!flist) {
+        buf->split('\n', &lines);
+        if (lines.empty()) return false;
+      }
 
-  // Skip to the requested page number.
-  for (int i = 0; i < page; i++) {
-    if (flist) {
-      if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
-    }
-  }
+      // Skip to the requested page number.
+      for (int i = 0; i < page; i++) {
+        if (flist) {
+          if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+        }
+      }
 
-  // Begin producing output
-  if (renderer && !renderer->BeginDocument(unknown_title_)) {
-    return false;
-  }
+      // Begin producing output
+      if (renderer && !renderer->BeginDocument(unknown_title_)) {
+        return false;
+      }
 
-  // Loop over all pages - or just the requested one
-  while (true) {
-    if (flist) {
-      if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
-    } else {
-      if (page >= lines.size()) break;
-      snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
-    }
-    chomp_string(pagename);
-    Pix *pix = pixRead(pagename);
-    if (pix == nullptr) {
-      tprintf("Image file %s cannot be read!\n", pagename);
-      return false;
+      // Loop over all pages - or just the requested one
+      while (true) {
+        if (flist) {
+          if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+        } else {
+          if (page >= lines.size()) break;
+          snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
+        }
+        chomp_string(pagename);
+        Pix *pix = pixRead(pagename);
+        if (pix == nullptr) {
+          tprintf("Image file %s cannot be read!\n", pagename);
+          return false;
+        }
+        tprintf("Page %d : %s\n", page, pagename);
+        bool r = ProcessPage(pix, page, pagename, retry_config,
+                             timeout_millisec, renderer);
+        pixDestroy(&pix);
+        if (!r) return false;
+        if (tessedit_page_number >= 0) break;
+        ++page;
+      }
+
+      // Finish producing output
+      if (renderer && !renderer->EndDocument()) {
+        return false;
+      }
+      return true;
     }
-    tprintf("Page %d : %s\n", page, pagename);
-    bool r = ProcessPage(pix, page, pagename, retry_config,
-                         timeout_millisec, renderer);
-    pixDestroy(&pix);
-    if (!r) return false;
-    if (tessedit_page_number >= 0) break;
-    ++page;
-  }
 
-  // Finish producing output
-  if (renderer && !renderer->EndDocument()) {
-    return false;
-  }
-  return true;
-}
-
-bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
-                                            size_t size,
-                                            const char* filename,
-                                            const char* retry_config,
-                                            int timeout_millisec,
-                                            TessResultRenderer* renderer,
-                                            int tessedit_page_number) {
+    bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
+                                                size_t size,
+                                                const char* filename,
+                                                const char* retry_config,
+                                                int timeout_millisec,
+                                                TessResultRenderer* renderer,
+                                                int tessedit_page_number) {
 #ifndef ANDROID_BUILD
-  Pix *pix = nullptr;
-  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-  size_t offset = 0;
-  for (; ; ++page) {
-    if (tessedit_page_number >= 0)
-      page = tessedit_page_number;
-    pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                 : pixReadFromMultipageTiff(filename, &offset);
-    if (pix == nullptr) break;
-    tprintf("Page %d\n", page + 1);
-    char page_str[kMaxIntSize];
-    snprintf(page_str, kMaxIntSize - 1, "%d", page);
-    SetVariable("applybox_page", page_str);
-    bool r = ProcessPage(pix, page, filename, retry_config,
-                           timeout_millisec, renderer);
-    pixDestroy(&pix);
-    if (!r) return false;
-    if (tessedit_page_number >= 0) break;
-    if (!offset) break;
-  }
-  return true;
+      Pix *pix = nullptr;
+      int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+      size_t offset = 0;
+      for (; ; ++page) {
+        if (tessedit_page_number >= 0)
+          page = tessedit_page_number;
+        pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                     : pixReadFromMultipageTiff(filename, &offset);
+        if (pix == nullptr) break;
+        tprintf("Page %d\n", page + 1);
+        char page_str[kMaxIntSize];
+        snprintf(page_str, kMaxIntSize - 1, "%d", page);
+        SetVariable("applybox_page", page_str);
+        bool r = ProcessPage(pix, page, filename, retry_config,
+                             timeout_millisec, renderer);
+        pixDestroy(&pix);
+        if (!r) return false;
+        if (tessedit_page_number >= 0) break;
+        if (!offset) break;
+      }
+      return true;
 #else
-  return false;
+      return false;
 #endif
-}
+    }
 
 // Master ProcessPages calls ProcessPagesInternal and then does any post-
 // processing required due to being in a training mode.
-bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
-                               int timeout_millisec,
-                               TessResultRenderer* renderer) {
-  bool result =
-      ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
-  #ifndef DISABLED_LEGACY_ENGINE
-  if (result) {
-    if (tesseract_->tessedit_train_from_boxes &&
-        !tesseract_->WriteTRFile(*output_file_)) {
-      tprintf("Write of TR file failed: %s\n", output_file_->string());
-      return false;
+    bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
+                                   int timeout_millisec,
+                                   TessResultRenderer* renderer) {
+      bool result =
+              ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
+#ifndef DISABLED_LEGACY_ENGINE
+      if (result) {
+        if (tesseract_->tessedit_train_from_boxes &&
+            !tesseract_->WriteTRFile(*output_file_)) {
+          tprintf("Write of TR file failed: %s\n", output_file_->string());
+          return false;
+        }
+      }
+#endif  // ndef DISABLED_LEGACY_ENGINE
+      return result;
     }
-  }
-  #endif  // ndef DISABLED_LEGACY_ENGINE
-  return result;
-}
 
 // In the ideal scenario, Tesseract will start working on data as soon
 // as it can. For example, if you stream a filelist through stdin, we
@@ -1109,184 +1109,184 @@ bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
 // impractical.  So we support a command line flag to explicitly
 // identify the scenario that really matters: filelists on
 // stdin. We'll still do our best if the user likes pipes.
-bool TessBaseAPI::ProcessPagesInternal(const char* filename,
-                                       const char* retry_config,
-                                       int timeout_millisec,
-                                       TessResultRenderer* renderer) {
-  PERF_COUNT_START("ProcessPages")
-  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
-  if (stdInput) {
+    bool TessBaseAPI::ProcessPagesInternal(const char* filename,
+                                           const char* retry_config,
+                                           int timeout_millisec,
+                                           TessResultRenderer* renderer) {
+      PERF_COUNT_START("ProcessPages")
+      bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
+      if (stdInput) {
 #ifdef WIN32
-    if (_setmode(_fileno(stdin), _O_BINARY) == -1)
+        if (_setmode(_fileno(stdin), _O_BINARY) == -1)
       tprintf("ERROR: cin to binary: %s", strerror(errno));
 #endif  // WIN32
-  }
+      }
 
-  if (stream_filelist) {
-    return ProcessPagesFileList(stdin, nullptr, retry_config,
-                                timeout_millisec, renderer,
-                                tesseract_->tessedit_page_number);
-  }
+      if (stream_filelist) {
+        return ProcessPagesFileList(stdin, nullptr, retry_config,
+                                    timeout_millisec, renderer,
+                                    tesseract_->tessedit_page_number);
+      }
 
-  // At this point we are officially in autodection territory.
-  // That means any data in stdin must be buffered, to make it
-  // seekable.
-  std::string buf;
-  const l_uint8 *data = nullptr;
-  if (stdInput) {
-    buf.assign((std::istreambuf_iterator<char>(std::cin)),
-               (std::istreambuf_iterator<char>()));
-    data = reinterpret_cast<const l_uint8 *>(buf.data());
-  } else {
-    // Check whether the input file can be read.
-    if (FILE* file = fopen(filename, "rb")) {
-      fclose(file);
-    } else {
-      fprintf(stderr, "Error, cannot read input file %s: %s\n",
-              filename, strerror(errno));
-      return false;
-    }
-  }
+      // At this point we are officially in autodetection territory.
+      // That means any data in stdin must be buffered, to make it
+      // seekable.
+      std::string buf;
+      const l_uint8 *data = nullptr;
+      if (stdInput) {
+        buf.assign((std::istreambuf_iterator<char>(std::cin)),
+                   (std::istreambuf_iterator<char>()));
+        data = reinterpret_cast<const l_uint8 *>(buf.data());
+      } else {
+        // Check whether the input file can be read.
+        if (FILE* file = fopen(filename, "rb")) {
+          fclose(file);
+        } else {
+          fprintf(stderr, "Error, cannot read input file %s: %s\n",
+                  filename, strerror(errno));
+          return false;
+        }
+      }
 
-  // Here is our autodetection
-  int format;
-  int r = (stdInput) ?
-      findFileFormatBuffer(data, &format) :
-      findFileFormat(filename, &format);
-
-  // Maybe we have a filelist
-  if (r != 0 || format == IFF_UNKNOWN) {
-    STRING s;
-    if (stdInput) {
-      s = buf.c_str();
-    } else {
-      std::ifstream t(filename);
-      std::string u((std::istreambuf_iterator<char>(t)),
-                    std::istreambuf_iterator<char>());
-      s = u.c_str();
-    }
-    return ProcessPagesFileList(nullptr, &s, retry_config,
-                                timeout_millisec, renderer,
-                                tesseract_->tessedit_page_number);
-  }
+      // Here is our autodetection
+      int format;
+      int r = (stdInput) ?
+              findFileFormatBuffer(data, &format) :
+              findFileFormat(filename, &format);
+
+      // Maybe we have a filelist
+      if (r != 0 || format == IFF_UNKNOWN) {
+        STRING s;
+        if (stdInput) {
+          s = buf.c_str();
+        } else {
+          std::ifstream t(filename);
+          std::string u((std::istreambuf_iterator<char>(t)),
+                        std::istreambuf_iterator<char>());
+          s = u.c_str();
+        }
+        return ProcessPagesFileList(nullptr, &s, retry_config,
+                                    timeout_millisec, renderer,
+                                    tesseract_->tessedit_page_number);
+      }
 
-  // Maybe we have a TIFF which is potentially multipage
-  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
-               format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
-               format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
-               format == IFF_TIFF_ZIP);
-
-  // Fail early if we can, before producing any output
-  Pix *pix = nullptr;
-  if (!tiff) {
-    pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
-    if (pix == nullptr) {
-      return false;
-    }
-  }
+      // Maybe we have a TIFF which is potentially multipage
+      bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
+                   format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
+                   format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
+                   format == IFF_TIFF_ZIP);
+
+      // Fail early if we can, before producing any output
+      Pix *pix = nullptr;
+      if (!tiff) {
+        pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
+        if (pix == nullptr) {
+          return false;
+        }
+      }
 
-  // Begin the output
-  if (renderer && !renderer->BeginDocument(unknown_title_)) {
-    pixDestroy(&pix);
-    return false;
-  }
+      // Begin the output
+      if (renderer && !renderer->BeginDocument(unknown_title_)) {
+        pixDestroy(&pix);
+        return false;
+      }
 
-  // Produce output
-  r = (tiff) ?
-      ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
-                                timeout_millisec, renderer,
-                                tesseract_->tessedit_page_number) :
-      ProcessPage(pix, 0, filename, retry_config,
-                  timeout_millisec, renderer);
+      // Produce output
+      r = (tiff) ?
+          ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
+                                    timeout_millisec, renderer,
+                                    tesseract_->tessedit_page_number) :
+          ProcessPage(pix, 0, filename, retry_config,
+                      timeout_millisec, renderer);
 
-  // Clean up memory as needed
-  pixDestroy(&pix);
+      // Clean up memory as needed
+      pixDestroy(&pix);
 
-  // End the output
-  if (!r || (renderer && !renderer->EndDocument())) {
-    return false;
-  }
-  PERF_COUNT_END
-  return true;
-}
-
-bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
-                              const char* retry_config, int timeout_millisec,
-                              TessResultRenderer* renderer) {
-  PERF_COUNT_START("ProcessPage")
-  SetInputName(filename);
-  SetImage(pix);
-  bool failed = false;
-
-  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
-    // Disabled character recognition
-    PageIterator* it = AnalyseLayout();
-
-    if (it == nullptr) {
-      failed = true;
-    } else {
-      delete it;
+      // End the output
+      if (!r || (renderer && !renderer->EndDocument())) {
+        return false;
+      }
+      PERF_COUNT_END
+      return true;
     }
-  } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
-    failed = FindLines() != 0;
-  } else if (timeout_millisec > 0) {
-    // Running with a timeout.
-    ETEXT_DESC monitor;
-    monitor.cancel = nullptr;
-    monitor.cancel_this = nullptr;
-    monitor.set_deadline_msecs(timeout_millisec);
-
-    // Now run the main recognition.
-    failed = Recognize(&monitor) < 0;
-  } else {
-    // Normal layout and character recognition with no timeout.
-    failed = Recognize(nullptr) < 0;
-  }
 
-  if (tesseract_->tessedit_write_images) {
+    bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
+                                  const char* retry_config, int timeout_millisec,
+                                  TessResultRenderer* renderer) {
+      PERF_COUNT_START("ProcessPage")
+      SetInputName(filename);
+      SetImage(pix);
+      bool failed = false;
+
+      if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
+        // Disabled character recognition
+        PageIterator* it = AnalyseLayout();
+
+        if (it == nullptr) {
+          failed = true;
+        } else {
+          delete it;
+        }
+      } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
+        failed = FindLines() != 0;
+      } else if (timeout_millisec > 0) {
+        // Running with a timeout.
+        ETEXT_DESC monitor;
+        monitor.cancel = nullptr;
+        monitor.cancel_this = nullptr;
+        monitor.set_deadline_msecs(timeout_millisec);
+
+        // Now run the main recognition.
+        failed = Recognize(&monitor) < 0;
+      } else {
+        // Normal layout and character recognition with no timeout.
+        failed = Recognize(nullptr) < 0;
+      }
+
+      if (tesseract_->tessedit_write_images) {
 #ifndef ANDROID_BUILD
-    Pix* page_pix = GetThresholdedImage();
-    pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
+        Pix* page_pix = GetThresholdedImage();
+        pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
 #endif  // ANDROID_BUILD
-  }
+      }
 
-  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
-    // Save current config variables before switching modes.
-    FILE* fp = fopen(kOldVarsFile, "wb");
-    if (fp == nullptr) {
-      tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
-    } else {
-      PrintVariables(fp);
-      fclose(fp);
-    }
-    // Switch to alternate mode for retry.
-    ReadConfigFile(retry_config);
-    SetImage(pix);
-    Recognize(nullptr);
-    // Restore saved config variables.
-    ReadConfigFile(kOldVarsFile);
-  }
+      if (failed && retry_config != nullptr && retry_config[0] != '\0') {
+        // Save current config variables before switching modes.
+        FILE* fp = fopen(kOldVarsFile, "wb");
+        if (fp == nullptr) {
+          tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
+        } else {
+          PrintVariables(fp);
+          fclose(fp);
+        }
+        // Switch to alternate mode for retry.
+        ReadConfigFile(retry_config);
+        SetImage(pix);
+        Recognize(nullptr);
+        // Restore saved config variables.
+        ReadConfigFile(kOldVarsFile);
+      }
 
-  if (renderer && !failed) {
-    failed = !renderer->AddImage(this);
-  }
+      if (renderer && !failed) {
+        failed = !renderer->AddImage(this);
+      }
 
-  PERF_COUNT_END
-  return !failed;
-}
+      PERF_COUNT_END
+      return !failed;
+    }
 
 /**
  * Get a left-to-right iterator to the results of LayoutAnalysis and/or
  * Recognize. The returned iterator must be deleted after use.
  */
-LTRResultIterator* TessBaseAPI::GetLTRIterator() {
-  if (tesseract_ == nullptr || page_res_ == nullptr)
-    return nullptr;
-  return new LTRResultIterator(
-      page_res_, tesseract_,
-      thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-      rect_left_, rect_top_, rect_width_, rect_height_);
-}
+    LTRResultIterator* TessBaseAPI::GetLTRIterator() {
+      // Nothing to iterate over until an engine and page results both exist.
+      if (!tesseract_ || !page_res_) return nullptr;
+      const auto scale = thresholder_->GetScaleFactor();
+      const auto y_res = thresholder_->GetScaledYResolution();
+      return new LTRResultIterator(page_res_, tesseract_, scale, y_res, rect_left_,
+                                   rect_top_, rect_width_, rect_height_);
+    }
 
 /**
  * Get a reading-order iterator to the results of LayoutAnalysis and/or
@@ -1296,14 +1296,14 @@ LTRResultIterator* TessBaseAPI::GetLTRIterator() {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-ResultIterator* TessBaseAPI::GetIterator() {
-  if (tesseract_ == nullptr || page_res_ == nullptr)
-    return nullptr;
-  return ResultIterator::StartOfParagraph(LTRResultIterator(
-      page_res_, tesseract_,
-      thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-      rect_left_, rect_top_, rect_width_, rect_height_));
-}
+    ResultIterator* TessBaseAPI::GetIterator() {
+      if (!tesseract_ || !page_res_) return nullptr;  // no results to walk yet
+      const auto scale = thresholder_->GetScaleFactor();
+      const auto y_res = thresholder_->GetScaledYResolution();
+      const LTRResultIterator ltr_it(page_res_, tesseract_, scale, y_res, rect_left_,
+                                     rect_top_, rect_width_, rect_height_);
+      return ResultIterator::StartOfParagraph(ltr_it);
+    }
 
 /**
  * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
@@ -1313,45 +1313,45 @@ ResultIterator* TessBaseAPI::GetIterator() {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-MutableIterator* TessBaseAPI::GetMutableIterator() {
-  if (tesseract_ == nullptr || page_res_ == nullptr)
-    return nullptr;
-  return new MutableIterator(page_res_, tesseract_,
-                             thresholder_->GetScaleFactor(),
-                             thresholder_->GetScaledYResolution(),
-                             rect_left_, rect_top_, rect_width_, rect_height_);
-}
+    MutableIterator* TessBaseAPI::GetMutableIterator() {
+      if (!tesseract_ || !page_res_) return nullptr;  // no results to walk yet
+      const auto scale = thresholder_->GetScaleFactor();
+      const auto y_res = thresholder_->GetScaledYResolution();
+      // The iterator is heap-allocated and handed to the caller as a raw pointer.
+      return new MutableIterator(page_res_, tesseract_, scale, y_res, rect_left_,
+                                 rect_top_, rect_width_, rect_height_);
+    }
 
 /** Make a text string from the internal data structures. */
-char* TessBaseAPI::GetUTF8Text() {
-  if (tesseract_ == nullptr ||
-      (!recognition_done_ && Recognize(nullptr) < 0))
-    return nullptr;
-  STRING text("");
-  ResultIterator *it = GetIterator();
-  do {
-    if (it->Empty(RIL_PARA)) continue;
-    const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
-    text += para_text.get();
-  } while (it->Next(RIL_PARA));
-  char* result = new char[text.length() + 1];
-  strncpy(result, text.string(), text.length() + 1);
-  delete it;
-  return result;
-}
+    char* TessBaseAPI::GetUTF8Text() {
+      if (tesseract_ == nullptr) return nullptr;
+      if (!recognition_done_ && Recognize(nullptr) < 0) return nullptr;
+      // GetIterator() returns nullptr without page results; never dereference it.
+      const std::unique_ptr<ResultIterator> it(GetIterator());
+      if (it == nullptr) return nullptr;
+      STRING text("");
+      do {
+        if (it->Empty(RIL_PARA)) continue;
+        const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
+        text += para_text.get();
+      } while (it->Next(RIL_PARA));
+      char* result = new char[text.length() + 1];  // caller frees with delete []
+      strncpy(result, text.string(), text.length() + 1);
+      return result;
+    }
 
 /**
  * Gets the block orientation at the current iterator position.
  */
-static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
-  tesseract::Orientation orientation;
-  tesseract::WritingDirection writing_direction;
-  tesseract::TextlineOrder textline_order;
-  float deskew_angle;
-  it->Orientation(&orientation, &writing_direction, &textline_order,
-                  &deskew_angle);
-  return orientation;
-}
+    static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
+      // Orientation() fills four out-parameters; only the first one is returned.
+      tesseract::Orientation orient;
+      tesseract::WritingDirection direction;
+      tesseract::TextlineOrder order;
+      float deskew;
+      it->Orientation(&orient, &direction, &order, &deskew);
+      return orient;
+    }
 
 /**
  * Fits a line to the baseline at the given level, and appends its coefficients
@@ -1361,104 +1361,149 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
  * method currently only inserts a 'textangle' property to indicate the rotation
  * direction and does not add any baseline information to the hocr string.
  */
-static void AddBaselineCoordsTohOCR(const PageIterator *it,
-                                    PageIteratorLevel level,
-                                    STRING* hocr_str) {
-  tesseract::Orientation orientation = GetBlockTextOrientation(it);
-  if (orientation != ORIENTATION_PAGE_UP) {
-    hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
-    return;
-  }
+    static void AddBaselineCoordsTohOCR(const PageIterator *it,
+                                        PageIteratorLevel level,
+                                        STRING* hocr_str) {
+      tesseract::Orientation orientation = GetBlockTextOrientation(it);
+      if (orientation != ORIENTATION_PAGE_UP) {
+        hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
+        return;
+      }
 
-  int left, top, right, bottom;
-  it->BoundingBox(level, &left, &top, &right, &bottom);
-
-  // Try to get the baseline coordinates at this level.
-  int x1, y1, x2, y2;
-  if (!it->Baseline(level, &x1, &y1, &x2, &y2))
-    return;
-  // Following the description of this field of the hOCR spec, we convert the
-  // baseline coordinates so that "the bottom left of the bounding box is the
-  // origin".
-  x1 -= left;
-  x2 -= left;
-  y1 -= bottom;
-  y2 -= bottom;
-
-  // Now fit a line through the points so we can extract coefficients for the
-  // equation:  y = p1 x + p0
-  double p1 = 0;
-  double p0 = 0;
-  if (x1 == x2) {
-    // Problem computing the polynomial coefficients.
-    return;
-  }
-  p1 = (y2 - y1) / static_cast<double>(x2 - x1);
-  p0 = y1 - static_cast<double>(p1 * x1);
-
-  hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
-  hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
-}
-
-static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
-                        int num2) {
-  const size_t BUFSIZE = 64;
-  char id_buffer[BUFSIZE];
-  if (num2 >= 0) {
-    snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
-  } else {
-    snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
-  }
-  id_buffer[BUFSIZE - 1] = '\0';
-  *hocr_str += " id='";
-  *hocr_str += id_buffer;
-  *hocr_str += "'";
-}
-
-static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
-  int num2, int num3) {
-  const size_t BUFSIZE = 64;
-  char id_buffer[BUFSIZE];
-  snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
-  id_buffer[BUFSIZE - 1] = '\0';
-  *hocr_str += " id='";
-  *hocr_str += id_buffer;
-  *hocr_str += "'";
-}
-
-static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
-                         STRING* hocr_str) {
-  int left, top, right, bottom;
-  it->BoundingBox(level, &left, &top, &right, &bottom);
-  // This is the only place we use double quotes instead of single quotes,
-  // but it may too late to change for consistency
-  hocr_str->add_str_int(" title=\"bbox ", left);
-  hocr_str->add_str_int(" ", top);
-  hocr_str->add_str_int(" ", right);
-  hocr_str->add_str_int(" ", bottom);
-  // Add baseline coordinates & heights for textlines only.
-  if (level == RIL_TEXTLINE) {
-    AddBaselineCoordsTohOCR(it, level, hocr_str);
-    // add custom height measures
-    float row_height, descenders, ascenders;  // row attributes
-    it->RowAttributes(&row_height, &descenders, &ascenders);
-    // TODO(rays): Do we want to limit these to a single decimal place?
-    hocr_str->add_str_double("; x_size ", row_height);
-    hocr_str->add_str_double("; x_descenders ", descenders * -1);
-    hocr_str->add_str_double("; x_ascenders ", ascenders);
-  }
-  *hocr_str += "\">";
-}
-
-static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
-                        STRING* hocr_str) {
-  int left, top, right, bottom;
-  it->BoundingBox(level, &left, &top, &right, &bottom);
-  hocr_str->add_str_int("\t", left);
-  hocr_str->add_str_int("\t", top);
-  hocr_str->add_str_int("\t", right - left);
-  hocr_str->add_str_int("\t", bottom - top);
-}
+      int left, top, right, bottom;
+      it->BoundingBox(level, &left, &top, &right, &bottom);
+
+      // Try to get the baseline coordinates at this level.
+      int x1, y1, x2, y2;
+      if (!it->Baseline(level, &x1, &y1, &x2, &y2))
+        return;
+      // Following the description of this field of the hOCR spec, we convert the
+      // baseline coordinates so that "the bottom left of the bounding box is the
+      // origin".
+      x1 -= left;
+      x2 -= left;
+      y1 -= bottom;
+      y2 -= bottom;
+
+      // Now fit a line through the points so we can extract coefficients for the
+      // equation:  y = p1 x + p0
+      double p1 = 0;
+      double p0 = 0;
+      if (x1 == x2) {
+        // Problem computing the polynomial coefficients.
+        return;
+      }
+      p1 = (y2 - y1) / static_cast<double>(x2 - x1);
+      p0 = y1 - static_cast<double>(p1 * x1);
+
+      hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
+      hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
+    }
+
+    static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
+                            int num2) {
+      // Appends " id='<base>_<num1>_<num2>'"; a negative num2 omits "_<num2>".
+      char id_text[64];
+      const size_t limit = sizeof(id_text) - 1;
+      if (num2 < 0)
+        snprintf(id_text, limit, "%s_%d", base.c_str(), num1);
+      else
+        snprintf(id_text, limit, "%s_%d_%d", base.c_str(), num1, num2);
+      id_text[limit] = '\0';
+      *hocr_str += " id='";
+      *hocr_str += id_text;
+      *hocr_str += "'";
+    }
+
+    static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
+                            int num2, int num3) {
+      char id_text[64];  // receives "<base>_<num1>_<num2>_<num3>"
+      const size_t limit = sizeof(id_text) - 1;
+      snprintf(id_text, limit, "%s_%d_%d_%d", base.c_str(), num1, num2, num3);
+      id_text[limit] = '\0';
+      *hocr_str += " id='";
+      *hocr_str += id_text;
+      *hocr_str += "'";
+    }
+
+    static void AddIdToAlto(STRING* alto_str, const std::string base, int num1) {
+      char id_text[64];  // receives "<base>_<num1>"
+      const size_t limit = sizeof(id_text) - 1;
+      snprintf(id_text, limit, "%s_%d", base.c_str(), num1);
+      id_text[limit] = '\0';
+      *alto_str += " ID=\"";
+      *alto_str += id_text;
+      *alto_str += "\"";
+    }
+
+    static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
+                             STRING* hocr_str) {
+      int x_min, y_min, x_max, y_max;
+      it->BoundingBox(level, &x_min, &y_min, &x_max, &y_max);
+      // The title attribute is the one spot where hOCR output uses double quotes
+      // rather than single quotes; it may be too late to change that now.
+      hocr_str->add_str_int(" title=\"bbox ", x_min);
+      hocr_str->add_str_int(" ", y_min);
+      hocr_str->add_str_int(" ", x_max);
+      hocr_str->add_str_int(" ", y_max);
+      const bool is_textline = (level == RIL_TEXTLINE);
+      if (is_textline) {
+        // Text lines additionally carry baseline and row height information.
+        AddBaselineCoordsTohOCR(it, level, hocr_str);
+        float size, below, above;  // row height, descenders, ascenders
+        it->RowAttributes(&size, &below, &above);
+        // TODO(rays): Do we want to limit these to a single decimal place?
+        hocr_str->add_str_double("; x_size ", size);
+        hocr_str->add_str_double("; x_descenders ", -below);
+        hocr_str->add_str_double("; x_ascenders ", above);
+      }
+      *hocr_str += "\">";  // close the title attribute and the opening tag
+    }
+
+/**
+ * Appends the ALTO position attributes (HPOS, VPOS, WIDTH, HEIGHT) of the
+ * element at the given level to alto_str. Words additionally get a WC (word
+ * confidence) attribute and their tag is left open for the caller; every
+ * other level has its opening tag closed with ">".
+ */
+    static void AddBoxToAlto(const ResultIterator* it, PageIteratorLevel level,
+                             STRING* alto_str) {
+      int left, top, right, bottom;
+      it->BoundingBox(level, &left, &top, &right, &bottom);
+
+      alto_str->add_str_int(" HPOS=\"", left);
+      alto_str->add_str_int("\" VPOS=\"", top);
+      alto_str->add_str_int("\" WIDTH=\"", right - left);
+      alto_str->add_str_int("\" HEIGHT=\"", bottom - top);
+      *alto_str += "\"";
+
+      if (level != RIL_WORD) {
+        *alto_str += ">";
+        return;
+      }
+
+      // Confidence() is a percentage. WC must be a fraction in [0, 1], so emit
+      // hundredths with zero padding: 5 -> "0.05", 96 -> "0.96", 100 -> "1.00".
+      // (Pasting the integer after "0." gave "0.5" and "0.100" for 5 and 100.)
+      int percent = static_cast<int>(it->Confidence(RIL_WORD));
+      if (percent < 0) percent = 0;
+      if (percent > 100) percent = 100;
+      char wc_text[8];
+      snprintf(wc_text, sizeof(wc_text), "%d.%02d", percent / 100, percent % 100);
+      *alto_str += " WC=\"";
+      *alto_str += wc_text;
+      *alto_str += "\"";
+    }
+
+    static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
+                            STRING* hocr_str) {
+      // Emits four tab-prefixed columns: left, top, width, height.
+      int x_min, y_min, x_max, y_max;
+      it->BoundingBox(level, &x_min, &y_min, &x_max, &y_max);
+      const int columns[] = {x_min, y_min, x_max - x_min, y_max - y_min};
+      for (int value : columns)
+        hocr_str->add_str_int("\t", value);
+    }
 
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
@@ -1469,9 +1514,17 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
  * STL removed from original patch submission and refactored by rays.
  * Returned string must be freed with the delete [] operator.
  */
-char* TessBaseAPI::GetHOCRText(int page_number) {
-  return GetHOCRText(nullptr, page_number);
-}
+    char* TessBaseAPI::GetHOCRText(int page_number) {
+      return GetHOCRText(nullptr, page_number);  // nullptr: no ETEXT_DESC monitor
+    }
+
+/**
+ * Make an XML-formatted string with ALTO markup from the internal data
+ * structures, forwarding to the two-argument overload with nullptr first.
+ */
+    char* TessBaseAPI::GetAltoText(int page_number) {
+      return GetAltoText(nullptr, page_number);
+    }
 
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
@@ -1482,24 +1535,24 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
  * STL removed from original patch submission and refactored by rays.
  * Returned string must be freed with the delete [] operator.
  */
-char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
-  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
-    return nullptr;
+    char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
+      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+        return nullptr;
 
-  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
-  int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
-  bool para_is_ltr = true;        // Default direction is LTR
-  const char* paragraph_lang = nullptr;
-  bool font_info = false;
-  GetBoolVariable("hocr_font_info", &font_info);
+      int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
+      int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
+      bool para_is_ltr = true;        // Default direction is LTR
+      const char* paragraph_lang = nullptr;
+      bool font_info = false;
+      GetBoolVariable("hocr_font_info", &font_info);
 
-  STRING hocr_str("");
+      STRING hocr_str("");
 
-  if (input_file_ == nullptr)
-      SetInputName(nullptr);
+      if (input_file_ == nullptr)
+        SetInputName(nullptr);
 
 #ifdef _WIN32
-  // convert input name from ANSI encoding to utf-8
+      // convert input name from ANSI encoding to utf-8
   int str16_len =
       MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
   wchar_t *uni16_str = new WCHAR[str16_len];
@@ -1515,325 +1568,439 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
   delete[] utf8_str;
 #endif
 
-  hocr_str += "  <div class='ocr_page'";
-  AddIdTohOCR(&hocr_str, "page", page_id, -1);
-  hocr_str += " title='image \"";
-  if (input_file_) {
-    hocr_str += HOcrEscape(input_file_->string());
-  } else {
-    hocr_str += "unknown";
-  }
-  hocr_str.add_str_int("\"; bbox ", rect_left_);
-  hocr_str.add_str_int(" ", rect_top_);
-  hocr_str.add_str_int(" ", rect_width_);
-  hocr_str.add_str_int(" ", rect_height_);
-  hocr_str.add_str_int("; ppageno ", page_number);
-  hocr_str += "'>\n";
-
-  ResultIterator *res_it = GetIterator();
-  while (!res_it->Empty(RIL_BLOCK)) {
-    if (res_it->Empty(RIL_WORD)) {
-      res_it->Next(RIL_WORD);
-      continue;
-    }
-
-    // Open any new block/paragraph/textline.
-    if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-      para_is_ltr = true;  // reset to default direction
-      hocr_str += "   <div class='ocr_carea'";
-      AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
-      AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
-    }
-    if (res_it->IsAtBeginningOf(RIL_PARA)) {
-      hocr_str += "\n    <p class='ocr_par'";
-      para_is_ltr = res_it->ParagraphIsLtr();
-      if (!para_is_ltr) {
-        hocr_str += " dir='rtl'";
-      }
-      AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
-      paragraph_lang = res_it->WordRecognitionLanguage();
-      if (paragraph_lang) {
-        hocr_str += " lang='";
-        hocr_str += paragraph_lang;
-        hocr_str += "'";
+      hocr_str += "  <div class='ocr_page'";
+      AddIdTohOCR(&hocr_str, "page", page_id, -1);
+      hocr_str += " title='image \"";
+      if (input_file_) {
+        hocr_str += HOcrEscape(input_file_->string());
+      } else {
+        hocr_str += "unknown";
       }
-      AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
-    }
-    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-      hocr_str += "\n     <span class='ocr_line'";
-      AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
-      AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
-    }
-
-    // Now, process the word...
-    std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
-    if (tesseract_->lstm_choice_mode) {
-      confidencemap = res_it->GetBestLSTMSymbolChoices();
-    }
-    hocr_str += "\n      <span class='ocrx_word'";
-    AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
-    int left, top, right, bottom;
-    bool bold, italic, underlined, monospace, serif, smallcaps;
-    int pointsize, font_id;
-    const char *font_name;
-    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-    font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
-                                           &monospace, &serif, &smallcaps,
-                                           &pointsize, &font_id);
-    hocr_str.add_str_int(" title='bbox ", left);
-    hocr_str.add_str_int(" ", top);
-    hocr_str.add_str_int(" ", right);
-    hocr_str.add_str_int(" ", bottom);
-    hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
-    if (font_info) {
-      if (font_name) {
-        hocr_str += "; x_font ";
-        hocr_str += HOcrEscape(font_name);
-      }
-      hocr_str.add_str_int("; x_fsize ", pointsize);
-    }
-    hocr_str += "'";
-    const char* lang = res_it->WordRecognitionLanguage();
-    if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
-      hocr_str += " lang='";
-      hocr_str += lang;
-      hocr_str += "'";
-    }
-    switch (res_it->WordDirection()) {
-      // Only emit direction if different from current paragraph direction
-      case DIR_LEFT_TO_RIGHT:
-        if (!para_is_ltr) hocr_str += " dir='ltr'";
-        break;
-      case DIR_RIGHT_TO_LEFT:
-        if (para_is_ltr) hocr_str += " dir='rtl'";
-        break;
-      case DIR_MIX:
-      case DIR_NEUTRAL:
-      default:  // Do nothing.
-        break;
-    }
-    hocr_str += ">";
-    bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
-    bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
-    bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
-    if (bold) hocr_str += "<strong>";
-    if (italic) hocr_str += "<em>";
-    do {
-      const std::unique_ptr<const char[]> grapheme(
-          res_it->GetUTF8Text(RIL_SYMBOL));
-      if (grapheme && grapheme[0] != 0) {
-        hocr_str += HOcrEscape(grapheme.get());
-      }
-      res_it->Next(RIL_SYMBOL);
-    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-    if (italic) hocr_str += "</em>";
-    if (bold) hocr_str += "</strong>";
-    // If the lstm choice mode is required it is added here
-    if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
-      for (size_t i = 0; i < confidencemap->size(); i++) {
-        hocr_str += "\n       <span class='ocrx_cinfo'";
-        AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
-        hocr_str += ">";
-        std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-        for (std::pair<const char*, float> conf : timestep) {
-          hocr_str += "<span class='ocr_glyph'";
-          AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-          hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
-          hocr_str += "'";
-          hocr_str += ">";
-          hocr_str += conf.first;
-          hocr_str += "</span>";
-          gcnt++;
+      hocr_str.add_str_int("\"; bbox ", rect_left_);
+      hocr_str.add_str_int(" ", rect_top_);
+      hocr_str.add_str_int(" ", rect_width_);
+      hocr_str.add_str_int(" ", rect_height_);
+      hocr_str.add_str_int("; ppageno ", page_number);
+      hocr_str += "'>\n";
+
+      ResultIterator *res_it = GetIterator();
+      while (!res_it->Empty(RIL_BLOCK)) {
+        if (res_it->Empty(RIL_WORD)) {
+          res_it->Next(RIL_WORD);
+          continue;
         }
-        hocr_str += "</span>";
-        tcnt++;
-      }
-    } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
-      for (size_t i = 0; i < confidencemap->size(); i++) {
-        std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-        if (timestep.size() > 0) {
-          hocr_str += "\n       <span class='ocrx_cinfo'";
-          AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
-          hocr_str += " chosen='";
-          hocr_str += timestep[0].first;
-          hocr_str += "'>";
-          for (size_t j = 1; j < timestep.size(); j++) {
-            hocr_str += "<span class='ocr_glyph'";
-            AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-            hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
+
+        // Open any new block/paragraph/textline.
+        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+          para_is_ltr = true;  // reset to default direction
+          hocr_str += "   <div class='ocr_carea'";
+          AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
+          AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
+        }
+        if (res_it->IsAtBeginningOf(RIL_PARA)) {
+          hocr_str += "\n    <p class='ocr_par'";
+          para_is_ltr = res_it->ParagraphIsLtr();
+          if (!para_is_ltr) {
+            hocr_str += " dir='rtl'";
+          }
+          AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
+          paragraph_lang = res_it->WordRecognitionLanguage();
+          if (paragraph_lang) {
+            hocr_str += " lang='";
+            hocr_str += paragraph_lang;
             hocr_str += "'";
-            hocr_str += ">";
-            hocr_str += timestep[j].first;
-            hocr_str += "</span>";
-            gcnt++;
           }
-          hocr_str += "</span>";
-          tcnt++;
+          AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
+        }
+        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+          hocr_str += "\n     <span class='ocr_line'";
+          AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
+          AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
         }
-      }
-    }
-    hocr_str += "</span>";
-    tcnt = 1;
-    gcnt = 1;
-    wcnt++;
-    // Close any ending block/paragraph/textline.
-    if (last_word_in_line) {
-      hocr_str += "\n     </span>";
-      lcnt++;
-    }
-    if (last_word_in_para) {
-      hocr_str += "\n    </p>\n";
-      pcnt++;
-      para_is_ltr = true;  // back to default direction
-    }
-    if (last_word_in_block) {
-      hocr_str += "   </div>\n";
-      bcnt++;
-    }
-  }
-  hocr_str += "  </div>\n";
-
-  char *ret = new char[hocr_str.length() + 1];
-  strcpy(ret, hocr_str.string());
-  delete res_it;
-  return ret;
-}
+
+        // Now, process the word...
+        std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
+        if (tesseract_->lstm_choice_mode) {
+          confidencemap = res_it->GetBestLSTMSymbolChoices();
+        }
+        hocr_str += "\n      <span class='ocrx_word'";
+        AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
+        int left, top, right, bottom;
+        bool bold, italic, underlined, monospace, serif, smallcaps;
+        int pointsize, font_id;
+        const char *font_name;
+        res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+        font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
+                                               &monospace, &serif, &smallcaps,
+                                               &pointsize, &font_id);
+        hocr_str.add_str_int(" title='bbox ", left);
+        hocr_str.add_str_int(" ", top);
+        hocr_str.add_str_int(" ", right);
+        hocr_str.add_str_int(" ", bottom);
+        hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
+        if (font_info) {
+          if (font_name) {
+            hocr_str += "; x_font ";
+            hocr_str += HOcrEscape(font_name);
+          }
+          hocr_str.add_str_int("; x_fsize ", pointsize);
+        }
+        hocr_str += "'";
+        const char* lang = res_it->WordRecognitionLanguage();
+        if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
+          hocr_str += " lang='";
+          hocr_str += lang;
+          hocr_str += "'";
+        }
+        switch (res_it->WordDirection()) {
+          // Only emit direction if different from current paragraph direction
+          case DIR_LEFT_TO_RIGHT:
+            if (!para_is_ltr) hocr_str += " dir='ltr'";
+                break;
+          case DIR_RIGHT_TO_LEFT:
+            if (para_is_ltr) hocr_str += " dir='rtl'";
+                break;
+          case DIR_MIX:
+          case DIR_NEUTRAL:
+          default:  // Do nothing.
+            break;
+        }
+        hocr_str += ">";
+        bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
+        bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
+        bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
+        if (bold) hocr_str += "<strong>";
+        if (italic) hocr_str += "<em>";
+        do {
+          const std::unique_ptr<const char[]> grapheme(
+                  res_it->GetUTF8Text(RIL_SYMBOL));
+          if (grapheme && grapheme[0] != 0) {
+            hocr_str += HOcrEscape(grapheme.get());
+          }
+          res_it->Next(RIL_SYMBOL);
+        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+        if (italic) hocr_str += "</em>";
+        if (bold) hocr_str += "</strong>";
+        // If the lstm choice mode is required it is added here
+        if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
+          for (size_t i = 0; i < confidencemap->size(); i++) {
+            hocr_str += "\n       <span class='ocrx_cinfo'";
+            AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
+            hocr_str += ">";
+            std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
+            for (std::pair<const char*, float> conf : timestep) {
+              hocr_str += "<span class='ocr_glyph'";
+              AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+              hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
+              hocr_str += "'";
+              hocr_str += ">";
+              hocr_str += conf.first;
+              hocr_str += "</span>";
+              gcnt++;
+            }
+            hocr_str += "</span>";
+            tcnt++;
+          }
+        } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
+          for (size_t i = 0; i < confidencemap->size(); i++) {
+            std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
+            if (timestep.size() > 0) {
+              hocr_str += "\n       <span class='ocrx_cinfo'";
+              AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
+              hocr_str += " chosen='";
+              hocr_str += timestep[0].first;
+              hocr_str += "'>";
+              for (size_t j = 1; j < timestep.size(); j++) {
+                hocr_str += "<span class='ocr_glyph'";
+                AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+                hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
+                hocr_str += "'";
+                hocr_str += ">";
+                hocr_str += timestep[j].first;
+                hocr_str += "</span>";
+                gcnt++;
+              }
+              hocr_str += "</span>";
+              tcnt++;
+            }
+          }
+        }
+        hocr_str += "</span>";
+        tcnt = 1;
+        gcnt = 1;
+        wcnt++;
+        // Close any ending block/paragraph/textline.
+        if (last_word_in_line) {
+          hocr_str += "\n     </span>";
+          lcnt++;
+        }
+        if (last_word_in_para) {
+          hocr_str += "\n    </p>\n";
+          pcnt++;
+          para_is_ltr = true;  // back to default direction
+        }
+        if (last_word_in_block) {
+          hocr_str += "   </div>\n";
+          bcnt++;
+        }
+      }
+      hocr_str += "  </div>\n";
+
+      char *ret = new char[hocr_str.length() + 1];
+      strcpy(ret, hocr_str.string());
+      delete res_it;
+      return ret;
+    }
+
+/**
+ * Make an XML-formatted string with ALTO markup (one <Page> element) from the
+ * internal data structures. The result is allocated with new char[].
+ */
+    char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
+      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+        return nullptr;
+      // Counters handed to AddIdToAlto for lines, blocks and words; start at 0.
+      int lcnt = 0, bcnt = 0, wcnt = 0;
+      int page_id = page_number;
+
+      STRING alto_str("");
+
+      if (input_file_ == nullptr)
+        SetInputName(nullptr);
+
+#ifdef _WIN32
+      // convert input name from ANSI encoding to utf-8
+  int str16_len =
+      MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
+  wchar_t *uni16_str = new WCHAR[str16_len];
+  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
+                                  uni16_str, str16_len);
+  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
+                                     nullptr, nullptr);
+  char *utf8_str = new char[utf8_len];
+  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
+                      utf8_len, nullptr, nullptr);
+  *input_file_ = utf8_str;
+  delete[] uni16_str;
+  delete[] utf8_str;
+#endif
+      // Open <Page> and <PrintSpace>; both take rect_width_ x rect_height_.
+      alto_str += "\t\t<Page WIDTH=\"";
+      alto_str.add_str_int("", rect_width_);
+      alto_str += "\" HEIGHT=\"";
+      alto_str.add_str_int("", rect_height_);
+      alto_str += "\" PHYSICAL_IMG_NR=\"";
+      alto_str.add_str_int("", page_number);  // image number, not a dimension
+      alto_str += "\"";
+      AddIdToAlto(&alto_str, "page", page_id);
+      alto_str += ">\n";
+      alto_str += ("\t\t\t<PrintSpace HPOS=\"0\" "
+                   "VPOS=\"0\""
+                   " WIDTH=\"");
+      alto_str.add_str_int("", rect_width_);
+      alto_str += "\" HEIGHT=\"";
+      alto_str.add_str_int("", rect_height_);
+      alto_str += "\">\n";
+
+      ResultIterator *res_it = GetIterator();
+      while (!res_it->Empty(RIL_BLOCK)) {
+        if (res_it->Empty(RIL_WORD)) {
+          res_it->Next(RIL_WORD);
+          continue;
+        }
+        // Start a <TextBlock> when the iterator sits at the start of a block.
+        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+          alto_str += "\t\t\t\t<TextBlock ";
+          AddIdToAlto(&alto_str, "block", bcnt);
+          AddBoxToAlto(res_it, RIL_BLOCK, &alto_str);
+          alto_str += "\n";
+        }
+
+        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+          // Likewise start a <TextLine> at the first word of each text line.
+          alto_str += "\t\t\t\t\t<TextLine ";
+          AddIdToAlto(&alto_str, "line", lcnt);
+          AddBoxToAlto(res_it, RIL_TEXTLINE, &alto_str);
+          alto_str += "\n";
+        }
+        // One <String> per word; CONTENT is built from its symbols below.
+        alto_str += "\t\t\t\t\t\t<String ";
+        AddIdToAlto(&alto_str, "string", wcnt);
+        AddBoxToAlto(res_it, RIL_WORD, &alto_str);
+        alto_str += " CONTENT=\"";
+        // Query the end-of-line/block state now: the symbol loop below advances
+        // the iterator until it reaches the beginning of the next word.
+        bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
+        bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
+
+        do {
+          const std::unique_ptr<const char[]> grapheme(
+                  res_it->GetUTF8Text(RIL_SYMBOL));
+          if (grapheme && grapheme[0] != 0) {
+            alto_str += HOcrEscape(grapheme.get());
+          }
+          res_it->Next(RIL_SYMBOL);
+        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+
+        alto_str += "\"/>\n";
+
+        wcnt++;
+
+        if (last_word_in_line) {
+          alto_str += "\t\t\t\t\t</TextLine>\n";
+          lcnt++;
+        }
+
+        if (last_word_in_block) {
+          alto_str += "\t\t\t\t</TextBlock>\n";
+          bcnt++;
+        }
+      }
+
+      alto_str += "\t\t\t</PrintSpace>\n";
+      alto_str += "\t\t</Page>\n";
+      // Copy the markup into a freshly allocated, NUL-terminated char array.
+      char *ret = new char[alto_str.length() + 1];
+      strcpy(ret, alto_str.string());
+      delete res_it;
+      return ret;
+    }
 
 /**
  * Make a TSV-formatted string from the internal data structures.
  * page_number is 0-based but will appear in the output as 1-based.
  * Returned string must be freed with the delete [] operator.
  */
-char* TessBaseAPI::GetTSVText(int page_number) {
-  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
-    return nullptr;
-
-  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
-  int page_id = page_number + 1;  // we use 1-based page numbers.
-
-  STRING tsv_str("");
-
-  int page_num = page_id;
-  int block_num = 0;
-  int par_num = 0;
-  int line_num = 0;
-  int word_num = 0;
-
-  tsv_str.add_str_int("1\t", page_num);  // level 1 - page
-  tsv_str.add_str_int("\t", block_num);
-  tsv_str.add_str_int("\t", par_num);
-  tsv_str.add_str_int("\t", line_num);
-  tsv_str.add_str_int("\t", word_num);
-  tsv_str.add_str_int("\t", rect_left_);
-  tsv_str.add_str_int("\t", rect_top_);
-  tsv_str.add_str_int("\t", rect_width_);
-  tsv_str.add_str_int("\t", rect_height_);
-  tsv_str += "\t-1\t\n";
-
-  ResultIterator* res_it = GetIterator();
-  while (!res_it->Empty(RIL_BLOCK)) {
-    if (res_it->Empty(RIL_WORD)) {
-      res_it->Next(RIL_WORD);
-      continue;
-    }
-
-    // Add rows for any new block/paragraph/textline.
-    if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-      block_num++;
-      par_num = 0;
-      line_num = 0;
-      word_num = 0;
-      tsv_str.add_str_int("2\t", page_num);  // level 2 - block
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
-      tsv_str += "\t-1\t\n";  // end of row for block
-    }
-    if (res_it->IsAtBeginningOf(RIL_PARA)) {
-      par_num++;
-      line_num = 0;
-      word_num = 0;
-      tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
-      tsv_str += "\t-1\t\n";  // end of row for para
-    }
-    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-      line_num++;
-      word_num = 0;
-      tsv_str.add_str_int("4\t", page_num);  // level 4 - line
+    char* TessBaseAPI::GetTSVText(int page_number) {
+      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
+        return nullptr;
+
+      int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
+      int page_id = page_number + 1;  // we use 1-based page numbers.
+
+      STRING tsv_str("");
+
+      int page_num = page_id;
+      int block_num = 0;
+      int par_num = 0;
+      int line_num = 0;
+      int word_num = 0;
+
+      tsv_str.add_str_int("1\t", page_num);  // level 1 - page
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
       tsv_str.add_str_int("\t", line_num);
       tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
-      tsv_str += "\t-1\t\n";  // end of row for line
-    }
-
-    // Now, process the word...
-    int left, top, right, bottom;
-    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-    word_num++;
-    tsv_str.add_str_int("5\t", page_num);  // level 5 - word
-    tsv_str.add_str_int("\t", block_num);
-    tsv_str.add_str_int("\t", par_num);
-    tsv_str.add_str_int("\t", line_num);
-    tsv_str.add_str_int("\t", word_num);
-    tsv_str.add_str_int("\t", left);
-    tsv_str.add_str_int("\t", top);
-    tsv_str.add_str_int("\t", right - left);
-    tsv_str.add_str_int("\t", bottom - top);
-    tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
-    tsv_str += "\t";
-
-    // Increment counts if at end of block/paragraph/textline.
-    if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
-    if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
-    if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
-
-    do {
-      tsv_str +=
-          std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
-      res_it->Next(RIL_SYMBOL);
-    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-    tsv_str += "\n";  // end of row
-    wcnt++;
-  }
+      tsv_str.add_str_int("\t", rect_left_);
+      tsv_str.add_str_int("\t", rect_top_);
+      tsv_str.add_str_int("\t", rect_width_);
+      tsv_str.add_str_int("\t", rect_height_);
+      tsv_str += "\t-1\t\n";
+
+      ResultIterator* res_it = GetIterator();
+      while (!res_it->Empty(RIL_BLOCK)) {
+        if (res_it->Empty(RIL_WORD)) {
+          res_it->Next(RIL_WORD);
+          continue;
+        }
+
+        // Add rows for any new block/paragraph/textline.
+        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+          block_num++;
+          par_num = 0;
+          line_num = 0;
+          word_num = 0;
+          tsv_str.add_str_int("2\t", page_num);  // level 2 - block
+          tsv_str.add_str_int("\t", block_num);
+          tsv_str.add_str_int("\t", par_num);
+          tsv_str.add_str_int("\t", line_num);
+          tsv_str.add_str_int("\t", word_num);
+          AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
+          tsv_str += "\t-1\t\n";  // end of row for block
+        }
+        if (res_it->IsAtBeginningOf(RIL_PARA)) {
+          par_num++;
+          line_num = 0;
+          word_num = 0;
+          tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
+          tsv_str.add_str_int("\t", block_num);
+          tsv_str.add_str_int("\t", par_num);
+          tsv_str.add_str_int("\t", line_num);
+          tsv_str.add_str_int("\t", word_num);
+          AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
+          tsv_str += "\t-1\t\n";  // end of row for para
+        }
+        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+          line_num++;
+          word_num = 0;
+          tsv_str.add_str_int("4\t", page_num);  // level 4 - line
+          tsv_str.add_str_int("\t", block_num);
+          tsv_str.add_str_int("\t", par_num);
+          tsv_str.add_str_int("\t", line_num);
+          tsv_str.add_str_int("\t", word_num);
+          AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
+          tsv_str += "\t-1\t\n";  // end of row for line
+        }
 
-  char* ret = new char[tsv_str.length() + 1];
-  strcpy(ret, tsv_str.string());
-  delete res_it;
-  return ret;
-}
+        // Now, process the word...
+        int left, top, right, bottom;
+        res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+        word_num++;
+        tsv_str.add_str_int("5\t", page_num);  // level 5 - word
+        tsv_str.add_str_int("\t", block_num);
+        tsv_str.add_str_int("\t", par_num);
+        tsv_str.add_str_int("\t", line_num);
+        tsv_str.add_str_int("\t", word_num);
+        tsv_str.add_str_int("\t", left);
+        tsv_str.add_str_int("\t", top);
+        tsv_str.add_str_int("\t", right - left);
+        tsv_str.add_str_int("\t", bottom - top);
+        tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
+        tsv_str += "\t";
+
+        // Increment counts if at end of block/paragraph/textline.
+        if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
+        if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
+        if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
+
+        do {
+          tsv_str +=
+                  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
+          res_it->Next(RIL_SYMBOL);
+        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+        tsv_str += "\n";  // end of row
+        wcnt++;
+      }
+
+      char* ret = new char[tsv_str.length() + 1];
+      strcpy(ret, tsv_str.string());
+      delete res_it;
+      return ret;
+    }
 
 /** The 5 numbers output for each box (the usual 4 and a page number.) */
-const int kNumbersPerBlob = 5;
+    const int kNumbersPerBlob = 5;
 /**
  * The number of bytes taken by each number. Since we use int16_t for ICOORD,
  * assume only 5 digits max.
  */
-const int kBytesPerNumber = 5;
+    const int kBytesPerNumber = 5;
 /**
  * Multiplier for max expected textlength assumes (kBytesPerNumber + space)
  * * kNumbersPerBlob plus the newline. Add to this the
  * original UTF8 characters, and one kMaxBytesPerLine for safety.
  */
-const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1;
+    const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1;
 /** Max bytes in the decimal representation of int64_t. */
-const int kBytesPer64BitNumber = 20;
+    const int kBytesPer64BitNumber = 20;
 /**
  * A maximal single box could occupy kNumbersPerBlob numbers at
  * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a
  * space plus the newline and the maximum length of a UNICHAR.
  * Test against this on each iteration for safety.
  */
-const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
-    UNICHAR_LEN;
+    const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
+                                 UNICHAR_LEN;
 
 /**
  * The recognized text is returned as a char* which is coded
@@ -1841,160 +2008,160 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
  * page_number is a 0-base page index that will appear in the box file.
  * Returned string must be freed with the delete [] operator.
  */
-char* TessBaseAPI::GetBoxText(int page_number) {
-  if (tesseract_ == nullptr ||
-      (!recognition_done_ && Recognize(nullptr) < 0))
-    return nullptr;
-  int blob_count;
-  int utf8_length = TextLength(&blob_count);
-  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
-      kMaxBytesPerLine;
-  char* result = new char[total_length];
-  result[0] = '\0';
-  int output_length = 0;
-  LTRResultIterator* it = GetLTRIterator();
-  do {
-    int left, top, right, bottom;
-    if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
-      const std::unique_ptr</*non-const*/ char[]> text(
-          it->GetUTF8Text(RIL_SYMBOL));
-      // Tesseract uses space for recognition failure. Fix to a reject
-      // character, kTesseractReject so we don't create illegal box files.
-      for (int i = 0; text[i] != '\0'; ++i) {
-        if (text[i] == ' ')
-          text[i] = kTesseractReject;
-      }
-      snprintf(result + output_length, total_length - output_length,
-               "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
-               right, image_height_ - top, page_number);
-      output_length += strlen(result + output_length);
-      // Just in case...
-      if (output_length + kMaxBytesPerLine > total_length)
-        break;
-    }
-  } while (it->Next(RIL_SYMBOL));
-  delete it;
-  return result;
-}
+    char* TessBaseAPI::GetBoxText(int page_number) {
+      if (tesseract_ == nullptr ||
+          (!recognition_done_ && Recognize(nullptr) < 0))
+        return nullptr;
+      int blob_count;
+      int utf8_length = TextLength(&blob_count);
+      int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
+                         kMaxBytesPerLine;
+      char* result = new char[total_length];
+      result[0] = '\0';
+      int output_length = 0;
+      LTRResultIterator* it = GetLTRIterator();
+      do {
+        int left, top, right, bottom;
+        if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
+          const std::unique_ptr</*non-const*/ char[]> text(
+                  it->GetUTF8Text(RIL_SYMBOL));
+          // Tesseract uses space for recognition failure. Fix to a reject
+          // character, kTesseractReject so we don't create illegal box files.
+          for (int i = 0; text[i] != '\0'; ++i) {
+            if (text[i] == ' ')
+              text[i] = kTesseractReject;
+          }
+          snprintf(result + output_length, total_length - output_length,
+                   "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
+                   right, image_height_ - top, page_number);
+          output_length += strlen(result + output_length);
+          // Just in case...
+          if (output_length + kMaxBytesPerLine > total_length)
+            break;
+        }
+      } while (it->Next(RIL_SYMBOL));
+      delete it;
+      return result;
+    }
 
 /**
  * Conversion table for non-latin characters.
  * Maps characters out of the latin set into the latin set.
  * TODO(rays) incorporate this translation into unicharset.
  */
-const int kUniChs[] = {
-  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
-};
+    const int kUniChs[] = {
+            0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
+    };
 /** Latin chars corresponding to the unicode chars above. */
-const int kLatinChs[] = {
-  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
-};
+    const int kLatinChs[] = {
+            0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
+    };
 
 /**
  * The recognized text is returned as a char* which is coded
  * as UNLV format Latin-1 with specific reject and suspect codes.
  * Returned string must be freed with the delete [] operator.
  */
-char* TessBaseAPI::GetUNLVText() {
-  if (tesseract_ == nullptr ||
-      (!recognition_done_ && Recognize(nullptr) < 0))
-    return nullptr;
-  bool tilde_crunch_written = false;
-  bool last_char_was_newline = true;
-  bool last_char_was_tilde = false;
-
-  int total_length = TextLength(nullptr);
-  PAGE_RES_IT   page_res_it(page_res_);
-  char* result = new char[total_length];
-  char* ptr = result;
-  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-       page_res_it.forward()) {
-    WERD_RES *word = page_res_it.word();
-    // Process the current word.
-    if (word->unlv_crunch_mode != CR_NONE) {
-      if (word->unlv_crunch_mode != CR_DELETE &&
-          (!tilde_crunch_written ||
-           (word->unlv_crunch_mode == CR_KEEP_SPACE &&
-            word->word->space() > 0 &&
-            !word->word->flag(W_FUZZY_NON) &&
-            !word->word->flag(W_FUZZY_SP)))) {
-        if (!word->word->flag(W_BOL) &&
-            word->word->space() > 0 &&
-            !word->word->flag(W_FUZZY_NON) &&
-            !word->word->flag(W_FUZZY_SP)) {
-          /* Write a space to separate from preceding good text */
-          *ptr++ = ' ';
-          last_char_was_tilde = false;
-        }
-        if (!last_char_was_tilde) {
-          // Write a reject char.
-          last_char_was_tilde = true;
-          *ptr++ = kUNLVReject;
-          tilde_crunch_written = true;
-          last_char_was_newline = false;
-        }
-      }
-    } else {
-      // NORMAL PROCESSING of non tilde crunched words.
-      tilde_crunch_written = false;
-      tesseract_->set_unlv_suspects(word);
-      const char* wordstr = word->best_choice->unichar_string().string();
-      const STRING& lengths = word->best_choice->unichar_lengths();
-      int length = lengths.length();
-      int i = 0;
-      int offset = 0;
-
-      if (last_char_was_tilde &&
-          word->word->space() == 0 && wordstr[offset] == ' ') {
-        // Prevent adjacent tilde across words - we know that adjacent tildes
-        // within words have been removed.
-        // Skip the first character.
-        offset = lengths[i++];
-      }
-      if (i < length && wordstr[offset] != 0) {
-        if (!last_char_was_newline)
-          *ptr++ = ' ';
-        else
-          last_char_was_newline = false;
-        for (; i < length; offset += lengths[i++]) {
-          if (wordstr[offset] == ' ' ||
-              wordstr[offset] == kTesseractReject) {
-            *ptr++ = kUNLVReject;
-            last_char_was_tilde = true;
-          } else {
-            if (word->reject_map[i].rejected())
-              *ptr++ = kUNLVSuspect;
-            UNICHAR ch(wordstr + offset, lengths[i]);
-            int uni_ch = ch.first_uni();
-            for (int j = 0; kUniChs[j] != 0; ++j) {
-              if (kUniChs[j] == uni_ch) {
-                uni_ch = kLatinChs[j];
-                break;
-              }
-            }
-            if (uni_ch <= 0xff) {
-              *ptr++ = static_cast<char>(uni_ch);
+    char* TessBaseAPI::GetUNLVText() {
+      if (tesseract_ == nullptr ||
+          (!recognition_done_ && Recognize(nullptr) < 0))
+        return nullptr;
+      bool tilde_crunch_written = false;
+      bool last_char_was_newline = true;
+      bool last_char_was_tilde = false;
+
+      int total_length = TextLength(nullptr);
+      PAGE_RES_IT   page_res_it(page_res_);
+      char* result = new char[total_length];
+      char* ptr = result;
+      for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+           page_res_it.forward()) {
+        WERD_RES *word = page_res_it.word();
+        // Process the current word.
+        if (word->unlv_crunch_mode != CR_NONE) {
+          if (word->unlv_crunch_mode != CR_DELETE &&
+              (!tilde_crunch_written ||
+               (word->unlv_crunch_mode == CR_KEEP_SPACE &&
+                word->word->space() > 0 &&
+                !word->word->flag(W_FUZZY_NON) &&
+                !word->word->flag(W_FUZZY_SP)))) {
+            if (!word->word->flag(W_BOL) &&
+                word->word->space() > 0 &&
+                !word->word->flag(W_FUZZY_NON) &&
+                !word->word->flag(W_FUZZY_SP)) {
+              /* Write a space to separate from preceding good text */
+              *ptr++ = ' ';
               last_char_was_tilde = false;
-            } else {
-              *ptr++ = kUNLVReject;
+            }
+            if (!last_char_was_tilde) {
+              // Write a reject char.
               last_char_was_tilde = true;
+              *ptr++ = kUNLVReject;
+              tilde_crunch_written = true;
+              last_char_was_newline = false;
+            }
+          }
+        } else {
+          // NORMAL PROCESSING of non tilde crunched words.
+          tilde_crunch_written = false;
+          tesseract_->set_unlv_suspects(word);
+          const char* wordstr = word->best_choice->unichar_string().string();
+          const STRING& lengths = word->best_choice->unichar_lengths();
+          int length = lengths.length();
+          int i = 0;
+          int offset = 0;
+
+          if (last_char_was_tilde &&
+              word->word->space() == 0 && wordstr[offset] == ' ') {
+            // Prevent adjacent tilde across words - we know that adjacent tildes
+            // within words have been removed.
+            // Skip the first character.
+            offset = lengths[i++];
+          }
+          if (i < length && wordstr[offset] != 0) {
+            if (!last_char_was_newline)
+              *ptr++ = ' ';
+            else
+              last_char_was_newline = false;
+            for (; i < length; offset += lengths[i++]) {
+              if (wordstr[offset] == ' ' ||
+                  wordstr[offset] == kTesseractReject) {
+                *ptr++ = kUNLVReject;
+                last_char_was_tilde = true;
+              } else {
+                if (word->reject_map[i].rejected())
+                  *ptr++ = kUNLVSuspect;
+                UNICHAR ch(wordstr + offset, lengths[i]);
+                int uni_ch = ch.first_uni();
+                for (int j = 0; kUniChs[j] != 0; ++j) {
+                  if (kUniChs[j] == uni_ch) {
+                    uni_ch = kLatinChs[j];
+                    break;
+                  }
+                }
+                if (uni_ch <= 0xff) {
+                  *ptr++ = static_cast<char>(uni_ch);
+                  last_char_was_tilde = false;
+                } else {
+                  *ptr++ = kUNLVReject;
+                  last_char_was_tilde = true;
+                }
+              }
             }
           }
         }
+        if (word->word->flag(W_EOL) && !last_char_was_newline) {
+          /* Add a new line output */
+          *ptr++ = '\n';
+          tilde_crunch_written = false;
+          last_char_was_newline = true;
+          last_char_was_tilde = false;
+        }
       }
-    }
-    if (word->word->flag(W_EOL) && !last_char_was_newline) {
-      /* Add a new line output */
       *ptr++ = '\n';
-      tilde_crunch_written = false;
-      last_char_was_newline = true;
-      last_char_was_tilde = false;
+      *ptr = '\0';
+      return result;
     }
-  }
-  *ptr++ = '\n';
-  *ptr = '\0';
-  return result;
-}
 
 #ifndef DISABLED_LEGACY_ENGINE
 
@@ -2007,103 +2174,103 @@ char* TessBaseAPI::GetUNLVText() {
  * script_conf is confidence level in the script
  * Returns true on success and writes values to each parameter as an output
  */
-bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
-                                          const char** script_name,
-                                          float* script_conf) {
-  OSResults osr;
-
-  bool osd = DetectOS(&osr);
-  if (!osd) {
-    return false;
-  }
+    bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
+                                              const char** script_name,
+                                              float* script_conf) {
+      OSResults osr;
+
+      bool osd = DetectOS(&osr);
+      if (!osd) {
+        return false;
+      }
 
-  int orient_id = osr.best_result.orientation_id;
-  int script_id = osr.get_best_script(orient_id);
-  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
-  if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
+      int orient_id = osr.best_result.orientation_id;
+      int script_id = osr.get_best_script(orient_id);
+      if (orient_conf) *orient_conf = osr.best_result.oconfidence;
+      if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
 
-  if (script_name) {
-    const char* script = osr.unicharset->get_script_from_script_id(script_id);
+      if (script_name) {
+        const char* script = osr.unicharset->get_script_from_script_id(script_id);
 
-    *script_name = script;
-  }
+        *script_name = script;
+      }
 
-  if (script_conf) *script_conf = osr.best_result.sconfidence;
+      if (script_conf) *script_conf = osr.best_result.sconfidence;
 
-  return true;
-}
+      return true;
+    }
 
 /**
  * The recognized text is returned as a char* which is coded
  * as UTF8 and must be freed with the delete [] operator.
  * page_number is a 0-based page index that will appear in the osd file.
  */
-char* TessBaseAPI::GetOsdText(int page_number) {
-  int orient_deg;
-  float orient_conf;
-  const char* script_name;
-  float script_conf;
-
-  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
-                               &script_conf))
-    return nullptr;
-
-  // clockwise rotation needed to make the page upright
-  int rotate = OrientationIdToValue(orient_deg / 90);
-
-  const int kOsdBufsize = 255;
-  char* osd_buf = new char[kOsdBufsize];
-  snprintf(osd_buf, kOsdBufsize,
-           "Page number: %d\n"
-           "Orientation in degrees: %d\n"
-           "Rotate: %d\n"
-           "Orientation confidence: %.2f\n"
-           "Script: %s\n"
-           "Script confidence: %.2f\n",
-           page_number, orient_deg, rotate, orient_conf, script_name,
-           script_conf);
-
-  return osd_buf;
-}
+    char* TessBaseAPI::GetOsdText(int page_number) {
+      int orient_deg;
+      float orient_conf;
+      const char* script_name;
+      float script_conf;
+
+      if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
+                                   &script_conf))
+        return nullptr;
+
+      // clockwise rotation needed to make the page upright
+      int rotate = OrientationIdToValue(orient_deg / 90);
+
+      const int kOsdBufsize = 255;
+      char* osd_buf = new char[kOsdBufsize];
+      snprintf(osd_buf, kOsdBufsize,
+               "Page number: %d\n"
+               "Orientation in degrees: %d\n"
+               "Rotate: %d\n"
+               "Orientation confidence: %.2f\n"
+               "Script: %s\n"
+               "Script confidence: %.2f\n",
+               page_number, orient_deg, rotate, orient_conf, script_name,
+               script_conf);
+
+      return osd_buf;
+    }
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
 /** Returns the average word confidence for Tesseract page result. */
-int TessBaseAPI::MeanTextConf() {
-  int* conf = AllWordConfidences();
-  if (!conf) return 0;
-  int sum = 0;
-  int *pt = conf;
-  while (*pt >= 0) sum += *pt++;
-  if (pt != conf) sum /= pt - conf;
-  delete [] conf;
-  return sum;
-}
+    int TessBaseAPI::MeanTextConf() {
+      int* conf = AllWordConfidences();
+      if (!conf) return 0;
+      int sum = 0;
+      int *pt = conf;
+      while (*pt >= 0) sum += *pt++;
+      if (pt != conf) sum /= pt - conf;
+      delete [] conf;
+      return sum;
+    }
 
 /** Returns an array of all word confidences, terminated by -1. */
-int* TessBaseAPI::AllWordConfidences() {
-  if (tesseract_ == nullptr ||
-      (!recognition_done_ && Recognize(nullptr) < 0))
-    return nullptr;
-  int n_word = 0;
-  PAGE_RES_IT res_it(page_res_);
-  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
-    n_word++;
-
-  int* conf = new int[n_word+1];
-  n_word = 0;
-  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
-    WERD_RES *word = res_it.word();
-    WERD_CHOICE* choice = word->best_choice;
-    int w_conf = static_cast<int>(100 + 5 * choice->certainty());
-                 // This is the eq for converting Tesseract confidence to 1..100
-    if (w_conf < 0) w_conf = 0;
-    if (w_conf > 100) w_conf = 100;
-    conf[n_word++] = w_conf;
-  }
-  conf[n_word] = -1;
-  return conf;
-}
+    int* TessBaseAPI::AllWordConfidences() {
+      if (tesseract_ == nullptr ||
+          (!recognition_done_ && Recognize(nullptr) < 0))
+        return nullptr;
+      int n_word = 0;
+      PAGE_RES_IT res_it(page_res_);
+      for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
+        n_word++;
+
+      int* conf = new int[n_word+1];
+      n_word = 0;
+      for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+        WERD_RES *word = res_it.word();
+        WERD_CHOICE* choice = word->best_choice;
+        int w_conf = static_cast<int>(100 + 5 * choice->certainty());
+        // This is the eq for converting Tesseract confidence to 1..100
+        if (w_conf < 0) w_conf = 0;
+        if (w_conf > 100) w_conf = 100;
+        conf[n_word++] = w_conf;
+      }
+      conf[n_word] = -1;
+      return conf;
+    }
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
@@ -2116,61 +2283,61 @@ int* TessBaseAPI::AllWordConfidences() {
  * The currently set PageSegMode is preserved.
  * Returns false if adaption was not possible for some reason.
  */
-bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
-  int debug = 0;
-  GetIntVariable("applybox_debug", &debug);
-  bool success = true;
-  PageSegMode current_psm = GetPageSegMode();
-  SetPageSegMode(mode);
-  SetVariable("classify_enable_learning", "0");
-  const std::unique_ptr<const char[]> text(GetUTF8Text());
-  if (debug) {
-    tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
-  }
-  if (text != nullptr) {
-    PAGE_RES_IT it(page_res_);
-    WERD_RES* word_res = it.word();
-    if (word_res != nullptr) {
-      word_res->word->set_text(wordstr);
-      // Check to see if text matches wordstr.
-      int w = 0;
-      int t;
-      for (t = 0; text[t] != '\0'; ++t) {
-        if (text[t] == '\n' || text[t] == ' ')
-          continue;
-        while (wordstr[w] == ' ') ++w;
-        if (text[t] != wordstr[w])
-          break;
-        ++w;
+    bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
+      int debug = 0;
+      GetIntVariable("applybox_debug", &debug);
+      bool success = true;
+      PageSegMode current_psm = GetPageSegMode();
+      SetPageSegMode(mode);
+      SetVariable("classify_enable_learning", "0");
+      const std::unique_ptr<const char[]> text(GetUTF8Text());
+      if (debug) {
+        tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
       }
-      if (text[t] != '\0' || wordstr[w] != '\0') {
-        // No match.
-        delete page_res_;
-        GenericVector<TBOX> boxes;
-        page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
-        tesseract_->ReSegmentByClassification(page_res_);
-        tesseract_->TidyUp(page_res_);
-        PAGE_RES_IT pr_it(page_res_);
-        if (pr_it.word() == nullptr)
+      if (text != nullptr) {
+        PAGE_RES_IT it(page_res_);
+        WERD_RES* word_res = it.word();
+        if (word_res != nullptr) {
+          word_res->word->set_text(wordstr);
+          // Check to see if text matches wordstr.
+          int w = 0;
+          int t;
+          for (t = 0; text[t] != '\0'; ++t) {
+            if (text[t] == '\n' || text[t] == ' ')
+              continue;
+            while (wordstr[w] == ' ') ++w;
+            if (text[t] != wordstr[w])
+              break;
+            ++w;
+          }
+          if (text[t] != '\0' || wordstr[w] != '\0') {
+            // No match.
+            delete page_res_;
+            GenericVector<TBOX> boxes;
+            page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
+            tesseract_->ReSegmentByClassification(page_res_);
+            tesseract_->TidyUp(page_res_);
+            PAGE_RES_IT pr_it(page_res_);
+            if (pr_it.word() == nullptr)
+              success = false;
+            else
+              word_res = pr_it.word();
+          } else {
+            word_res->BestChoiceToCorrectText();
+          }
+          if (success) {
+            tesseract_->EnableLearning = true;
+            tesseract_->LearnWord(nullptr, word_res);
+          }
+        } else {
           success = false;
-        else
-          word_res = pr_it.word();
+        }
       } else {
-        word_res->BestChoiceToCorrectText();
-      }
-      if (success) {
-        tesseract_->EnableLearning = true;
-        tesseract_->LearnWord(nullptr, word_res);
+        success = false;
       }
-    } else {
-      success = false;
+      SetPageSegMode(current_psm);
+      return success;
     }
-  } else {
-    success = false;
-  }
-  SetPageSegMode(current_psm);
-  return success;
-}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
@@ -2179,12 +2346,12 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
  * Afterwards, you must call SetImage or TesseractRect before doing
  * any Recognize or Get* operation.
  */
-void TessBaseAPI::Clear() {
-  if (thresholder_ != nullptr)
-    thresholder_->Clear();
-  ClearResults();
-  if (tesseract_ != nullptr) SetInputImage(nullptr);
-}
+    void TessBaseAPI::Clear() {
+      if (thresholder_ != nullptr)
+        thresholder_->Clear();
+      ClearResults();
+      if (tesseract_ != nullptr) SetInputImage(nullptr);
+    }
 
 /**
  * Close down tesseract and free up all memory. End() is equivalent to
@@ -2192,100 +2359,100 @@ void TessBaseAPI::Clear() {
  * Once End() has been used, none of the other API functions may be used
  * other than Init and anything declared above it in the class definition.
  */
-void TessBaseAPI::End() {
-  Clear();
-  delete thresholder_;
-  thresholder_ = nullptr;
-  delete page_res_;
-  page_res_ = nullptr;
-  delete block_list_;
-  block_list_ = nullptr;
-  if (paragraph_models_ != nullptr) {
-    paragraph_models_->delete_data_pointers();
-    delete paragraph_models_;
-    paragraph_models_ = nullptr;
-  }
-  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
-  delete tesseract_;
-  tesseract_ = nullptr;
-  delete osd_tesseract_;
-  osd_tesseract_ = nullptr;
-  delete equ_detect_;
-  equ_detect_ = nullptr;
-  delete input_file_;
-  input_file_ = nullptr;
-  delete output_file_;
-  output_file_ = nullptr;
-  delete datapath_;
-  datapath_ = nullptr;
-  delete language_;
-  language_ = nullptr;
-}
+    void TessBaseAPI::End() {
+      Clear();
+      delete thresholder_;
+      thresholder_ = nullptr;
+      delete page_res_;
+      page_res_ = nullptr;
+      delete block_list_;
+      block_list_ = nullptr;
+      if (paragraph_models_ != nullptr) {
+        paragraph_models_->delete_data_pointers();
+        delete paragraph_models_;
+        paragraph_models_ = nullptr;
+      }
+      if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
+      delete tesseract_;
+      tesseract_ = nullptr;
+      delete osd_tesseract_;
+      osd_tesseract_ = nullptr;
+      delete equ_detect_;
+      equ_detect_ = nullptr;
+      delete input_file_;
+      input_file_ = nullptr;
+      delete output_file_;
+      output_file_ = nullptr;
+      delete datapath_;
+      datapath_ = nullptr;
+      delete language_;
+      language_ = nullptr;
+    }
 
 // Clear any library-level memory caches.
 // There are a variety of expensive-to-load constant data structures (mostly
 // language dictionaries) that are cached globally -- surviving the Init()
 // and End() of individual TessBaseAPI's.  This function allows the clearing
 // of these caches.
-void TessBaseAPI::ClearPersistentCache() {
-  Dict::GlobalDawgCache()->DeleteUnusedDawgs();
-}
+    void TessBaseAPI::ClearPersistentCache() {
+      Dict::GlobalDawgCache()->DeleteUnusedDawgs();
+    }
 
 /**
  * Check whether a word is valid according to Tesseract's language model
  * returns 0 if the word is invalid, non-zero if valid
  */
-int TessBaseAPI::IsValidWord(const char *word) {
-  return tesseract_->getDict().valid_word(word);
-}
+    int TessBaseAPI::IsValidWord(const char *word) {
+      return tesseract_->getDict().valid_word(word);
+    }
 // Returns true if utf8_character is defined in the UniCharset.
-bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
-    return tesseract_->unicharset.contains_unichar(utf8_character);
-}
+    bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
+      return tesseract_->unicharset.contains_unichar(utf8_character);
+    }
 
 
 // TODO(rays) Obsolete this function and replace with a more aptly named
 // function that returns image coordinates rather than tesseract coordinates.
-bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
-  PageIterator* it = AnalyseLayout();
-  if (it == nullptr) {
-    return false;
-  }
-  int x1, x2, y1, y2;
-  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
-  // Calculate offset and slope (NOTE: Kind of ugly)
-  if (x2 <= x1) x2 = x1 + 1;
-  // Convert the point pair to slope/offset of the baseline (in image coords.)
-  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
-  *out_offset = static_cast<int>(y1 - *out_slope * x1);
-  // Get the y-coord of the baseline at the left and right edges of the
-  // textline's bounding box.
-  int left, top, right, bottom;
-  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
-    delete it;
-    return false;
-  }
-  int left_y = IntCastRounded(*out_slope * left + *out_offset);
-  int right_y = IntCastRounded(*out_slope * right + *out_offset);
-  // Shift the baseline down so it passes through the nearest bottom-corner
-  // of the textline's bounding box. This is the difference between the y
-  // at the lowest (max) edge of the box and the actual box bottom.
-  *out_offset += bottom - std::max(left_y, right_y);
-  // Switch back to bottom-up tesseract coordinates. Requires negation of
-  // the slope and height - offset for the offset.
-  *out_slope = -*out_slope;
-  *out_offset = rect_height_ - *out_offset;
-  delete it;
-
-  return true;
-}
+    bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
+      PageIterator* it = AnalyseLayout();
+      if (it == nullptr) {
+        return false;
+      }
+      int x1, x2, y1, y2;
+      it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
+      // Calculate offset and slope (NOTE: Kind of ugly)
+      if (x2 <= x1) x2 = x1 + 1;
+      // Convert the point pair to slope/offset of the baseline (in image coords.)
+      *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
+      *out_offset = static_cast<int>(y1 - *out_slope * x1);
+      // Get the y-coord of the baseline at the left and right edges of the
+      // textline's bounding box.
+      int left, top, right, bottom;
+      if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
+        delete it;
+        return false;
+      }
+      int left_y = IntCastRounded(*out_slope * left + *out_offset);
+      int right_y = IntCastRounded(*out_slope * right + *out_offset);
+      // Shift the baseline down so it passes through the nearest bottom-corner
+      // of the textline's bounding box. This is the difference between the y
+      // at the lowest (max) edge of the box and the actual box bottom.
+      *out_offset += bottom - std::max(left_y, right_y);
+      // Switch back to bottom-up tesseract coordinates. Requires negation of
+      // the slope and height - offset for the offset.
+      *out_slope = -*out_slope;
+      *out_offset = rect_height_ - *out_offset;
+      delete it;
+
+      return true;
+    }
 
 /** Sets Dict::letter_is_okay_ function to point to the given function. */
-void TessBaseAPI::SetDictFunc(DictFunc f) {
-  if (tesseract_ != nullptr) {
-    tesseract_->getDict().letter_is_okay_ = f;
-  }
-}
+    void TessBaseAPI::SetDictFunc(DictFunc f) {
+      if (tesseract_ != nullptr) {
+        tesseract_->getDict().letter_is_okay_ = f;
+      }
+    }
 
 /**
  * Sets Dict::probability_in_context_ function to point to the given
@@ -2295,35 +2462,35 @@ void TessBaseAPI::SetDictFunc(DictFunc f) {
  * "character" (in general a utf-8 string), given the context of a previous
  * utf-8 string.
  */
-void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
-  if (tesseract_ != nullptr) {
-    tesseract_->getDict().probability_in_context_ = f;
-    // Set it for the sublangs too.
-    int num_subs = tesseract_->num_sub_langs();
-    for (int i = 0; i < num_subs; ++i) {
-      tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
+    void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
+      if (tesseract_ != nullptr) {
+        tesseract_->getDict().probability_in_context_ = f;
+        // Set it for the sublangs too.
+        int num_subs = tesseract_->num_sub_langs();
+        for (int i = 0; i < num_subs; ++i) {
+          tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
+        }
+      }
     }
-  }
-}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Sets Wordrec::fill_lattice_ function to point to the given function. */
-void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
-  if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
-}
+    void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
+      if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /** Common code for setting the image. */
-bool TessBaseAPI::InternalSetImage() {
-  if (tesseract_ == nullptr) {
-    tprintf("Please call Init before attempting to set an image.\n");
-    return false;
-  }
-  if (thresholder_ == nullptr)
-    thresholder_ = new ImageThresholder;
-  ClearResults();
-  return true;
-}
+    bool TessBaseAPI::InternalSetImage() {
+      if (tesseract_ == nullptr) {
+        tprintf("Please call Init before attempting to set an image.\n");
+        return false;
+      }
+      if (thresholder_ == nullptr)
+        thresholder_ = new ImageThresholder;
+      ClearResults();
+      return true;
+    }
 
 /**
  * Run the thresholder to make the thresholded image, returned in pix,
@@ -2331,155 +2498,155 @@ bool TessBaseAPI::InternalSetImage() {
  * to an existing pixDestroyable Pix.
  * The usual argument to Threshold is Tesseract::mutable_pix_binary().
  */
-bool TessBaseAPI::Threshold(Pix** pix) {
-  ASSERT_HOST(pix != nullptr);
-  if (*pix != nullptr)
-    pixDestroy(pix);
-  // Zero resolution messes up the algorithms, so make sure it is credible.
-  int user_dpi = 0;
-  bool a = GetIntVariable("user_defined_dpi", &user_dpi);
-  int y_res = thresholder_->GetScaledYResolution();
-  if (user_dpi && (user_dpi < kMinCredibleResolution ||
-      user_dpi > kMaxCredibleResolution)) {
-    tprintf("Warning: User defined image dpi is outside of expected range "
-            "(%d - %d)!\n",
-            kMinCredibleResolution, kMaxCredibleResolution);
-  }
-  // Always use user defined dpi
-  if (user_dpi) {
-    thresholder_->SetSourceYResolution(user_dpi);
-  } else if (y_res < kMinCredibleResolution ||
-             y_res > kMaxCredibleResolution) {
-    tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
-            y_res, kMinCredibleResolution);
-    thresholder_->SetSourceYResolution(kMinCredibleResolution);
-  }
-  PageSegMode pageseg_mode =
-      static_cast<PageSegMode>(
-          static_cast<int>(tesseract_->tessedit_pageseg_mode));
-  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
-  thresholder_->GetImageSizes(&rect_left_, &rect_top_,
-                              &rect_width_, &rect_height_,
-                              &image_width_, &image_height_);
-  if (!thresholder_->IsBinary()) {
-    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
-    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
-  } else {
-    tesseract_->set_pix_thresholds(nullptr);
-    tesseract_->set_pix_grey(nullptr);
-  }
-  // Set the internal resolution that is used for layout parameters from the
-  // estimated resolution, rather than the image resolution, which may be
-  // fabricated, but we will use the image resolution, if there is one, to
-  // report output point sizes.
-  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
-                                  kMinCredibleResolution,
-                                  kMaxCredibleResolution);
-  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
-    tprintf("Estimated internal resolution %d out of range! "
-            "Corrected to %d.\n",
-            thresholder_->GetScaledEstimatedResolution(), estimated_res);
-  }
-  tesseract_->set_source_resolution(estimated_res);
-  SavePixForCrash(estimated_res, *pix);
-  return true;
-}
+    bool TessBaseAPI::Threshold(Pix** pix) {
+      ASSERT_HOST(pix != nullptr);
+      if (*pix != nullptr)
+        pixDestroy(pix);
+      // Zero resolution messes up the algorithms, so make sure it is credible.
+      int user_dpi = 0;
+      GetIntVariable("user_defined_dpi", &user_dpi);  // 0 is treated as unset.
+      int y_res = thresholder_->GetScaledYResolution();
+      if (user_dpi && (user_dpi < kMinCredibleResolution ||
+                       user_dpi > kMaxCredibleResolution)) {
+        tprintf("Warning: User defined image dpi is outside of expected range "
+                "(%d - %d)!\n",
+                kMinCredibleResolution, kMaxCredibleResolution);
+      }
+      // Always use user defined dpi
+      if (user_dpi) {
+        thresholder_->SetSourceYResolution(user_dpi);
+      } else if (y_res < kMinCredibleResolution ||
+                 y_res > kMaxCredibleResolution) {
+        tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
+                y_res, kMinCredibleResolution);
+        thresholder_->SetSourceYResolution(kMinCredibleResolution);
+      }
+      PageSegMode pageseg_mode =
+              static_cast<PageSegMode>(
+                      static_cast<int>(tesseract_->tessedit_pageseg_mode));
+      if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
+      thresholder_->GetImageSizes(&rect_left_, &rect_top_,
+                                  &rect_width_, &rect_height_,
+                                  &image_width_, &image_height_);
+      if (!thresholder_->IsBinary()) {
+        tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
+        tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
+      } else {
+        tesseract_->set_pix_thresholds(nullptr);
+        tesseract_->set_pix_grey(nullptr);
+      }
+      // Set the internal resolution that is used for layout parameters from the
+      // estimated resolution, rather than the image resolution, which may be
+      // fabricated, but we will use the image resolution, if there is one, to
+      // report output point sizes.
+      int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
+                                      kMinCredibleResolution,
+                                      kMaxCredibleResolution);
+      if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
+        tprintf("Estimated internal resolution %d out of range! "
+                "Corrected to %d.\n",
+                thresholder_->GetScaledEstimatedResolution(), estimated_res);
+      }
+      tesseract_->set_source_resolution(estimated_res);
+      SavePixForCrash(estimated_res, *pix);
+      return true;
+    }
 
 /** Find lines from the image making the BLOCK_LIST. */
-int TessBaseAPI::FindLines() {
-  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-    tprintf("Please call SetImage before attempting recognition.\n");
-    return -1;
-  }
-  if (recognition_done_)
-    ClearResults();
-  if (!block_list_->empty()) {
-    return 0;
-  }
-  if (tesseract_ == nullptr) {
-    tesseract_ = new Tesseract;
-  #ifndef DISABLED_LEGACY_ENGINE
-    tesseract_->InitAdaptiveClassifier(nullptr);
-  #endif
-  }
-  if (tesseract_->pix_binary() == nullptr &&
-      !Threshold(tesseract_->mutable_pix_binary())) {
-    return -1;
-  }
+    int TessBaseAPI::FindLines() {
+      if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+        tprintf("Please call SetImage before attempting recognition.\n");
+        return -1;
+      }
+      if (recognition_done_)
+        ClearResults();
+      if (!block_list_->empty()) {
+        return 0;
+      }
+      if (tesseract_ == nullptr) {
+        tesseract_ = new Tesseract;
+#ifndef DISABLED_LEGACY_ENGINE
+        tesseract_->InitAdaptiveClassifier(nullptr);
+#endif
+      }
+      if (tesseract_->pix_binary() == nullptr &&
+          !Threshold(tesseract_->mutable_pix_binary())) {
+        return -1;
+      }
 
-  tesseract_->PrepareForPageseg();
+      tesseract_->PrepareForPageseg();
 
 #ifndef DISABLED_LEGACY_ENGINE
-  if (tesseract_->textord_equation_detect) {
-    if (equ_detect_ == nullptr && datapath_ != nullptr) {
-      equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
-    }
-    if (equ_detect_ == nullptr) {
-      tprintf("Warning: Could not set equation detector\n");
-    } else {
-      tesseract_->SetEquationDetect(equ_detect_);
-    }
-  }
+      if (tesseract_->textord_equation_detect) {
+        if (equ_detect_ == nullptr && datapath_ != nullptr) {
+          equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
+        }
+        if (equ_detect_ == nullptr) {
+          tprintf("Warning: Could not set equation detector\n");
+        } else {
+          tesseract_->SetEquationDetect(equ_detect_);
+        }
+      }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-  Tesseract* osd_tess = osd_tesseract_;
-  OSResults osr;
-  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
-      osd_tess == nullptr) {
-    if (strcmp(language_->string(), "osd") == 0) {
-      osd_tess = tesseract_;
-    } else {
-      osd_tesseract_ = new Tesseract;
-      TessdataManager mgr(reader_);
-      if (datapath_ == nullptr) {
-        tprintf("Warning: Auto orientation and script detection requested,"
-                " but data path is undefined\n");
-        delete osd_tesseract_;
-        osd_tesseract_ = nullptr;
-      } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
-                                                "osd", OEM_TESSERACT_ONLY,
-                                                nullptr, 0, nullptr, nullptr,
-                                                false, &mgr) == 0) {
-        osd_tess = osd_tesseract_;
-        osd_tesseract_->set_source_resolution(
-            thresholder_->GetSourceYResolution());
-      } else {
-        tprintf("Warning: Auto orientation and script detection requested,"
-                " but osd language failed to load\n");
-        delete osd_tesseract_;
-        osd_tesseract_ = nullptr;
+      Tesseract* osd_tess = osd_tesseract_;
+      OSResults osr;
+      if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
+          osd_tess == nullptr) {
+        if (strcmp(language_->string(), "osd") == 0) {
+          osd_tess = tesseract_;
+        } else {
+          osd_tesseract_ = new Tesseract;
+          TessdataManager mgr(reader_);
+          if (datapath_ == nullptr) {
+            tprintf("Warning: Auto orientation and script detection requested,"
+                    " but data path is undefined\n");
+            delete osd_tesseract_;
+            osd_tesseract_ = nullptr;
+          } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
+                                                    "osd", OEM_TESSERACT_ONLY,
+                                                    nullptr, 0, nullptr, nullptr,
+                                                    false, &mgr) == 0) {
+            osd_tess = osd_tesseract_;
+            osd_tesseract_->set_source_resolution(
+                    thresholder_->GetSourceYResolution());
+          } else {
+            tprintf("Warning: Auto orientation and script detection requested,"
+                    " but osd language failed to load\n");
+            delete osd_tesseract_;
+            osd_tesseract_ = nullptr;
+          }
+        }
       }
-    }
-  }
 
-  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
-    return -1;
+      if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
+        return -1;
 
-  // If Devanagari is being recognized, we use different images for page seg
-  // and for OCR.
-  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
-  return 0;
-}
+      // If Devanagari is being recognized, we use different images for page seg
+      // and for OCR.
+      tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
+      return 0;
+    }
 
 /** Delete the pageres and clear the block list ready for a new page. */
-void TessBaseAPI::ClearResults() {
-  if (tesseract_ != nullptr) {
-    tesseract_->Clear();
-  }
-  delete page_res_;
-  page_res_ = nullptr;
-  recognition_done_ = false;
-  if (block_list_ == nullptr)
-    block_list_ = new BLOCK_LIST;
-  else
-    block_list_->clear();
-  if (paragraph_models_ != nullptr) {
-    paragraph_models_->delete_data_pointers();
-    delete paragraph_models_;
-    paragraph_models_ = nullptr;
-  }
-  SavePixForCrash(0, nullptr);
-}
+    void TessBaseAPI::ClearResults() {
+      if (tesseract_ != nullptr) {
+        tesseract_->Clear();
+      }
+      delete page_res_;
+      page_res_ = nullptr;
+      recognition_done_ = false;
+      if (block_list_ == nullptr)
+        block_list_ = new BLOCK_LIST;
+      else
+        block_list_->clear();
+      if (paragraph_models_ != nullptr) {
+        paragraph_models_->delete_data_pointers();
+        delete paragraph_models_;
+        paragraph_models_ = nullptr;
+      }
+      SavePixForCrash(0, nullptr);
+    }
 
 /**
  * Return the length of the output text string, as UTF8, assuming
@@ -2488,55 +2655,55 @@ void TessBaseAPI::ClearResults() {
  * character.
  * Also return the number of recognized blobs in blob_count.
  */
-int TessBaseAPI::TextLength(int* blob_count) {
-  if (tesseract_ == nullptr || page_res_ == nullptr)
-    return 0;
-
-  PAGE_RES_IT   page_res_it(page_res_);
-  int total_length = 2;
-  int total_blobs = 0;
-  // Iterate over the data structures to extract the recognition result.
-  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-       page_res_it.forward()) {
-    WERD_RES *word = page_res_it.word();
-    WERD_CHOICE* choice = word->best_choice;
-    if (choice != nullptr) {
-      total_blobs += choice->length() + 2;
-      total_length += choice->unichar_string().length() + 2;
-      for (int i = 0; i < word->reject_map.length(); ++i) {
-        if (word->reject_map[i].rejected())
-          ++total_length;
+    int TessBaseAPI::TextLength(int* blob_count) {
+      if (tesseract_ == nullptr || page_res_ == nullptr)
+        return 0;
+
+      PAGE_RES_IT   page_res_it(page_res_);
+      int total_length = 2;
+      int total_blobs = 0;
+      // Iterate over the data structures to extract the recognition result.
+      for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+           page_res_it.forward()) {
+        WERD_RES *word = page_res_it.word();
+        WERD_CHOICE* choice = word->best_choice;
+        if (choice != nullptr) {
+          total_blobs += choice->length() + 2;
+          total_length += choice->unichar_string().length() + 2;
+          for (int i = 0; i < word->reject_map.length(); ++i) {
+            if (word->reject_map[i].rejected())
+              ++total_length;
+          }
+        }
       }
+      if (blob_count != nullptr)
+        *blob_count = total_blobs;
+      return total_length;
     }
-  }
-  if (blob_count != nullptr)
-    *blob_count = total_blobs;
-  return total_length;
-}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
  * Estimates the Orientation And Script of the image.
  * Returns true if the image was processed successfully.
  */
-bool TessBaseAPI::DetectOS(OSResults* osr) {
-  if (tesseract_ == nullptr)
-    return false;
-  ClearResults();
-  if (tesseract_->pix_binary() == nullptr &&
-      !Threshold(tesseract_->mutable_pix_binary())) {
-    return false;
-  }
+    bool TessBaseAPI::DetectOS(OSResults* osr) {
+      if (tesseract_ == nullptr)
+        return false;
+      ClearResults();
+      if (tesseract_->pix_binary() == nullptr &&
+          !Threshold(tesseract_->mutable_pix_binary())) {
+        return false;
+      }
 
-  if (input_file_ == nullptr)
-    input_file_ = new STRING(kInputFile);
-  return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
-}
+      if (input_file_ == nullptr)
+        input_file_ = new STRING(kInputFile);
+      return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-void TessBaseAPI::set_min_orientation_margin(double margin) {
-  tesseract_->min_orientation_margin.set_value(margin);
-}
+    void TessBaseAPI::set_min_orientation_margin(double margin) {
+      tesseract_->min_orientation_margin.set_value(margin);
+    }
 
 /**
  * Return text orientation of each block as determined in an earlier page layout
@@ -2552,98 +2719,98 @@ void TessBaseAPI::set_min_orientation_margin(double margin) {
  * be less than the total number of blocks. The ordering is intended to be
  * consistent with GetTextLines().
  */
-void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
-                                           bool** vertical_writing) {
-  delete[] *block_orientation;
-  *block_orientation = nullptr;
-  delete[] *vertical_writing;
-  *vertical_writing = nullptr;
-  BLOCK_IT block_it(block_list_);
-
-  block_it.move_to_first();
-  int num_blocks = 0;
-  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
-    if (!block_it.data()->pdblk.poly_block()->IsText()) {
-      continue;
-    }
-    ++num_blocks;
-  }
-  if (!num_blocks) {
-    tprintf("WARNING: Found no blocks\n");
-    return;
-  }
-  *block_orientation = new int[num_blocks];
-  *vertical_writing = new bool[num_blocks];
-  block_it.move_to_first();
-  int i = 0;
-  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
-       block_it.forward()) {
-    if (!block_it.data()->pdblk.poly_block()->IsText()) {
-      continue;
-    }
-    FCOORD re_rotation = block_it.data()->re_rotation();
-    float re_theta = re_rotation.angle();
-    FCOORD classify_rotation = block_it.data()->classify_rotation();
-    float classify_theta = classify_rotation.angle();
-    double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
-    if (rot_theta < 0) rot_theta += 4;
-    int num_rotations = static_cast<int>(rot_theta + 0.5);
-    (*block_orientation)[i] = num_rotations;
-    // The classify_rotation is non-zero only if the text has vertical
-    // writing direction.
-    (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
-    ++i;
-  }
-}
-
-
-void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
-  int debug_level = 0;
-  GetIntVariable("paragraph_debug_level", &debug_level);
-  if (paragraph_models_ == nullptr)
-    paragraph_models_ = new GenericVector<ParagraphModel*>;
-  MutableIterator *result_it = GetMutableIterator();
-  do {  // Detect paragraphs for this block
-    GenericVector<ParagraphModel *> models;
-    ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
-                                  result_it, &models);
-    *paragraph_models_ += models;
-  } while (result_it->Next(RIL_BLOCK));
-  delete result_it;
-}
+    void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
+                                               bool** vertical_writing) {
+      delete[] *block_orientation;
+      *block_orientation = nullptr;
+      delete[] *vertical_writing;
+      *vertical_writing = nullptr;
+      BLOCK_IT block_it(block_list_);
+
+      block_it.move_to_first();
+      int num_blocks = 0;
+      for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+        if (!block_it.data()->pdblk.poly_block()->IsText()) {
+          continue;
+        }
+        ++num_blocks;
+      }
+      if (!num_blocks) {
+        tprintf("WARNING: Found no blocks\n");
+        return;
+      }
+      *block_orientation = new int[num_blocks];
+      *vertical_writing = new bool[num_blocks];
+      block_it.move_to_first();
+      int i = 0;
+      for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+           block_it.forward()) {
+        if (!block_it.data()->pdblk.poly_block()->IsText()) {
+          continue;
+        }
+        FCOORD re_rotation = block_it.data()->re_rotation();
+        float re_theta = re_rotation.angle();
+        FCOORD classify_rotation = block_it.data()->classify_rotation();
+        float classify_theta = classify_rotation.angle();
+        double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
+        if (rot_theta < 0) rot_theta += 4;
+        int num_rotations = static_cast<int>(rot_theta + 0.5);
+        (*block_orientation)[i] = num_rotations;
+        // The classify_rotation is non-zero only if the text has vertical
+        // writing direction.
+        (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
+        ++i;
+      }
+    }
+
+
+    void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
+      int debug_level = 0;
+      GetIntVariable("paragraph_debug_level", &debug_level);
+      if (paragraph_models_ == nullptr)
+        paragraph_models_ = new GenericVector<ParagraphModel*>;
+      MutableIterator *result_it = GetMutableIterator();
+      do {  // Detect paragraphs for this block
+        GenericVector<ParagraphModel *> models;
+        ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
+                                      result_it, &models);
+        *paragraph_models_ += models;
+      } while (result_it->Next(RIL_BLOCK));
+      delete result_it;
+    }
 
 /** This method returns the string form of the specified unichar. */
-const char* TessBaseAPI::GetUnichar(int unichar_id) {
-  return tesseract_->unicharset.id_to_unichar(unichar_id);
-}
+    const char* TessBaseAPI::GetUnichar(int unichar_id) {
+      return tesseract_->unicharset.id_to_unichar(unichar_id);
+    }
 
 /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-const Dawg *TessBaseAPI::GetDawg(int i) const {
-  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
-  return tesseract_->getDict().GetDawg(i);
-}
+    const Dawg *TessBaseAPI::GetDawg(int i) const {
+      if (tesseract_ == nullptr || i < 0 || i >= NumDawgs()) return nullptr;
+      return tesseract_->getDict().GetDawg(i);  // i is in [0, NumDawgs()).
+    }
 
 /** Return the number of dawgs loaded into tesseract_ object. */
-int TessBaseAPI::NumDawgs() const {
-  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
-}
+    int TessBaseAPI::NumDawgs() const {
+      return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
+    }
 
 /** Escape a char string - remove <>&"' with HTML codes. */
-STRING HOcrEscape(const char* text) {
-  STRING ret;
-  const char *ptr;
-  for (ptr = text; *ptr; ptr++) {
-    switch (*ptr) {
-      case '<': ret += "&lt;"; break;
-      case '>': ret += "&gt;"; break;
-      case '&': ret += "&amp;"; break;
-      case '"': ret += "&quot;"; break;
-      case '\'': ret += "&#39;"; break;
-      default: ret += *ptr;
+    STRING HOcrEscape(const char* text) {
+      STRING ret;
+      const char *ptr;
+      for (ptr = text; *ptr; ptr++) {
+        switch (*ptr) {
+          case '<': ret += "&lt;"; break;
+          case '>': ret += "&gt;"; break;
+          case '&': ret += "&amp;"; break;
+          case '"': ret += "&quot;"; break;
+          case '\'': ret += "&#39;"; break;
+          default: ret += *ptr;
+        }
+      }
+      return ret;
     }
-  }
-  return ret;
-}
 
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -2653,271 +2820,271 @@ STRING HOcrEscape(const char* text) {
 // Ocropus add-ons.
 
 /** Find lines from the image making the BLOCK_LIST. */
-BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
-  ASSERT_HOST(FindLines() == 0);
-  BLOCK_LIST* result = block_list_;
-  block_list_ = nullptr;
-  return result;
-}
+    BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
+      ASSERT_HOST(FindLines() == 0);
+      BLOCK_LIST* result = block_list_;
+      block_list_ = nullptr;
+      return result;
+    }
 
 /**
  * Delete a block list.
  * This is to keep BLOCK_LIST pointer opaque
  * and let go of including the other headers.
  */
-void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
-  delete block_list;
-}
-
-
-ROW *TessBaseAPI::MakeTessOCRRow(float baseline,
-                                 float xheight,
-                                 float descender,
-                                 float ascender) {
-  int32_t xstarts[] = {-32000};
-  double quad_coeffs[] = {0, 0, baseline};
-  return new ROW(1,
-                 xstarts,
-                 quad_coeffs,
-                 xheight,
-                 ascender - (baseline + xheight),
-                 descender - baseline,
-                 0,
-                 0);
-}
+    void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
+      delete block_list;
+    }
+
+
+    ROW *TessBaseAPI::MakeTessOCRRow(float baseline,
+                                     float xheight,
+                                     float descender,
+                                     float ascender) {
+      int32_t xstarts[] = {-32000};
+      double quad_coeffs[] = {0, 0, baseline};
+      return new ROW(1,
+                     xstarts,
+                     quad_coeffs,
+                     xheight,
+                     ascender - (baseline + xheight),
+                     descender - baseline,
+                     0,
+                     0);
+    }
 
 /** Creates a TBLOB* from the whole pix. */
-TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
-  int width = pixGetWidth(pix);
-  int height = pixGetHeight(pix);
-  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
-
-  // Create C_BLOBs from the page
-  extract_edges(pix, &block);
-
-  // Merge all C_BLOBs
-  C_BLOB_LIST *list = block.blob_list();
-  C_BLOB_IT c_blob_it(list);
-  if (c_blob_it.empty())
-    return nullptr;
-  // Move all the outlines to the first blob.
-  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
-  for (c_blob_it.forward();
-       !c_blob_it.at_first();
-       c_blob_it.forward()) {
-      C_BLOB *c_blob = c_blob_it.data();
-      ol_it.add_list_after(c_blob->out_list());
-  }
-  // Convert the first blob to the output TBLOB.
-  return TBLOB::PolygonalCopy(false, c_blob_it.data());
-}
+    TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
+      int width = pixGetWidth(pix);
+      int height = pixGetHeight(pix);
+      BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
+
+      // Create C_BLOBs from the page
+      extract_edges(pix, &block);
+
+      // Merge all C_BLOBs
+      C_BLOB_LIST *list = block.blob_list();
+      C_BLOB_IT c_blob_it(list);
+      if (c_blob_it.empty())
+        return nullptr;
+      // Move all the outlines to the first blob.
+      C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
+      for (c_blob_it.forward();
+           !c_blob_it.at_first();
+           c_blob_it.forward()) {
+        C_BLOB *c_blob = c_blob_it.data();
+        ol_it.add_list_after(c_blob->out_list());
+      }
+      // Convert the first blob to the output TBLOB.
+      return TBLOB::PolygonalCopy(false, c_blob_it.data());
+    }
 
 /**
  * This method baseline normalizes a TBLOB in-place. The input row is used
  * for normalization. The denorm is an optional parameter in which the
  * normalization-antidote is returned.
  */
-void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
-  TBOX box = tblob->bounding_box();
-  float x_center = (box.left() + box.right()) / 2.0f;
-  float baseline = row->base_line(x_center);
-  float scale = kBlnXHeight / row->x_height();
-  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
-                   0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
-}
+    void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
+      TBOX box = tblob->bounding_box();
+      float x_center = (box.left() + box.right()) / 2.0f;
+      float baseline = row->base_line(x_center);
+      float scale = kBlnXHeight / row->x_height();
+      tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
+                       0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
+    }
 
 /**
  * Return a TBLOB * from the whole pix.
  * To be freed later with delete.
  */
-static TBLOB *make_tesseract_blob(float baseline, float xheight,
-                                  float descender, float ascender,
-                                  bool numeric_mode, Pix* pix) {
-  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
-
-  // Normalize TBLOB
-  ROW *row =
-      TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
-  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
-  delete row;
-  return tblob;
-}
+    static TBLOB *make_tesseract_blob(float baseline, float xheight,
+                                      float descender, float ascender,
+                                      bool numeric_mode, Pix* pix) {
+      TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
+      if (tblob == nullptr) return nullptr;  // MakeTBLOB found no blobs.
+      // Normalize TBLOB
+      ROW *row =
+              TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
+      TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
+      delete row;
+      return tblob;
+    }
 
 /**
  * Adapt to recognize the current image as the given character.
  * The image must be preloaded into pix_binary_ and be just an image
  * of a single character.
  */
-void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
-                                   int length,
-                                   float baseline,
-                                   float xheight,
-                                   float descender,
-                                   float ascender) {
-  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
-  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
-                                    tesseract_->classify_bln_numeric_mode,
-                                    tesseract_->pix_binary());
-  float threshold;
-  float best_rating = -100;
-
-
-  // Classify to get a raw choice.
-  BLOB_CHOICE_LIST choices;
-  tesseract_->AdaptiveClassifier(blob, &choices);
-  BLOB_CHOICE_IT choice_it;
-  choice_it.set_to_list(&choices);
-  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
-       choice_it.forward()) {
-    if (choice_it.data()->rating() > best_rating) {
-      best_rating = choice_it.data()->rating();
-    }
-  }
+    void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
+                                       int length,
+                                       float baseline,
+                                       float xheight,
+                                       float descender,
+                                       float ascender) {
+      UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
+      TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
+                                        tesseract_->classify_bln_numeric_mode,
+                                        tesseract_->pix_binary());
+      if (blob == nullptr) return;  // Image had no blobs: nothing to adapt to.
+      float threshold;
+      float best_rating = -100;
+
+      // Classify to get a raw choice.
+      BLOB_CHOICE_LIST choices;
+      tesseract_->AdaptiveClassifier(blob, &choices);
+      BLOB_CHOICE_IT choice_it;
+      choice_it.set_to_list(&choices);
+      for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
+           choice_it.forward()) {
+        if (choice_it.data()->rating() > best_rating) {
+          best_rating = choice_it.data()->rating();
+        }
+      }
 
-  threshold = tesseract_->matcher_good_threshold;
+      threshold = tesseract_->matcher_good_threshold;
+
+      if (blob->outlines)
+        tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
+                                tesseract_->AdaptedTemplates);
+      delete blob;
+    }
 
-  if (blob->outlines)
-    tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
-                            tesseract_->AdaptedTemplates);
-  delete blob;
-}
 
+    PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
+      PAGE_RES *page_res = new PAGE_RES(false, block_list,
+                                        &(tesseract_->prev_word_best_choice_));
+      tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
+      return page_res;
+    }
 
-PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
-  PAGE_RES *page_res = new PAGE_RES(false, block_list,
+    PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
+                                            PAGE_RES* pass1_result) {
+      if (!pass1_result)
+        pass1_result = new PAGE_RES(false, block_list,
                                     &(tesseract_->prev_word_best_choice_));
-  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
-  return page_res;
-}
-
-PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
-                                        PAGE_RES* pass1_result) {
-  if (!pass1_result)
-    pass1_result = new PAGE_RES(false, block_list,
-                                &(tesseract_->prev_word_best_choice_));
-  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
-  return pass1_result;
-}
-
-struct TESS_CHAR : ELIST_LINK {
-  char *unicode_repr;
-  int length;  // of unicode_repr
-  float cost;
-  TBOX box;
-
-  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
-    length = (len == -1 ? strlen(repr) : len);
-    unicode_repr = new char[length + 1];
-    strncpy(unicode_repr, repr, length);
-  }
+      tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
+      return pass1_result;
+    }
 
-  TESS_CHAR()
-    : unicode_repr(nullptr),
-      length(0),
-      cost(0.0f)
-  {  // Satisfies ELISTIZE.
-  }
-  ~TESS_CHAR() {
-    delete [] unicode_repr;
-  }
-};
+    // List element for one extracted character: its text bytes, the cost
+    // assigned to it and its bounding box.
+    struct TESS_CHAR : ELIST_LINK {
+        char *unicode_repr;  // owned; nullptr or a NUL-terminated copy
+        int length;  // of unicode_repr
+        float cost;
+        TBOX box;
+        // Copies len bytes of repr (all of repr when len == -1).
+        TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
+          length = (len == -1 ? strlen(repr) : len);
+          unicode_repr = new char[length + 1];
+          strncpy(unicode_repr, repr, length);
+          unicode_repr[length] = '\0';  // strncpy does not write this byte
+        }
 
-ELISTIZEH(TESS_CHAR)
-ELISTIZE(TESS_CHAR)
+        TESS_CHAR()  // Satisfies ELISTIZE.
+                : unicode_repr(nullptr),
+                  length(0),
+                  cost(0.0f) {
+        }
+        ~TESS_CHAR() { delete [] unicode_repr; }
+    };
 
-static void add_space(TESS_CHAR_IT* it) {
-  TESS_CHAR *t = new TESS_CHAR(0, " ");
-  it->add_after_then_move(t);
-}
+    ELISTIZEH(TESS_CHAR)
+    ELISTIZE(TESS_CHAR)
 
+    static void add_space(TESS_CHAR_IT* it) {
+      // Append a zero-cost single-space entry via add_after_then_move.
+      it->add_after_then_move(new TESS_CHAR(0, " "));
+    }
 
-static float rating_to_cost(float rating) {
-  rating = 100 + rating;
-  // cuddled that to save from coverage profiler
-  // (I have never seen ratings worse than -100,
-  //  but the check won't hurt)
-  if (rating < 0) rating = 0;
-  return rating;
-}
+
+    static float rating_to_cost(float rating) {
+      // The cost is the rating shifted up by 100 and floored at zero.
+      // Per the original author's note, ratings worse than -100 have not
+      // been observed in practice, so the floor is only a safety net
+      // (NaN still passes through unchanged, as before).
+      const float shifted = 100 + rating;
+      return shifted < 0 ? 0 : shifted;
+    }
 
 /**
  * Extract the OCR results, costs (penalty points for uncertainty),
  * and the bounding boxes of the characters.
  */
-static void extract_result(TESS_CHAR_IT* out,
-                           PAGE_RES* page_res) {
-  PAGE_RES_IT page_res_it(page_res);
-  int word_count = 0;
-  while (page_res_it.word() != nullptr) {
-    WERD_RES *word = page_res_it.word();
-    const char *str = word->best_choice->unichar_string().string();
-    const char *len = word->best_choice->unichar_lengths().string();
-    TBOX real_rect = word->word->bounding_box();
-
-    if (word_count)
-      add_space(out);
-    int n = strlen(len);
-    for (int i = 0; i < n; i++) {
-      TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
-                                    str, *len);
-      tc->box = real_rect.intersection(word->box_word->BlobBox(i));
-      out->add_after_then_move(tc);
-       str += *len;
-      len++;
-    }
-    page_res_it.forward();
-    word_count++;
-  }
-}
+    static void extract_result(TESS_CHAR_IT* out,
+                               PAGE_RES* page_res) {
+      // Visits each word of page_res in order and appends one TESS_CHAR per
+      // unichar of the word's best choice; a space entry is added before
+      // every word except the first.
+      PAGE_RES_IT word_it(page_res);
+      for (bool first_word = true; word_it.word() != nullptr;
+           word_it.forward(), first_word = false) {
+        WERD_RES *res = word_it.word();
+        const char *bytes = res->best_choice->unichar_string().string();
+        const char *step = res->best_choice->unichar_lengths().string();
+        TBOX word_box = res->word->bounding_box();
+        if (!first_word)
+          add_space(out);
+        // Each byte of the lengths string is the byte count of the next
+        // unichar inside the text string.
+        const int num_chars = strlen(step);
+        for (int c = 0; c < num_chars; ++c, bytes += *step, ++step) {
+          TESS_CHAR *entry = new TESS_CHAR(
+              rating_to_cost(res->best_choice->rating()), bytes, *step);
+          entry->box = word_box.intersection(res->box_word->BlobBox(c));
+          out->add_after_then_move(entry);
+        }
+      }
+    }
 
 /**
  * Extract the OCR results, costs (penalty points for uncertainty),
  * and the bounding boxes of the characters.
  */
-int TessBaseAPI::TesseractExtractResult(char** text,
-                                        int** lengths,
-                                        float** costs,
-                                        int** x0,
-                                        int** y0,
-                                        int** x1,
-                                        int** y1,
-                                        PAGE_RES* page_res) {
-  TESS_CHAR_LIST tess_chars;
-  TESS_CHAR_IT tess_chars_it(&tess_chars);
-  extract_result(&tess_chars_it, page_res);
-  tess_chars_it.move_to_first();
-  int n = tess_chars.length();
-  int text_len = 0;
-  *lengths = new int[n];
-  *costs = new float[n];
-  *x0 = new int[n];
-  *y0 = new int[n];
-  *x1 = new int[n];
-  *y1 = new int[n];
-  int i = 0;
-  for (tess_chars_it.mark_cycle_pt();
-       !tess_chars_it.cycled_list();
-       tess_chars_it.forward(), i++) {
-    TESS_CHAR *tc = tess_chars_it.data();
-    text_len += (*lengths)[i] = tc->length;
-    (*costs)[i] = tc->cost;
-    (*x0)[i] = tc->box.left();
-    (*y0)[i] = tc->box.bottom();
-    (*x1)[i] = tc->box.right();
-    (*y1)[i] = tc->box.top();
-  }
-  char *p = *text = new char[text_len];
-
-  tess_chars_it.move_to_first();
-  for (tess_chars_it.mark_cycle_pt();
-        !tess_chars_it.cycled_list();
-       tess_chars_it.forward()) {
-    TESS_CHAR *tc = tess_chars_it.data();
-    strncpy(p, tc->unicode_repr, tc->length);
-    p += tc->length;
-  }
-  return n;
-}
+    int TessBaseAPI::TesseractExtractResult(char** text,
+                                            int** lengths,
+                                            float** costs,
+                                            int** x0,
+                                            int** y0,
+                                            int** x1,
+                                            int** y1,
+                                            PAGE_RES* page_res) {
+      // Gather the characters in a temporary list so the outputs can be sized.
+      TESS_CHAR_LIST chars;
+      TESS_CHAR_IT it(&chars);
+      extract_result(&it, page_res);
+      const int count = chars.length();
+      // Every output array is allocated with new[] and returned via its
+      // out-parameter.
+      int* const len_out = *lengths = new int[count];
+      float* const cost_out = *costs = new float[count];
+      int* const left = *x0 = new int[count];
+      int* const bottom = *y0 = new int[count];
+      int* const right = *x1 = new int[count];
+      int* const top = *y1 = new int[count];
+      // First pass: fill the per-character arrays and total the text bytes.
+      int total_bytes = 0, slot = 0;
+      it.move_to_first();
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++slot) {
+        TESS_CHAR* ch = it.data();
+        len_out[slot] = ch->length;
+        total_bytes += ch->length;
+        cost_out[slot] = ch->cost;
+        left[slot] = ch->box.left();
+        bottom[slot] = ch->box.bottom();
+        right[slot] = ch->box.right();
+        top[slot] = ch->box.top();
+      }
+      // Second pass: concatenate the bytes; no terminator is appended.
+      char* cursor = *text = new char[total_bytes];
+      it.move_to_first();
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        TESS_CHAR* ch = it.data();
+        strncpy(cursor, ch->unicode_repr, ch->length);
+        cursor += ch->length;
+      }
+      return count;
+    }
 
 /** This method returns the features associated with the input blob. */
 // The resulting features are returned in int_features, which must be
@@ -2926,80 +3093,80 @@ int TessBaseAPI::TesseractExtractResult(char** text,
 // On return feature_outline_index is filled with an index of the outline
 // corresponding to each feature in int_features.
 // TODO(rays) Fix the caller to out outline_counts instead.
-void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
-                                     INT_FEATURE_STRUCT* int_features,
-                                     int* num_features,
-                                     int* feature_outline_index) {
-  GenericVector<int> outline_counts;
-  GenericVector<INT_FEATURE_STRUCT> bl_features;
-  GenericVector<INT_FEATURE_STRUCT> cn_features;
-  INT_FX_RESULT_STRUCT fx_info;
-  tesseract_->ExtractFeatures(*blob, false, &bl_features,
-                              &cn_features, &fx_info, &outline_counts);
-  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
-    *num_features = 0;
-    return;  // Feature extraction failed.
-  }
-  *num_features = cn_features.size();
-  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
-  // TODO(rays) Pass outline_counts back and simplify the calling code.
-  if (feature_outline_index != nullptr) {
-    int f = 0;
-    for (int i = 0; i < outline_counts.size(); ++i) {
-      while (f < outline_counts[i])
-        feature_outline_index[f++] = i;
+    void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
+                                         INT_FEATURE_STRUCT* int_features,
+                                         int* num_features,
+                                         int* feature_outline_index) {
+      GenericVector<int> outline_counts;
+      GenericVector<INT_FEATURE_STRUCT> bl_out;
+      GenericVector<INT_FEATURE_STRUCT> cn_out;
+      INT_FX_RESULT_STRUCT fx_result;
+      tesseract_->ExtractFeatures(*blob, false, &bl_out, &cn_out, &fx_result,
+                                  &outline_counts);
+      // Only cn_out is handed back. An empty result, or one larger than
+      // MAX_NUM_INT_FEATURES, is reported as zero features.
+      const bool usable =
+          !cn_out.empty() && cn_out.size() <= MAX_NUM_INT_FEATURES;
+      *num_features = usable ? cn_out.size() : 0;
+      if (!usable) return;  // Feature extraction failed.
+      memcpy(int_features, &cn_out[0], *num_features * sizeof(cn_out[0]));
+      // TODO(rays) Pass outline_counts back and simplify the calling code.
+      if (feature_outline_index == nullptr) return;
+      // outline_counts[o] is used as the exclusive end index of outline o.
+      for (int o = 0, feat = 0; o < outline_counts.size(); ++o) {
+        for (; feat < outline_counts[o]; ++feat)
+          feature_outline_index[feat] = o;
+      }
     }
-  }
-}
 
 // This method returns the row to which a box of specified dimensions would
 // belong. If no good match is found, it returns nullptr.
-ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
-                                int left, int top, int right, int bottom) {
-  TBOX box(left, bottom, right, top);
-  BLOCK_IT b_it(blocks);
-  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
-    BLOCK* block = b_it.data();
-    if (!box.major_overlap(block->pdblk.bounding_box()))
-      continue;
-    ROW_IT r_it(block->row_list());
-    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
-      ROW* row = r_it.data();
-      if (!box.major_overlap(row->bounding_box()))
-        continue;
-      WERD_IT w_it(row->word_list());
-      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
-        WERD* word = w_it.data();
-        if (box.major_overlap(word->bounding_box()))
-          return row;
+    ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
+                                    int left, int top, int right, int bottom) {
+      // Search block -> row -> word, skipping levels with no major overlap.
+      TBOX query(left, bottom, right, top);
+      BLOCK_IT blk_it(blocks);
+      for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
+        BLOCK* blk = blk_it.data();
+        if (!query.major_overlap(blk->pdblk.bounding_box()))
+          continue;
+        ROW_IT row_it(blk->row_list());
+        for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+          ROW* candidate = row_it.data();
+          if (!query.major_overlap(candidate->bounding_box()))
+            continue;
+          WERD_IT wd_it(candidate->word_list());
+          for (wd_it.mark_cycle_pt(); !wd_it.cycled_list(); wd_it.forward()) {
+            if (query.major_overlap(wd_it.data()->bounding_box()))
+              return candidate;  // First row with an overlapping word.
+          }
+        }
       }
+      return nullptr;
     }
-  }
-  return nullptr;
-}
 
 /** Method to run adaptive classifier on a blob. */
-void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
-                                        int num_max_matches,
-                                        int* unichar_ids,
-                                        float* ratings,
-                                        int* num_matches_returned) {
-  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
-  tesseract_->AdaptiveClassifier(blob, choices);
-  BLOB_CHOICE_IT choices_it(choices);
-  int& index = *num_matches_returned;
-  index = 0;
-  for (choices_it.mark_cycle_pt();
-       !choices_it.cycled_list() && index < num_max_matches;
-       choices_it.forward()) {
-    BLOB_CHOICE* choice = choices_it.data();
-    unichar_ids[index] = choice->unichar_id();
-    ratings[index] = choice->rating();
-    ++index;
-  }
-  *num_matches_returned = index;
-  delete choices;
-}
+    void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
+                                            int num_max_matches,
+                                            int* unichar_ids,
+                                            float* ratings,
+                                            int* num_matches_returned) {
+      // Classify the blob and copy out at most num_max_matches choices.
+      BLOB_CHOICE_LIST* results = new BLOB_CHOICE_LIST;
+      tesseract_->AdaptiveClassifier(blob, results);
+      BLOB_CHOICE_IT it(results);
+      int copied = 0;
+      it.mark_cycle_pt();
+      while (!it.cycled_list() && copied < num_max_matches) {
+        BLOB_CHOICE* pick = it.data();
+        unichar_ids[copied] = pick->unichar_id();
+        ratings[copied] = pick->rating();
+        ++copied;
+        it.forward();
+      }
+      *num_matches_returned = copied;
+      delete results;
+    }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 }  // namespace tesseract.
diff --git a/src/api/baseapi.h b/src/api/baseapi.h
index da12d647b8..6da486011a 100644
--- a/src/api/baseapi.h
+++ b/src/api/baseapi.h
@@ -61,34 +61,34 @@ struct TBLOB;
 
 namespace tesseract {
 
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-class Trie;
-class Wordrec;
-
-typedef int (Dict::*DictFunc)(void* void_dawg_args,
-                              const UNICHARSET& unicharset,
-                              UNICHAR_ID unichar_id, bool word_end) const;
-typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
-                                                 const char* context,
-                                                 int context_bytes,
-                                                 const char* character,
-                                                 int character_bytes);
-typedef float (Dict::*ParamsModelClassifyFunc)(
-    const char *lang, void *path);
-typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
-                                         const WERD_CHOICE_LIST &best_choices,
-                                         const UNICHARSET &unicharset,
-                                         BlamerBundle *blamer_bundle);
-typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
-    TruthCallback;
+    class Dawg;
+    class Dict;
+    class EquationDetect;
+    class PageIterator;
+    class LTRResultIterator;
+    class ResultIterator;
+    class MutableIterator;
+    class TessResultRenderer;
+    class Tesseract;
+    class Trie;
+    class Wordrec;
+    // Member-function-pointer types on Dict and Wordrec, and a callback type.
+    typedef int (Dict::*DictFunc)(void* void_dawg_args,
+                                  const UNICHARSET& unicharset,
+                                  UNICHAR_ID unichar_id, bool word_end) const;
+    typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
+                                                     const char* context,
+                                                     int context_bytes,
+                                                     const char* character,
+                                                     int character_bytes);
+    typedef float (Dict::*ParamsModelClassifyFunc)(
+            const char *lang, void *path);
+    typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
+                                             const WERD_CHOICE_LIST &best_choices,
+                                             const UNICHARSET &unicharset,
+                                             BlamerBundle *blamer_bundle);
+    typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
+            TruthCallback;
 
 /**
  * Base class for all tesseract APIs.
@@ -98,829 +98,841 @@ typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
  * class to hide the data types so that users of this class don't have to
  * include any other Tesseract headers.
  */
-class TESS_API TessBaseAPI {
- public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char* Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Writes the thresholded image to stderr as a PBM file on receipt of a
-   * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
-   */
-  static void CatchSignals();
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char* name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char* GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix* GetInputImage();
-  int GetSourceYResolution();
-  const char* GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char* name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char* name, const char* value);
-  bool SetDebugVariable(const char* name, const char* value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, STRING *val);
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the parent directory of tessdata and
-   * must end in / . Any name after the last / will be stripped.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to eng.
-   * It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char* datapath, const char* language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const GenericVector<STRING> *vars_vec,
-           const GenericVector<STRING> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char* datapath, const char* language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char* data, int data_size, const char* language,
-           OcrEngineMode mode, char** configs, int configs_size,
-           const GenericVector<STRING>* vars_vec,
-           const GenericVector<STRING>* vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char* GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of STRINGs.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of STRINGs.
-   */
-  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
-
-  /**
-   * Init only the lang model component of Tesseract. The only functions
-   * that work after this init are SetVariable and IsValidWord.
-   * WARNING: temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int InitLangMod(const char* datapath, const char* language);
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char* filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char* filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char* TesseractRect(const unsigned char* imagedata,
-                      int bytes_per_pixel, int bytes_per_line,
-                      int left, int top, int width, int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-   /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char* imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix* pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recogntion results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * In extreme cases only, usually with a subclass of Thresholder, it
-   * is possible to provide a different Thresholder. The Thresholder may
-   * be preloaded with an image, settings etc, or they may be set after.
-   * Note that Tesseract takes ownership of the Thresholder and will
-   * delete it when it it is replaced or the API is destructed.
-   */
-  void SetThresholder(ImageThresholder* thresholder) {
-    delete thresholder_;
-    thresholder_ = thresholder;
-    ClearResults();
-  }
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix* GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa* GetRegions(Pixa** pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as an
-   * array of one element per line. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each line within its block is
-   * also returned as an array of one element per line. delete [] after use.
-   */
-  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
-                     Pixa** pixa, int** blockids, int** paraids);
-  /*
-     Helper method to extract from the thresholded image. (most common usage)
-  */
-  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as an
-   * array of one element per line. delete [] after use.
-   */
-  Boxa* GetStrips(Pixa** pixa, int** blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa* GetWords(Pixa** pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa* GetConnectedComponents(Pixa** cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If blockids is not nullptr, the paragraph-id of each component with its block
-   * is also returned as an array of one element per component. delete [] after
-   * use.
-   * If raw_image is true, then portions of the original image are extracted
-   * instead of the thresholded image and padded with raw_padding.
-   * If text_only is true, then only text components are returned.
-   */
-  Boxa* GetComponentImages(const PageIteratorLevel level,
-                           const bool text_only, const bool raw_image,
-                           const int raw_padding,
+    class TESS_API TessBaseAPI {
+    public:
+        TessBaseAPI();
+        virtual ~TessBaseAPI();
+
+        /**
+         * Returns the version identifier as a static string. Do not delete.
+         */
+        static const char* Version();
+
+        /**
+         * If compiled with OpenCL AND an available OpenCL
+         * device is deemed faster than serial code, then
+         * "device" is populated with the cl_device_id
+         * and returns sizeof(cl_device_id)
+         * otherwise *device=nullptr and returns 0.
+         */
+        static size_t getOpenCLDevice(void **device);
+
+        /**
+         * Writes the thresholded image to stderr as a PBM file on receipt of a
+         * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
+         */
+        static void CatchSignals();
+
+        /**
+         * Set the name of the input file. Needed for training and
+         * reading a UNLV zone file, and for searchable PDF output.
+         */
+        void SetInputName(const char* name);
+        /**
+         * These functions are required for searchable PDF output.
+         * We need our hands on the input file so that we can include
+         * it in the PDF without transcoding. If that is not possible,
+         * we need the original image. Finally, resolution metadata
+         * is stored in the PDF so we need that as well.
+         */
+        const char* GetInputName();
+        // Takes ownership of the input pix.
+        void SetInputImage(Pix *pix);
+        Pix* GetInputImage();
+        int GetSourceYResolution();
+        const char* GetDatapath();
+
+        /** Set the name of the bonus output files. Needed only for debugging. */
+        void SetOutputName(const char* name);
+
+        /**
+         * Set the value of an internal "parameter."
+         * Supply the name of the parameter and the value as a string, just as
+         * you would in a config file.
+         * Returns false if the name lookup failed.
+         * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
+         * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
+         * SetVariable may be used before Init, but settings will revert to
+         * defaults on End().
+         *
+         * Note: Must be called after Init(). Only works for non-init variables
+         * (init variables should be passed to Init()).
+         */
+        bool SetVariable(const char* name, const char* value);
+        bool SetDebugVariable(const char* name, const char* value);
+
+        /**
+         * Returns true if the parameter was found among Tesseract parameters.
+         * Fills in value with the value of the parameter.
+         */
+        bool GetIntVariable(const char *name, int *value) const;
+        bool GetBoolVariable(const char *name, bool *value) const;
+        bool GetDoubleVariable(const char *name, double *value) const;
+
+        /**
+         * Returns the pointer to the string that represents the value of the
+         * parameter if it was found among Tesseract parameters.
+         */
+        const char *GetStringVariable(const char *name) const;
+
+        /**
+         * Print Tesseract parameters to the given file.
+         */
+        void PrintVariables(FILE *fp) const;
+
+        /**
+         * Get value of named variable as a string, if it exists.
+         */
+        bool GetVariableAsString(const char *name, STRING *val);
+
+        /**
+         * Instances are now mostly thread-safe and totally independent,
+         * but some global parameters remain. Basically it is safe to use multiple
+         * TessBaseAPIs in different threads in parallel, UNLESS:
+         * you use SetVariable on some of the Params in classify and textord.
+         * If you do, then the effect will be to change it for all your instances.
+         *
+         * Start tesseract. Returns zero on success and -1 on failure.
+         * NOTE that the only members that may be called before Init are those
+         * listed above here in the class definition.
+         *
+         * The datapath must be the name of the parent directory of tessdata and
+         * must end in / . Any name after the last / will be stripped.
+         * The language is (usually) an ISO 639-3 string or nullptr will default to eng.
+         * It is entirely safe (and eventually will be efficient too) to call
+         * Init multiple times on the same instance to change language, or just
+         * to reset the classifier.
+         * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
+         * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
+         * English. Languages may specify internally that they want to be loaded
+         * with one or more other languages, so the ~ sign is available to override
+         * that. Eg if hin were set to load eng by default, then hin+~eng would force
+         * loading only hin. The number of loaded languages is limited only by
+         * memory, with the caveat that loading additional languages will impact
+         * both speed and accuracy, as there is more work to do to decide on the
+         * applicable language, and there is more chance of hallucinating incorrect
+         * words.
+         * WARNING: On changing languages, all Tesseract parameters are reset
+         * back to their default values. (Which may vary between languages.)
+         * If you have a rare need to set a Variable that controls
+         * initialization for a second call to Init you should explicitly
+         * call End() and then use SetVariable before Init. This is only a very
+         * rare use case, since there are very few uses that require any parameters
+         * to be set before Init.
+         *
+         * If set_only_non_debug_params is true, only params that do not contain
+         * "debug" in the name will be set.
+         */
+        int Init(const char* datapath, const char* language, OcrEngineMode mode,
+                 char **configs, int configs_size,
+                 const GenericVector<STRING> *vars_vec,
+                 const GenericVector<STRING> *vars_values,
+                 bool set_only_non_debug_params);
+        int Init(const char* datapath, const char* language, OcrEngineMode oem) {
+          return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
+        }
+        int Init(const char* datapath, const char* language) {
+          return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
+        }
+        // In-memory version reads the traineddata file directly from the given
+        // data[data_size] array, and/or reads data via a FileReader.
+        int Init(const char* data, int data_size, const char* language,
+                 OcrEngineMode mode, char** configs, int configs_size,
+                 const GenericVector<STRING>* vars_vec,
+                 const GenericVector<STRING>* vars_values,
+                 bool set_only_non_debug_params, FileReader reader);
+
+        /**
+         * Returns the languages string used in the last valid initialization.
+         * If the last initialization specified "deu+hin" then that will be
+         * returned. If hin loaded eng automatically as well, then that will
+         * not be included in this list. To find the languages actually
+         * loaded use GetLoadedLanguagesAsVector.
+         * The returned string should NOT be deleted.
+         */
+        const char* GetInitLanguagesAsString() const;
+
+        /**
+         * Returns the loaded languages in the vector of STRINGs.
+         * Includes all languages loaded by the last Init, including those loaded
+         * as dependencies of other loaded languages.
+         */
+        void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
+
+        /**
+         * Returns the available languages in the sorted vector of STRINGs.
+         */
+        void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
+
+        /**
+         * Init only the lang model component of Tesseract. The only functions
+         * that work after this init are SetVariable and IsValidWord.
+         * WARNING: temporary! This function will be removed from here and placed
+         * in a separate API at some future time.
+         */
+        int InitLangMod(const char* datapath, const char* language);
+
+        /**
+         * Init only for page layout analysis. Use only for calls to SetImage and
+         * AnalysePage. Calls that attempt recognition will generate an error.
+         */
+        void InitForAnalysePage();
+
+        /**
+         * Read a "config" file containing a set of param, value pairs.
+         * Searches the standard places: tessdata/configs, tessdata/tessconfigs
+         * and also accepts a relative or absolute path name.
+         * Note: only non-init params will be set (init params are set by Init()).
+         */
+        void ReadConfigFile(const char* filename);
+        /** Same as above, but only set debug params from the given config file. */
+        void ReadDebugConfigFile(const char* filename);
+
+        /**
+         * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
+         * The mode is stored as an IntParam so it can also be modified by
+         * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
+         */
+        void SetPageSegMode(PageSegMode mode);
+
+        /** Return the current page segmentation mode. */
+        PageSegMode GetPageSegMode() const;
+
+        /**
+         * Recognize a rectangle from an image and return the result as a string.
+         * May be called many times for a single Init.
+         * Currently has no error checking.
+         * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
+         * Palette color images will not work properly and must be converted to
+         * 24 bit.
+         * Binary images of 1 bit per pixel may also be given but they must be
+         * byte packed with the MSB of the first byte being the first pixel, and a
+         * 1 represents WHITE. For binary images set bytes_per_pixel=0.
+         * The recognized text is returned as a char* which is coded
+         * as UTF8 and must be freed with the delete [] operator.
+         *
+         * Note that TesseractRect is the simplified convenience interface.
+         * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
+         * and one or more of the Get*Text functions below.
+         */
+        char* TesseractRect(const unsigned char* imagedata,
+                            int bytes_per_pixel, int bytes_per_line,
+                            int left, int top, int width, int height);
+
+        /**
+         * Call between pages or documents etc to free up memory and forget
+         * adaptive data.
+         */
+        void ClearAdaptiveClassifier();
+
+        /**
+         * @defgroup AdvancedAPI Advanced API
+         * The following methods break TesseractRect into pieces, so you can
+         * get hold of the thresholded image, get the text in different formats,
+         * get bounding boxes, confidences etc.
+         */
+        /* @{ */
+
+        /**
+         * Provide an image for Tesseract to recognize. Format is as
+         * TesseractRect above. Copies the image buffer and converts to Pix.
+         * SetImage clears all recognition results, and sets the rectangle to the
+         * full image, so it may be followed immediately by a GetUTF8Text, and it
+         * will automatically perform recognition.
+         */
+        void SetImage(const unsigned char* imagedata, int width, int height,
+                      int bytes_per_pixel, int bytes_per_line);
+
+        /**
+         * Provide an image for Tesseract to recognize. As with SetImage above,
+         * Tesseract takes its own copy of the image, so it need not persist until
+         * after Recognize.
+         * Pix vs raw, which to use?
+         * Use Pix where possible. Tesseract uses Pix as its internal representation
+         * and it is therefore more efficient to provide a Pix directly.
+         */
+        void SetImage(Pix* pix);
+
+        /**
+         * Set the resolution of the source image in pixels per inch so font size
+         * information can be calculated in results.  Call this after SetImage().
+         */
+        void SetSourceResolution(int ppi);
+
+        /**
+         * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
+         * Each SetRectangle clears the recognition results so multiple rectangles
+         * can be recognized with the same image.
+         */
+        void SetRectangle(int left, int top, int width, int height);
+
+        /**
+         * In extreme cases only, usually with a subclass of Thresholder, it
+         * is possible to provide a different Thresholder. The Thresholder may
+         * be preloaded with an image, settings etc, or they may be set after.
+         * Note that Tesseract takes ownership of the Thresholder and will
+         * delete it when it is replaced or the API is destructed.
+         */
+        void SetThresholder(ImageThresholder* thresholder) {
+          delete thresholder_;
+          thresholder_ = thresholder;
+          ClearResults();
+        }
+
+        /**
+         * Get a copy of the internal thresholded image from Tesseract.
+         * Caller takes ownership of the Pix and must pixDestroy it.
+         * May be called any time after SetImage, or after TesseractRect.
+         */
+        Pix* GetThresholdedImage();
+
+        /**
+         * Get the result of page layout analysis as a leptonica-style
+         * Boxa, Pixa pair, in reading order.
+         * Can be called before or after Recognize.
+         */
+        Boxa* GetRegions(Pixa** pixa);
+
+        /**
+         * Get the textlines as a leptonica-style
+         * Boxa, Pixa pair, in reading order.
+         * Can be called before or after Recognize.
+         * If raw_image is true, then extract from the original image instead of the
+         * thresholded image and pad by raw_padding pixels.
+         * If blockids is not nullptr, the block-id of each line is also returned as an
+         * array of one element per line. delete [] after use.
+         * If paraids is not nullptr, the paragraph-id of each line within its block is
+         * also returned as an array of one element per line. delete [] after use.
+         */
+        Boxa* GetTextlines(const bool raw_image, const int raw_padding,
                            Pixa** pixa, int** blockids, int** paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa* GetComponentImages(const PageIteratorLevel level,
-                           const bool text_only,
-                           Pixa** pixa, int** blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator* AnalyseLayout();
-  PageIterator* AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC* monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  #ifndef DISABLED_LEGACY_ENGINE
-  /** Variant on Recognize used for testing chopper. */
-  int RecognizeForChopTest(ETEXT_DESC* monitor);
-  #endif
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRender to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char* filename, const char* retry_config,
-                    int timeout_millisec, TessResultRenderer* renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char* filename, const char* retry_config,
-                            int timeout_millisec, TessResultRenderer* renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for desciptions of other parameters.
-   */
-  bool ProcessPage(Pix* pix, int page_index, const char* filename,
-                   const char* retry_config, int timeout_millisec,
-                   TessResultRenderer* renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator* GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator* GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char* GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char* GetHOCRText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char* GetTSVText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char* GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char* GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
-                               const char** script_name, float* script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char* GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int* AllWordConfidences();
+        /*
+           Helper method to extract from the thresholded image. (most common usage)
+        */
+        Boxa* GetTextlines(Pixa** pixa, int** blockids) {
+          return GetTextlines(false, 0, pixa, blockids, nullptr);
+        }
+
+        /**
+         * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
+         * pair, in reading order. Enables downstream handling of non-rectangular
+         * regions.
+         * Can be called before or after Recognize.
+         * If blockids is not nullptr, the block-id of each line is also returned as an
+         * array of one element per line. delete [] after use.
+         */
+        Boxa* GetStrips(Pixa** pixa, int** blockids);
+
+        /**
+         * Get the words as a leptonica-style
+         * Boxa, Pixa pair, in reading order.
+         * Can be called before or after Recognize.
+         */
+        Boxa* GetWords(Pixa** pixa);
+
+        /**
+         * Gets the individual connected (text) components (created
+         * after the page segmentation step, but before recognition)
+         * as a leptonica-style Boxa, Pixa pair, in reading order.
+         * Can be called before or after Recognize.
+         * Note: the caller is responsible for calling boxaDestroy()
+         * on the returned Boxa array and pixaDestroy() on cc array.
+         */
+        Boxa* GetConnectedComponents(Pixa** cc);
+
+        /**
+         * Get the given level kind of components (block, textline, word etc.) as a
+         * leptonica-style Boxa, Pixa pair, in reading order.
+         * Can be called before or after Recognize.
+         * If blockids is not nullptr, the block-id of each component is also returned
+         * as an array of one element per component. delete [] after use.
+         * If paraids is not nullptr, the paragraph-id of each component within its
+         * block is also returned as an array of one element per component.
+         * delete [] after use.
+         * If raw_image is true, then portions of the original image are extracted
+         * instead of the thresholded image and padded with raw_padding.
+         * If text_only is true, then only text components are returned.
+         */
+        Boxa* GetComponentImages(const PageIteratorLevel level,
+                                 const bool text_only, const bool raw_image,
+                                 const int raw_padding,
+                                 Pixa** pixa, int** blockids, int** paraids);
+        // Helper function to get binary images with no padding (most common usage).
+        Boxa* GetComponentImages(const PageIteratorLevel level,
+                                 const bool text_only,
+                                 Pixa** pixa, int** blockids) {
+          return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
+        }
+
+        /**
+         * Returns the scale factor of the thresholded image that would be returned by
+         * GetThresholdedImage() and the various GetX() methods that call
+         * GetComponentImages().
+         * Returns 0 if no thresholder has been set.
+         */
+        int GetThresholdedImageScaleFactor() const;
+
+        /**
+         * Runs page layout analysis in the mode set by SetPageSegMode.
+         * May optionally be called prior to Recognize to get access to just
+         * the page layout results. Returns an iterator to the results.
+         * If merge_similar_words is true, words are combined where suitable for use
+         * with a line recognizer. Use if you want to use AnalyseLayout to find the
+         * textlines, and then want to process textline fragments with an external
+         * line recognizer.
+         * Returns nullptr on error or an empty page.
+         * The returned iterator must be deleted after use.
+         * WARNING! This class points to data held within the TessBaseAPI class, and
+         * therefore can only be used while the TessBaseAPI class still exists and
+         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+         * DetectOS, or anything else that changes the internal PAGE_RES.
+         */
+        PageIterator* AnalyseLayout();
+        PageIterator* AnalyseLayout(bool merge_similar_words);
+
+        /**
+         * Recognize the image from SetAndThresholdImage, generating Tesseract
+         * internal structures. Returns 0 on success.
+         * Optional. The Get*Text functions below will call Recognize if needed.
+         * After Recognize, the output is kept internally until the next SetImage.
+         */
+        int Recognize(ETEXT_DESC* monitor);
+
+        /**
+         * Methods to retrieve information after SetAndThresholdImage(),
+         * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
+         */
 
 #ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
+        /** Variant on Recognize used for testing chopper. */
+        int RecognizeForChopTest(ETEXT_DESC* monitor);
+#endif
+
+        /**
+         * Turns images into symbolic text.
+         *
+         * filename can point to a single image, a multi-page TIFF,
+         * or a plain text list of image filenames.
+         *
+         * retry_config is useful for debugging. If not nullptr, you can fall
+         * back to an alternate configuration if a page fails for some
+         * reason.
+         *
+         * timeout_millisec terminates processing if any single page
+         * takes too long. Set to 0 for unlimited time.
+         *
+         * renderer is responsible for creating the output. For example,
+         * use the TessTextRenderer if you want plaintext output, or
+         * the TessPDFRenderer to produce searchable PDF.
+         *
+         * If tessedit_page_number is non-negative, will only process that
+         * single page. Works for multi-page tiff file, or filelist.
+         *
+         * Returns true if successful, false on error.
+         */
+        bool ProcessPages(const char* filename, const char* retry_config,
+                          int timeout_millisec, TessResultRenderer* renderer);
+        // Does the real work of ProcessPages.
+        bool ProcessPagesInternal(const char* filename, const char* retry_config,
+                                  int timeout_millisec, TessResultRenderer* renderer);
+
+        /**
+         * Turn a single image into symbolic text.
+         *
+         * The pix is the image processed. filename and page_index are
+         * metadata used by side-effect processes, such as reading a box
+         * file or formatting as hOCR.
+         *
+         * See ProcessPages for descriptions of other parameters.
+         */
+        bool ProcessPage(Pix* pix, int page_index, const char* filename,
+                         const char* retry_config, int timeout_millisec,
+                         TessResultRenderer* renderer);
+
+        /**
+         * Get a reading-order iterator to the results of LayoutAnalysis and/or
+         * Recognize. The returned iterator must be deleted after use.
+         * WARNING! This class points to data held within the TessBaseAPI class, and
+         * therefore can only be used while the TessBaseAPI class still exists and
+         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+         * DetectOS, or anything else that changes the internal PAGE_RES.
+         */
+        ResultIterator* GetIterator();
+
+        /**
+         * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
+         * The returned iterator must be deleted after use.
+         * WARNING! This class points to data held within the TessBaseAPI class, and
+         * therefore can only be used while the TessBaseAPI class still exists and
+         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+         * DetectOS, or anything else that changes the internal PAGE_RES.
+         */
+        MutableIterator* GetMutableIterator();
+
+        /**
+         * The recognized text is returned as a char* which is coded
+         * as UTF8 and must be freed with the delete [] operator.
+         */
+        char* GetUTF8Text();
+
+        /**
+         * Make a HTML-formatted string with hOCR markup from the internal
+         * data structures.
+         * page_number is 0-based but will appear in the output as 1-based.
+         * monitor can be used to
+         *  cancel the recognition
+         *  receive progress callbacks
+         * Returned string must be freed with the delete [] operator.
+         */
+        char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
+
+        /**
+         * Make an XML-formatted string with Alto markup from the internal
+         * data structures.
+         */
+        char* GetAltoText(ETEXT_DESC* monitor, int page_number);
+
+        /**
+         * Make a HTML-formatted string with hOCR markup from the internal
+         * data structures.
+         * page_number is 0-based but will appear in the output as 1-based.
+         * Returned string must be freed with the delete [] operator.
+         */
+        char* GetHOCRText(int page_number);
+
+        /**
+         * Make an XML-formatted string with ALTO markup from the internal data
+         * structures. NOTE(review): confirm the result is freed with delete [].
+         */
+        char* GetAltoText(int page_number);
+
+        /**
+         * Make a TSV-formatted string from the internal data structures.
+         * page_number is 0-based but will appear in the output as 1-based.
+         * Returned string must be freed with the delete [] operator.
+         */
+        char* GetTSVText(int page_number);
+
+        /**
+         * The recognized text is returned as a char* which is coded in the same
+         * format as a box file used in training.
+         * Constructs coordinates in the original image - not just the rectangle.
+         * page_number is a 0-based page index that will appear in the box file.
+         * Returned string must be freed with the delete [] operator.
+         */
+        char* GetBoxText(int page_number);
+
+        /**
+         * The recognized text is returned as a char* which is coded
+         * as UNLV format Latin-1 with specific reject and suspect codes.
+         * Returned string must be freed with the delete [] operator.
+         */
+        char* GetUNLVText();
+
+        /**
+         * Detect the orientation of the input image and apparent script (alphabet).
+         * orient_deg is the detected clockwise rotation of the input image in degrees
+         * (0, 90, 180, 270)
+         * orient_conf is the confidence (15.0 is reasonably confident)
+         * script_name is an ASCII string, the name of the script, e.g. "Latin"
+         * script_conf is confidence level in the script
+         * Returns true on success and writes values to each parameter as an output
+         */
+        bool DetectOrientationScript(int* orient_deg, float* orient_conf,
+                                     const char** script_name, float* script_conf);
+
+        /**
+         * The recognized text is returned as a char* which is coded
+         * as UTF8 and must be freed with the delete [] operator.
+         * page_number is a 0-based page index that will appear in the osd file.
+         */
+        char* GetOsdText(int page_number);
+
+        /** Returns the (average) confidence value between 0 and 100. */
+        int MeanTextConf();
+        /**
+         * Returns all word confidences (between 0 and 100) in an array, terminated
+         * by -1.  The calling function must delete [] after use.
+         * The number of confidences should correspond to the number of space-
+         * delimited words in GetUTF8Text.
+         */
+        int* AllWordConfidences();
+
+#ifndef DISABLED_LEGACY_ENGINE
+        /**
+         * Applies the given word to the adaptive classifier if possible.
+         * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
+         * tell the boundaries of the graphemes.
+         * Assumes that SetImage/SetRectangle have been used to set the image
+         * to the given word. The mode arg should be PSM_SINGLE_WORD or
+         * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
+         * The currently set PageSegMode is preserved.
+         * Returns false if adaption was not possible for some reason.
+         */
+        bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
 #endif  //  ndef DISABLED_LEGACY_ENGINE
 
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word);
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character);
-
-
-  bool GetTextDirection(int* out_offset, float* out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults*);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int** block_orientation,
-                                bool** vertical_writing);
-
-
-  #ifndef DISABLED_LEGACY_ENGINE
-
-  /** Sets Wordrec::fill_lattice_ function to point to the given function. */
-  void SetFillLatticeFunc(FillLatticeFunc f);
-
-  /** Find lines from the image making the BLOCK_LIST. */
-  BLOCK_LIST* FindLinesCreateBlockList();
-
-  /**
-   * Delete a block list.
-   * This is to keep BLOCK_LIST pointer opaque
-   * and let go of including the other headers.
-   */
-  static void DeleteBlockList(BLOCK_LIST* block_list);
-
-  /** Returns a ROW object created from the input row specification. */
-  static ROW *MakeTessOCRRow(float baseline, float xheight,
-                             float descender, float ascender);
-
-  /** Returns a TBLOB corresponding to the entire input image. */
-  static TBLOB *MakeTBLOB(Pix *pix);
-
-  /**
-   * This method baseline normalizes a TBLOB in-place. The input row is used
-   * for normalization. The denorm is an optional parameter in which the
-   * normalization-antidote is returned.
-   */
-  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
-
-  /** This method returns the features associated with the input image. */
-  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
-                          int* num_features, int* feature_outline_index);
-
-  /**
-   * This method returns the row to which a box of specified dimensions would
-   * belong. If no good match is found, it returns nullptr.
-   */
-  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
-                            int right, int bottom);
-
-  /**
-   * Method to run adaptive classifier on a blob.
-   * It returns at max num_max_matches results.
-   */
-  void RunAdaptiveClassifier(TBLOB* blob,
-                             int num_max_matches,
-                             int* unichar_ids,
-                             float* ratings,
-                             int* num_matches_returned);
+        /**
+         * Free up recognition results and any stored image data, without actually
+         * freeing any recognition data that would be time-consuming to reload.
+         * Afterwards, you must call SetImage or TesseractRect before doing
+         * any Recognize or Get* operation.
+         */
+        void Clear();
+
+        /**
+         * Close down tesseract and free up all memory. End() is equivalent to
+         * destructing and reconstructing your TessBaseAPI.
+         * Once End() has been used, none of the other API functions may be used
+         * other than Init and anything declared above it in the class definition.
+         */
+        void End();
+
+        /**
+         * Clear any library-level memory caches.
+         * There are a variety of expensive-to-load constant data structures (mostly
+         * language dictionaries) that are cached globally -- surviving the Init()
+         * and End() of individual TessBaseAPI's.  This function allows the clearing
+         * of these caches.
+         **/
+        static void ClearPersistentCache();
+
+        /**
+         * Check whether a word is valid according to Tesseract's language model
+         * @return 0 if the word is invalid, non-zero if valid.
+         * @warning temporary! This function will be removed from here and placed
+         * in a separate API at some future time.
+         */
+        int IsValidWord(const char *word);
+        // Returns true if utf8_character is defined in the UniCharset.
+        bool IsValidCharacter(const char *utf8_character);
+
+
+        bool GetTextDirection(int* out_offset, float* out_slope);
+
+        /** Sets Dict::letter_is_okay_ function to point to the given function. */
+        void SetDictFunc(DictFunc f);
+
+        /** Sets Dict::probability_in_context_ function to point to the given
+         * function.
+         */
+        void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
+
+        /**
+         * Estimates the Orientation And Script of the image.
+         * @return true if the image was processed successfully.
+         */
+        bool DetectOS(OSResults*);
+
+        /**
+         * Return text orientation of each block as determined by an earlier run
+         * of layout analysis.
+         */
+        void GetBlockTextOrientations(int** block_orientation,
+                                      bool** vertical_writing);
+
+
+#ifndef DISABLED_LEGACY_ENGINE
+
+        /** Sets Wordrec::fill_lattice_ function to point to the given function. */
+        void SetFillLatticeFunc(FillLatticeFunc f);
+
+        /** Find lines from the image making the BLOCK_LIST. */
+        BLOCK_LIST* FindLinesCreateBlockList();
+
+        /**
+         * Delete a block list.
+         * This is to keep BLOCK_LIST pointer opaque
+         * and let go of including the other headers.
+         */
+        static void DeleteBlockList(BLOCK_LIST* block_list);
+
+        /** Returns a ROW object created from the input row specification. */
+        static ROW *MakeTessOCRRow(float baseline, float xheight,
+                                   float descender, float ascender);
+
+        /** Returns a TBLOB corresponding to the entire input image. */
+        static TBLOB *MakeTBLOB(Pix *pix);
+
+        /**
+         * This method baseline normalizes a TBLOB in-place. The input row is used
+         * for normalization. NOTE(review): older docs mention an optional denorm
+         * output, but this signature has no such parameter - confirm and drop.
+         */
+        static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
+
+        /** This method returns the features associated with the input image. */
+        void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
+                                int* num_features, int* feature_outline_index);
+
+        /**
+         * This method returns the row to which a box of specified dimensions would
+         * belong. If no good match is found, it returns nullptr.
+         */
+        static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
+                                  int right, int bottom);
+
+        /**
+         * Method to run adaptive classifier on a blob.
+         * It returns at max num_max_matches results.
+         */
+        void RunAdaptiveClassifier(TBLOB* blob,
+                                   int num_max_matches,
+                                   int* unichar_ids,
+                                   float* ratings,
+                                   int* num_matches_returned);
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-  /** This method returns the string form of the specified unichar. */
-  const char* GetUnichar(int unichar_id);
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract* tesseract() const { return tesseract_; }
-
-  OcrEngineMode oem() const { return last_oem_requested_; }
-
-  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
-
-  void set_min_orientation_margin(double margin);
- /* @} */
-
- protected:
-
-  /** Common code for setting the image. Returns true if Init has been called. */
-  TESS_LOCAL bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  TESS_LOCAL virtual bool Threshold(Pix** pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  TESS_LOCAL int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  TESS_LOCAL LTRResultIterator* GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  TESS_LOCAL int TextLength(int* blob_count);
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
-
-  #ifndef DISABLED_LEGACY_ENGINE
-
-  /** @defgroup ocropusAddOns ocropus add-ons */
-  /* @{ */
-
-  /**
-   * Adapt to recognize the current image as the given character.
-   * The image must be preloaded and be just an image of a single character.
-   */
-  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
-                                   int length,
-                                   float baseline,
-                                   float xheight,
-                                   float descender,
-                                   float ascender);
-
-  /** Recognize text doing one pass only, using settings for a given pass. */
-  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
-
-  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
-                                        PAGE_RES* pass1_result);
-
-  /**
-   * Extract the OCR results, costs (penalty points for uncertainty),
-   * and the bounding boxes of the characters.
-   */
-  TESS_LOCAL static int TesseractExtractResult(char** text,
-                                    int** lengths,
-                                    float** costs,
-                                    int** x0,
-                                    int** y0,
-                                    int** x1,
-                                    int** y1,
-                                    PAGE_RES* page_res);
-
-  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
-  /* @} */
+        /** This method returns the string form of the specified unichar. */
+        const char* GetUnichar(int unichar_id);
+
+        /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
+        const Dawg *GetDawg(int i) const;
+
+        /** Return the number of dawgs loaded into tesseract_ object. */
+        int NumDawgs() const;
+
+        Tesseract* tesseract() const { return tesseract_; }
+
+        OcrEngineMode oem() const { return last_oem_requested_; }
+
+        void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
+
+        void set_min_orientation_margin(double margin);
+        /* @} */
+
+    protected:
+
+        /** Common code for setting the image. Returns true if Init has been called. */
+        TESS_LOCAL bool InternalSetImage();
+
+        /**
+         * Run the thresholder to make the thresholded image. If pix is not nullptr,
+         * the source is thresholded to pix instead of the internal IMAGE.
+         */
+        TESS_LOCAL virtual bool Threshold(Pix** pix);
+
+        /**
+         * Find lines from the image making the BLOCK_LIST.
+         * @return 0 on success.
+         */
+        TESS_LOCAL int FindLines();
+
+        /** Delete the pageres and block list ready for a new page. */
+        void ClearResults();
+
+        /**
+         * Return an LTR Result Iterator -- used only for training, as we really want
+         * to ignore all BiDi smarts at that point.
+         * delete once you're done with it.
+         */
+        TESS_LOCAL LTRResultIterator* GetLTRIterator();
+
+        /**
+         * Return the length of the output text string, as UTF8, assuming
+         * one newline per line and one per block, with a terminator,
+         * and assuming a single character reject marker for each rejected character.
+         * Also return the number of recognized blobs in blob_count.
+         */
+        TESS_LOCAL int TextLength(int* blob_count);
+
+        //// paragraphs.cpp ////////////////////////////////////////////////////
+        TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
+
+#ifndef DISABLED_LEGACY_ENGINE
+
+        /** @defgroup ocropusAddOns ocropus add-ons */
+        /* @{ */
+
+        /**
+         * Adapt to recognize the current image as the given character.
+         * The image must be preloaded and be just an image of a single character.
+         */
+        TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
+                                         int length,
+                                         float baseline,
+                                         float xheight,
+                                         float descender,
+                                         float ascender);
+
+        /** Recognize text doing one pass only, using settings for a given pass. */
+        TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
+
+        TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
+                                              PAGE_RES* pass1_result);
+
+        /**
+         * Extract the OCR results, costs (penalty points for uncertainty),
+         * and the bounding boxes of the characters.
+         */
+        TESS_LOCAL static int TesseractExtractResult(char** text,
+                                                     int** lengths,
+                                                     float** costs,
+                                                     int** x0,
+                                                     int** y0,
+                                                     int** x1,
+                                                     int** y1,
+                                                     PAGE_RES* page_res);
+
+        TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
+        /* @} */
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
- protected:
-  Tesseract*        tesseract_;       ///< The underlying data object.
-  Tesseract*        osd_tesseract_;   ///< For orientation & script detection.
-  EquationDetect*   equ_detect_;      ///<The equation detector.
-  FileReader reader_;                 ///< Reads files from any filesystem.
-  ImageThresholder* thresholder_;     ///< Image thresholding module.
-  GenericVector<ParagraphModel *>* paragraph_models_;
-  BLOCK_LIST*       block_list_;      ///< The page layout.
-  PAGE_RES*         page_res_;        ///< The page-level data.
-  STRING*           input_file_;      ///< Name used by training code.
-  STRING*           output_file_;     ///< Name used by debug code.
-  STRING*           datapath_;        ///< Current location of tessdata.
-  STRING*           language_;        ///< Last initialized language.
-  OcrEngineMode last_oem_requested_;  ///< Last ocr language mode requested.
-  bool          recognition_done_;   ///< page_res_ contains recognition data.
-  TruthCallback *truth_cb_;           /// fxn for setting truth_* in WERD_RES
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
- private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp,
-                            STRING *buf,
-                            const char* retry_config, int timeout_millisec,
-                            TessResultRenderer* renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data,
-                                 size_t size,
-                                 const char* filename,
-                                 const char* retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer* renderer,
-                                 int tessedit_page_number);
-  // There's currently no way to pass a document title from the
-  // Tesseract command line, and we have multiple places that choose
-  // to set the title to an empty string. Using a single named
-  // variable will hopefully reduce confusion if the situation changes
-  // in the future.
-  const char *unknown_title_ = "";
-};  // class TessBaseAPI.
+    protected:
+        Tesseract*        tesseract_;       ///< The underlying data object.
+        Tesseract*        osd_tesseract_;   ///< For orientation & script detection.
+        EquationDetect*   equ_detect_;      ///< The equation detector.
+        FileReader reader_;                 ///< Reads files from any filesystem.
+        ImageThresholder* thresholder_;     ///< Image thresholding module.
+        GenericVector<ParagraphModel *>* paragraph_models_;
+        BLOCK_LIST*       block_list_;      ///< The page layout.
+        PAGE_RES*         page_res_;        ///< The page-level data.
+        STRING*           input_file_;      ///< Name used by training code.
+        STRING*           output_file_;     ///< Name used by debug code.
+        STRING*           datapath_;        ///< Current location of tessdata.
+        STRING*           language_;        ///< Last initialized language.
+        OcrEngineMode last_oem_requested_;  ///< Last ocr language mode requested.
+        bool          recognition_done_;   ///< page_res_ contains recognition data.
+        TruthCallback *truth_cb_;           ///< Function for setting truth_* in WERD_RES.
+
+        /**
+         * @defgroup ThresholderParams Thresholder Parameters
+         * Parameters saved from the Thresholder. Needed to rebuild coordinates.
+         */
+        /* @{ */
+        int rect_left_;
+        int rect_top_;
+        int rect_width_;
+        int rect_height_;
+        int image_width_;
+        int image_height_;
+        /* @} */
+
+    private:
+        // A list of image filenames gets special consideration
+        bool ProcessPagesFileList(FILE *fp,
+                                  STRING *buf,
+                                  const char* retry_config, int timeout_millisec,
+                                  TessResultRenderer* renderer,
+                                  int tessedit_page_number);
+        // TIFF supports multipage so gets special consideration.
+        bool ProcessPagesMultipageTiff(const unsigned char *data,
+                                       size_t size,
+                                       const char* filename,
+                                       const char* retry_config,
+                                       int timeout_millisec,
+                                       TessResultRenderer* renderer,
+                                       int tessedit_page_number);
+        // There's currently no way to pass a document title from the
+        // Tesseract command line, and we have multiple places that choose
+        // to set the title to an empty string. Using a single named
+        // variable will hopefully reduce confusion if the situation changes
+        // in the future.
+        const char *unknown_title_ = "";
+    };  // class TessBaseAPI.
 
 /** Escape a char string - remove &<>"' with HTML codes. */
-STRING HOcrEscape(const char* text);
+    STRING HOcrEscape(const char* text);
 }  // namespace tesseract.
 
 #endif  // TESSERACT_API_BASEAPI_H_
diff --git a/src/api/capi.cpp b/src/api/capi.cpp
index 2146e8c8f1..c9216b8ede 100644
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
@@ -66,6 +66,16 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
     return new TessHOcrRenderer(outputbase, font_info);
 }
 
+TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase)
+{
+    return new TessAltoRenderer(outputbase);  // ALTO renderer, heap-allocated, returned as the base renderer type.
+}
+
+TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info)
+{
+    return new TessAltoRenderer(outputbase);  // NOTE(review): font_info is not forwarded - confirm TessAltoRenderer has no font_info ctor.
+}
+
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
                                                              BOOL textonly)
 {
@@ -239,9 +249,9 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, cons
 }
 
 TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language,
-    TessOcrEngineMode mode, char** configs, int configs_size,
-    char** vars_vec, char** vars_values, size_t vars_vec_size,
-    BOOL set_only_non_debug_params)
+                                        TessOcrEngineMode mode, char** configs, int configs_size,
+                                        char** vars_vec, char** vars_values, size_t vars_vec_size,
+                                        BOOL set_only_non_debug_params)
 {
     GenericVector<STRING> varNames;
     GenericVector<STRING> varValues;
@@ -332,8 +342,8 @@ TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI*
 }
 
 TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
-                                               int bytes_per_pixel, int bytes_per_line,
-                                               int left, int top, int width, int height)
+                                         int bytes_per_pixel, int bytes_per_line,
+                                         int left, int top, int width, int height)
 {
     return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height);
 }
@@ -346,7 +356,7 @@ TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle)
 #endif
 
 TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height,
-                                                  int bytes_per_pixel, int bytes_per_line)
+                                            int bytes_per_pixel, int bytes_per_line)
 {
     handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
 }
@@ -387,7 +397,7 @@ TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, str
 }
 
 TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
-                                                                  struct Pixa** pixa, int** blockids, int** paraids)
+                                                         struct Pixa** pixa, int** blockids, int** paraids)
 {
     return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids);
 }
@@ -545,7 +555,7 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand
 #ifndef DISABLED_LEGACY_ENGINE
 
 TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
-                                                            int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
+                                                           int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
 {
     bool success;
     success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
@@ -553,7 +563,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
 }
 
 TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
-                                                            int* num_features, int* FeatureOutlineIndex)
+                                                      int* num_features, int* FeatureOutlineIndex)
 {
     handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex);
 }
@@ -564,7 +574,7 @@ TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
 }
 
 TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
-                                                               int* unichar_ids, float* ratings, int* num_matches_returned)
+                                                         int* unichar_ids, float* ratings, int* num_matches_returned)
 {
     handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned);
 }
@@ -656,13 +666,13 @@ TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator*
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                               TessPageIteratorLevel element)
+                                                         TessPageIteratorLevel element)
 {
     return handle->IsAtFinalElement(level, element) ? TRUE : FALSE;
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                          int* left, int* top, int* right, int* bottom)
+                                                    int* left, int* top, int* right, int* bottom)
 {
     return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE;
 }
@@ -684,14 +694,14 @@ TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator*
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                       int* x1, int* y1, int* x2, int* y2)
+                                                 int* x1, int* y1, int* x2, int* y2)
 {
     return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE;
 }
 
 TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
-                                                          TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
-                                                          float* deskew_angle)
+                                                    TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
+                                                    float* deskew_angle)
 {
     handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
 }
@@ -754,8 +764,8 @@ TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const T
 }
 
 TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
-                                                                          BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
-                                                                          BOOL* is_smallcaps, int* pointsize, int* font_id)
+                                                                    BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
+                                                                    BOOL* is_smallcaps, int* pointsize, int* font_id)
 {
     bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps;
     const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif,
diff --git a/src/api/capi.h b/src/api/capi.h
index ba4445b547..85908c78b0 100644
--- a/src/api/capi.h
+++ b/src/api/capi.h
@@ -56,6 +56,7 @@ extern "C" {
 typedef tesseract::TessResultRenderer TessResultRenderer;
 typedef tesseract::TessTextRenderer TessTextRenderer;
 typedef tesseract::TessHOcrRenderer TessHOcrRenderer;
+typedef tesseract::TessAltoRenderer TessAltoRenderer;
 typedef tesseract::TessPDFRenderer TessPDFRenderer;
 typedef tesseract::TessUnlvRenderer TessUnlvRenderer;
 typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer;
@@ -117,7 +118,7 @@ struct Pixa;
 /* General free functions */
 
 TESS_API const char*
-               TESS_CALL TessVersion();
+TESS_CALL TessVersion();
 TESS_API void  TESS_CALL TessDeleteText(char* text);
 TESS_API void  TESS_CALL TessDeleteTextArray(char** arr);
 TESS_API void  TESS_CALL TessDeleteIntArray(int* arr);
@@ -125,7 +126,9 @@ TESS_API void  TESS_CALL TessDeleteIntArray(int* arr);
 /* Renderer API */
 TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
+TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
+TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info);
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
                                                              BOOL textonly);
 TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
@@ -134,7 +137,7 @@ TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* out
 TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer);
 TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next);
 TESS_API TessResultRenderer*
-              TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
+TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
 TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title);
 TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api);
 TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer);
@@ -146,7 +149,7 @@ TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer);
 /* Base API */
 
 TESS_API TessBaseAPI*
-               TESS_CALL TessBaseAPICreate();
+TESS_CALL TessBaseAPICreate();
 TESS_API void  TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle);
 
 TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device);
@@ -169,7 +172,7 @@ TESS_API BOOL  TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, co
 TESS_API BOOL  TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value);
 TESS_API BOOL  TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value);
 TESS_API const char*
-               TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
+TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
 
 TESS_API void  TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp);
 TESS_API BOOL  TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename);
@@ -191,16 +194,16 @@ TESS_API int   TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datap
 TESS_API int   TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language);
 
 TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode,
-    char** configs, int configs_size,
-    char** vars_vec, char** vars_values, size_t vars_vec_size,
-    BOOL set_only_non_debug_params);
+                                        char** configs, int configs_size,
+                                        char** vars_vec, char** vars_values, size_t vars_vec_size,
+                                        BOOL set_only_non_debug_params);
 
 TESS_API const char*
-               TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
 TESS_API char**
-               TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
 TESS_API char**
-               TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
 
 TESS_API int   TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language);
 TESS_API void  TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
@@ -210,7 +213,7 @@ TESS_API void  TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, con
 
 TESS_API void  TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode);
 TESS_API TessPageSegMode
-               TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
 
 TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
                                          int bytes_per_pixel, int bytes_per_line,
@@ -231,32 +234,32 @@ TESS_API void  TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImag
 #endif
 
 TESS_API struct Pix*
-               TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa);
+TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
+TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
-                                                  struct Pixa** pixa, int** blockids, int** paraids);
+TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
+                                   struct Pixa** pixa, int** blockids, int** paraids);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
+TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa);
+TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
+TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
-                                                       struct Pixa** pixa, int** blockids);
+TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
+                                        struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-               TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
-                                                        const BOOL raw_image, const int raw_padding,
-                                                        struct Pixa** pixa, int** blockids, int** paraids);
+TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
+                                         const BOOL raw_image, const int raw_padding,
+                                         struct Pixa** pixa, int** blockids, int** paraids);
 
 TESS_API int   TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
 
 TESS_API TessPageIterator*
-               TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
+TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
 
 TESS_API int   TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
 
@@ -267,15 +270,16 @@ TESS_API int   TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ET
 TESS_API BOOL  TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle,  const char* filename, const char* retry_config,
                                                  int timeout_millisec, TessResultRenderer* renderer);
 TESS_API BOOL  TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename,
-                                               const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
+                                                const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
 
 TESS_API TessResultIterator*
-               TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
 TESS_API TessMutableIterator*
-               TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
+TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
 
 TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
 TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
+TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number);
 
 TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
 
@@ -310,19 +314,19 @@ TESS_API BOOL  TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
 #endif  // def TESS_CAPI_INCLUDE_BASEAPI
 
 TESS_API const char*
-               TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
+TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
 
 TESS_API void  TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
 
 #ifdef TESS_CAPI_INCLUDE_BASEAPI
 
 TESS_API const TessDawg*
-               TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
+TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
 
 TESS_API int   TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle);
 
 TESS_API TessOcrEngineMode
-               TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
+TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
 
 TESS_API void  TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
 
@@ -335,7 +339,7 @@ TESS_API void  TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, i
 TESS_API void  TESS_CALL TessPageIteratorDelete(TessPageIterator* handle);
 
 TESS_API TessPageIterator*
-               TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
+TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
 
 TESS_API void  TESS_CALL TessPageIteratorBegin(TessPageIterator* handle);
 
@@ -350,14 +354,14 @@ TESS_API BOOL  TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* han
                                                      int* left, int* top, int* right, int* bottom);
 
 TESS_API TessPolyBlockType
-               TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
+TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
 
 TESS_API struct Pix*
-               TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
+TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
 
 TESS_API struct Pix*
-               TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
-                                                  struct Pix* original_image, int* left, int* top);
+TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
+                                   struct Pix* original_image, int* left, int* top);
 
 TESS_API BOOL  TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
                                                   int* x1, int* y1, int* x2, int* y2);
@@ -373,23 +377,23 @@ TESS_API void  TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle,
 
 TESS_API void  TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
 TESS_API TessResultIterator*
-               TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
+TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
 TESS_API TessPageIterator*
-               TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
+TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
 TESS_API const TessPageIterator*
-               TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
+TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
 TESS_API TessChoiceIterator*
-               TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
+TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
 
 TESS_API BOOL  TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API const char*
-               TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
+TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
 TESS_API const char*
-               TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
-                                                              BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
-                                                              BOOL* is_smallcaps, int* pointsize, int* font_id);
+TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
+                                               BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
+                                               BOOL* is_smallcaps, int* pointsize, int* font_id);
 
 TESS_API BOOL  TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
 TESS_API BOOL  TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
@@ -430,12 +434,12 @@ TESS_API void  TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, T
 TESS_API ROW*  TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender);
 
 TESS_API TBLOB*
-               TESS_CALL TessMakeTBLOB(Pix* pix);
+TESS_CALL TessMakeTBLOB(Pix* pix);
 
 TESS_API void  TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
 
 TESS_API BLOCK_LIST*
-               TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
+TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
 
 TESS_API void  TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
 
diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp
index af31be8e59..277bc47389 100644
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
@@ -30,250 +30,298 @@ namespace tesseract {
 /**********************************************************************
  * Base Renderer interface implementation
  **********************************************************************/
-TessResultRenderer::TessResultRenderer(const char *outputbase,
-                                       const char* extension)
-    : file_extension_(extension),
-      title_(""), imagenum_(-1),
-      fout_(stdout),
-      next_(nullptr),
-      happy_(true) {
-  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
-    STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
-    fout_ = fopen(outfile.string(), "wb");
-    if (fout_ == nullptr) {
-      happy_ = false;
+    TessResultRenderer::TessResultRenderer(const char *outputbase,
+                                           const char* extension)
+            : file_extension_(extension),
+              title_(""), imagenum_(-1),
+              fout_(stdout),  // Default sink; replaced below when a real output file is requested.
+              next_(nullptr),
+              happy_(true) {
+      if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {  // Neither "-" nor "stdout": write to a file.
+        STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);  // "<outputbase>.<extension>"
+        fout_ = fopen(outfile.string(), "wb");
+        if (fout_ == nullptr) {
+          happy_ = false;  // Could not open the output file; the renderer is now in its error state.
+        }
+      }
     }
-  }
-}
-
-TessResultRenderer::~TessResultRenderer() {
-  if (fout_ != nullptr) {
-    if (fout_ != stdout)
-      fclose(fout_);
-    else
-      clearerr(fout_);
-  }
-  delete next_;
-}
-
-void TessResultRenderer::insert(TessResultRenderer* next) {
-  if (next == nullptr) return;
-
-  TessResultRenderer* remainder = next_;
-  next_ = next;
-  if (remainder) {
-    while (next->next_ != nullptr) {
-      next = next->next_;
+
+    TessResultRenderer::~TessResultRenderer() {
+      if (fout_ != nullptr) {
+        if (fout_ != stdout)
+          fclose(fout_);  // A file opened by the constructor: close it.
+        else
+          clearerr(fout_);  // stdout is left open; only its error/EOF indicators are reset.
+      }
+      delete next_;  // Destroys the renderer chained after this one (deleting nullptr is a no-op).
+    }
+
+    void TessResultRenderer::insert(TessResultRenderer* next) {
+      if (next == nullptr) return;  // Nothing to insert.
+      // Splice `next` (and any renderers already chained to it) directly after this one.
+      TessResultRenderer* remainder = next_;
+      next_ = next;
+      if (remainder) {
+        while (next->next_ != nullptr) {  // Walk to the tail of the inserted chain.
+          next = next->next_;
+        }
+        next->next_ = remainder;  // Re-attach the renderers that previously followed this one.
+      }
+    }
+
+    bool TessResultRenderer::BeginDocument(const char* title) {
+      if (!happy_) return false;  // In the error state nothing is written and next_ is not called.
+      title_ = title;
+      imagenum_ = -1;  // AddImage pre-increments, so the first image of the document gets index 0.
+      bool ok = BeginDocumentHandler();
+      if (next_) {
+        ok = next_->BeginDocument(title) && ok;  // Call is on the left of &&, so the next renderer always runs.
+      }
+      return ok;
+    }
+
+    bool TessResultRenderer::AddImage(TessBaseAPI* api) {
+      if (!happy_) return false;  // In the error state nothing is written and next_ is not called.
+      ++imagenum_;  // Incremented before the handler runs, so the image is counted even if the handler fails.
+      bool ok = AddImageHandler(api);
+      if (next_) {
+        ok = next_->AddImage(api) && ok;  // Call is on the left of &&, so the next renderer always runs.
+      }
+      return ok;
+    }
+
+    bool TessResultRenderer::EndDocument() {
+      if (!happy_) return false;  // In the error state nothing is written and next_ is not called.
+      bool ok = EndDocumentHandler();
+      if (next_) {
+        ok = next_->EndDocument() && ok;  // Call is on the left of &&, so the next renderer always runs.
+      }
+      return ok;
+    }
+
+    void TessResultRenderer::AppendString(const char* s) {
+      AppendData(s, strlen(s));
+    }
+
+    void TessResultRenderer::AppendData(const char* s, int len) {
+      if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
+    }
+
+    bool TessResultRenderer::BeginDocumentHandler() {
+      return happy_;
+    }
+
+    bool TessResultRenderer::EndDocumentHandler() {
+      return happy_;
     }
-    next->next_ = remainder;
-  }
-}
-
-bool TessResultRenderer::BeginDocument(const char* title) {
-  if (!happy_) return false;
-  title_ = title;
-  imagenum_ = -1;
-  bool ok = BeginDocumentHandler();
-  if (next_) {
-    ok = next_->BeginDocument(title) && ok;
-  }
-  return ok;
-}
-
-bool TessResultRenderer::AddImage(TessBaseAPI* api) {
-  if (!happy_) return false;
-  ++imagenum_;
-  bool ok = AddImageHandler(api);
-  if (next_) {
-    ok = next_->AddImage(api) && ok;
-  }
-  return ok;
-}
-
-bool TessResultRenderer::EndDocument() {
-  if (!happy_) return false;
-  bool ok = EndDocumentHandler();
-  if (next_) {
-    ok = next_->EndDocument() && ok;
-  }
-  return ok;
-}
-
-void TessResultRenderer::AppendString(const char* s) {
-  AppendData(s, strlen(s));
-}
-
-void TessResultRenderer::AppendData(const char* s, int len) {
-  if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
-}
-
-bool TessResultRenderer::BeginDocumentHandler() {
-  return happy_;
-}
-
-bool TessResultRenderer::EndDocumentHandler() {
-  return happy_;
-}
 
 
 /**********************************************************************
  * UTF8 Text Renderer interface implementation
  **********************************************************************/
-TessTextRenderer::TessTextRenderer(const char *outputbase)
-    : TessResultRenderer(outputbase, "txt") {
-}
+    TessTextRenderer::TessTextRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "txt") {
+    }
 
-bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
-  const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
-  if (utf8 == nullptr) {
-    return false;
-  }
+    bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
+      if (utf8 == nullptr) {
+        return false;
+      }
 
-  AppendString(utf8.get());
+      AppendString(utf8.get());
 
-  const char* pageSeparator = api->GetStringVariable("page_separator");
-  if (pageSeparator != nullptr && *pageSeparator != '\0') {
-    AppendString(pageSeparator);
-  }
+      const char* pageSeparator = api->GetStringVariable("page_separator");
+      if (pageSeparator != nullptr && *pageSeparator != '\0') {
+        AppendString(pageSeparator);
+      }
 
-  return true;
-}
+      return true;
+    }
 
 /**********************************************************************
  * HOcr Text Renderer interface implementation
  **********************************************************************/
-TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
-    : TessResultRenderer(outputbase, "hocr") {
-    font_info_ = false;
-}
-
-TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
-    : TessResultRenderer(outputbase, "hocr") {
-    font_info_ = font_info;
-}
-
-bool TessHOcrRenderer::BeginDocumentHandler() {
-  AppendString(
-      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-      "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
-      "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
-      "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
-      "lang=\"en\">\n <head>\n  <title>");
-  AppendString(title());
-  AppendString(
-      "</title>\n"
-      "<meta http-equiv=\"Content-Type\" content=\"text/html;"
-      "charset=utf-8\" />\n"
-      "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
+    TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "hocr") {
+      font_info_ = false;
+    }
+
+    TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
+            : TessResultRenderer(outputbase, "hocr") {
+      font_info_ = font_info;
+    }
+
+    bool TessHOcrRenderer::BeginDocumentHandler() {
+      AppendString(
+              "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+              "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
+              "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+              "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
+              "lang=\"en\">\n <head>\n  <title>");
+      AppendString(title());
+      AppendString(
+              "</title>\n"
+              "<meta http-equiv=\"Content-Type\" content=\"text/html;"
+              "charset=utf-8\" />\n"
+              "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
               "' />\n"
-      "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
-      " ocr_line ocrx_word ocrp_wconf");
-  if (font_info_)
-    AppendString(
-      " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
-  AppendString(
-      "'/>\n"
-      "</head>\n<body>\n");
+              "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
+              " ocr_line ocrx_word ocrp_wconf");
+      if (font_info_)
+        AppendString(
+                " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
+      AppendString(
+              "'/>\n"
+              "</head>\n<body>\n");
+
+      return true;
+    }
 
-  return true;
-}
+    bool TessHOcrRenderer::EndDocumentHandler() {
+      AppendString(" </body>\n</html>\n");
 
-bool TessHOcrRenderer::EndDocumentHandler() {
-  AppendString(" </body>\n</html>\n");
+      return true;
+    }
+
+    bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
+      if (hocr == nullptr) return false;
+
+      AppendString(hocr.get());
+
+      return true;
+    }
+
+/**********************************************************************
+ * Alto Text Renderer interface implementation
+ **********************************************************************/
+    // ALTO renderer: the base class writes to "<outputbase>.alto" unless outputbase is "-" or "stdout".
+    TessAltoRenderer::TessAltoRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "alto") {}
 
-  return true;
-}
+    bool TessAltoRenderer::BeginDocumentHandler() {
+      // Writes the ALTO v3 document header (everything up to and including <Layout>); always returns true.
+      AppendString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+                   "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd\">\n"
+                   "\t<Description>\n"
+                   "\t\t<MeasurementUnit>pixel</MeasurementUnit>\n"
+                   "\t\t<sourceImageInformation>\n"
+                   "\t\t\t<fileName>");
+      // The title is written flush between the tags so no stray whitespace ends up inside <fileName>.
+      AppendString(title());
+      // Close <fileName> right after the title; report the build's real version, as the hOCR header does.
+      AppendString("</fileName>\n"
+                   "\t\t</sourceImageInformation>\n"
+                   "\t\t<OCRProcessing ID=\"OCR_0\">\n"
+                   "\t\t\t<ocrProcessingStep>\n"
+                   "\t\t\t\t<processingSoftware>\n"
+                   "\t\t\t\t\t<softwareName>tesseract " PACKAGE_VERSION "</softwareName>\n"
+                   "\t\t\t\t</processingSoftware>\n"
+                   "\t\t\t</ocrProcessingStep>\n"
+                   "\t\t</OCRProcessing>\n"
+                   "\t</Description>\n"
+                   "\t<Layout>\n");
+
+      return true;
+    }
+
+    bool TessAltoRenderer::EndDocumentHandler() {
+      AppendString("\t</Layout>\n</alto>\n");  // Close the <Layout> and root <alto> elements.
 
-bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
-  const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
-  if (hocr == nullptr) return false;
+      return true;
+    }
 
-  AppendString(hocr.get());
+    bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> alto(api->GetAltoText(imagenum()));
+      if (alto == nullptr) return false;  // GetAltoText returned nothing for this image.
 
-  return true;
-}
+      AppendString(alto.get());
+      // The ALTO text for the current image has been handed to the output.
+      return true;
+    }
 
 /**********************************************************************
  * TSV Text Renderer interface implementation
  **********************************************************************/
-TessTsvRenderer::TessTsvRenderer(const char* outputbase)
-    : TessResultRenderer(outputbase, "tsv") {
-  font_info_ = false;
-}
+    TessTsvRenderer::TessTsvRenderer(const char* outputbase)
+            : TessResultRenderer(outputbase, "tsv") {
+      font_info_ = false;
+    }
 
-TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
-    : TessResultRenderer(outputbase, "tsv") {
-  font_info_ = font_info;
-}
+    TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
+            : TessResultRenderer(outputbase, "tsv") {
+      font_info_ = font_info;
+    }
 
-bool TessTsvRenderer::BeginDocumentHandler() {
-  // Output TSV column headings
-  AppendString(
-      "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
-      "num\tleft\ttop\twidth\theight\tconf\ttext\n");
-  return true;
-}
+    bool TessTsvRenderer::BeginDocumentHandler() {
+      // Output TSV column headings
+      AppendString(
+              "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
+              "num\tleft\ttop\twidth\theight\tconf\ttext\n");
+      return true;
+    }
 
-bool TessTsvRenderer::EndDocumentHandler() { return true; }
+    bool TessTsvRenderer::EndDocumentHandler() { return true; }
 
-bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
-  const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
-  if (tsv == nullptr) return false;
+    bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
+      if (tsv == nullptr) return false;
 
-  AppendString(tsv.get());
+      AppendString(tsv.get());
 
-  return true;
-}
+      return true;
+    }
 
 /**********************************************************************
  * UNLV Text Renderer interface implementation
  **********************************************************************/
-TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
-    : TessResultRenderer(outputbase, "unlv") {
-}
+    TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "unlv") {
+    }
 
-bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
-  const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
-  if (unlv == nullptr) return false;
+    bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
+      if (unlv == nullptr) return false;
 
-  AppendString(unlv.get());
+      AppendString(unlv.get());
 
-  return true;
-}
+      return true;
+    }
 
 /**********************************************************************
  * BoxText Renderer interface implementation
  **********************************************************************/
-TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
-    : TessResultRenderer(outputbase, "box") {
-}
+    TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "box") {
+    }
 
-bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
-  const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
-  if (text == nullptr) return false;
+    bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
+      const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
+      if (text == nullptr) return false;
 
-  AppendString(text.get());
+      AppendString(text.get());
 
-  return true;
-}
+      return true;
+    }
 
 #ifndef DISABLED_LEGACY_ENGINE
 
 /**********************************************************************
  * Osd Text Renderer interface implementation
  **********************************************************************/
-TessOsdRenderer::TessOsdRenderer(const char* outputbase)
-    : TessResultRenderer(outputbase, "osd") {}
+    TessOsdRenderer::TessOsdRenderer(const char* outputbase)
+            : TessResultRenderer(outputbase, "osd") {}
 
-bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
-  char* osd = api->GetOsdText(imagenum());
-  if (osd == nullptr) return false;
+    bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
+      char* osd = api->GetOsdText(imagenum());
+      if (osd == nullptr) return false;
 
-  AppendString(osd);
-  delete[] osd;
+      AppendString(osd);
+      delete[] osd;
 
-  return true;
-}
+      return true;
+    }
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
diff --git a/src/api/renderer.h b/src/api/renderer.h
index 6c753403cc..cb91f3e005 100644
--- a/src/api/renderer.h
+++ b/src/api/renderer.h
@@ -27,7 +27,7 @@
 
 namespace tesseract {
 
-class TessBaseAPI;
+    class TessBaseAPI;
 
 /**
  * Interface for rendering tesseract results into a document, such as text,
@@ -42,218 +42,232 @@ class TessBaseAPI;
  * renderers can manage the associated state needed for the specific formats
  * in addition to the heuristics for producing it.
  */
-class TESS_API TessResultRenderer {
-  public:
-    virtual ~TessResultRenderer();
-
-    // Takes ownership of pointer so must be new'd instance.
-    // Renderers aren't ordered, but appends the sequences of next parameter
-    // and existing next(). The renderers should be unique across both lists.
-    void insert(TessResultRenderer* next);
-
-    // Returns the next renderer or nullptr.
-    TessResultRenderer* next() { return next_; }
-
-    /**
-     * Starts a new document with the given title.
-     * This clears the contents of the output data.
-     * Title should use UTF-8 encoding.
-     */
-    bool BeginDocument(const char* title);
-
-    /**
-     * Adds the recognized text from the source image to the current document.
-     * Invalid if BeginDocument not yet called.
-     *
-     * Note that this API is a bit weird but is designed to fit into the
-     * current TessBaseAPI implementation where the api has lots of state
-     * information that we might want to add in.
-     */
-    bool AddImage(TessBaseAPI* api);
-
-    /**
-     * Finishes the document and finalizes the output data
-     * Invalid if BeginDocument not yet called.
-     */
-    bool EndDocument();
-
-    const char* file_extension() const { return file_extension_; }
-    const char* title() const { return title_.c_str(); }
-
-    // Is everything fine? Otherwise something went wrong.
-    bool happy() { return happy_; }
-
-    /**
-     * Returns the index of the last image given to AddImage
-     * (i.e. images are incremented whether the image succeeded or not)
-     *
-     * This is always defined. It means either the number of the
-     * current image, the last image ended, or in the completed document
-     * depending on when in the document lifecycle you are looking at it.
-     * Will return -1 if a document was never started.
-     */
-    int imagenum() const { return imagenum_; }
-
-  protected:
-    /**
-     * Called by concrete classes.
-     *
-     * outputbase is the name of the output file excluding
-     * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-     *
-     * extension indicates the file extension to be used for output
-     * files. For example "pdf" will produce a .pdf file, and "hocr"
-     * will produce .hocr files.
-     */
-    TessResultRenderer(const char *outputbase,
-                       const char* extension);
-
-    // Hook for specialized handling in BeginDocument()
-    virtual bool BeginDocumentHandler();
-
-    // This must be overridden to render the OCR'd results
-    virtual bool AddImageHandler(TessBaseAPI* api) = 0;
-
-    // Hook for specialized handling in EndDocument()
-    virtual bool EndDocumentHandler();
-
-    // Renderers can call this to append '\0' terminated strings into
-    // the output string returned by GetOutput.
-    // This method will grow the output buffer if needed.
-    void AppendString(const char* s);
-
-    // Renderers can call this to append binary byte sequences into
-    // the output string returned by GetOutput. Note that s is not necessarily
-    // '\0' terminated (and can contain '\0' within it).
-    // This method will grow the output buffer if needed.
-    void AppendData(const char* s, int len);
-
-  private:
-    const char* file_extension_;  // standard extension for generated output
-    STRING title_;                // title of document being renderered
-    int imagenum_;                // index of last image added
-
-    FILE* fout_;                  // output file pointer
-    TessResultRenderer* next_;    // Can link multiple renderers together
-    bool happy_;                  // I get grumpy when the disk fills up, etc.
-};
+    class TESS_API TessResultRenderer {
+    public:
+        virtual ~TessResultRenderer();
+
+        // Takes ownership of the pointer, so it must be a new'd instance.
+        // Renderers aren't ordered; the sequence of the next parameter and the
+        // existing next() sequence are appended. Renderers should be unique across both.
+        void insert(TessResultRenderer* next);
+
+        // Returns the next renderer or nullptr.
+        TessResultRenderer* next() { return next_; }
+
+        /**
+         * Starts a new document with the given title.
+         * This clears the contents of the output data.
+         * Title should use UTF-8 encoding.
+         */
+        bool BeginDocument(const char* title);
+
+        /**
+         * Adds the recognized text from the source image to the current document.
+         * Invalid if BeginDocument has not been called yet.
+         *
+         * Note that this API is a bit weird but is designed to fit into the
+         * current TessBaseAPI implementation where the api has lots of state
+         * information that we might want to add in.
+         */
+        bool AddImage(TessBaseAPI* api);
+
+        /**
+         * Finishes the document and finalizes the output data.
+         * Invalid if BeginDocument has not been called yet.
+         */
+        bool EndDocument();
+
+        const char* file_extension() const { return file_extension_; }
+        const char* title() const { return title_.c_str(); }
+
+        // Is everything fine? Otherwise something went wrong.
+        bool happy() { return happy_; }
+
+        /**
+         * Returns the index of the last image given to AddImage
+         * (i.e. the index is incremented whether the image succeeded or not).
+         *
+         * This is always defined. It means either the number of the
+         * current image, the last image ended, or in the completed document
+         * depending on when in the document lifecycle you are looking at it.
+         * Will return -1 if a document was never started.
+         */
+        int imagenum() const { return imagenum_; }
+
+    protected:
+        /**
+         * Called by concrete classes.
+         *
+         * outputbase is the name of the output file excluding
+         * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
+         *
+         * extension indicates the file extension to be used for output
+         * files. For example "pdf" will produce a .pdf file, and "hocr"
+         * will produce .hocr files.
+         */
+        TessResultRenderer(const char *outputbase,
+                           const char* extension);
+
+        // Hook for specialized handling in BeginDocument()
+        virtual bool BeginDocumentHandler();
+
+        // This must be overridden to render the OCR'd results
+        virtual bool AddImageHandler(TessBaseAPI* api) = 0;
+
+        // Hook for specialized handling in EndDocument()
+        virtual bool EndDocumentHandler();
+
+        // Renderers can call this to append '\0' terminated strings into
+        // the output string returned by GetOutput.
+        // This method will grow the output buffer if needed.
+        void AppendString(const char* s);
+
+        // Renderers can call this to append binary byte sequences into
+        // the output string returned by GetOutput. Note that s is not necessarily
+        // '\0' terminated (and can contain '\0' within it).
+        // This method will grow the output buffer if needed.
+        void AppendData(const char* s, int len);
+
+    private:
+        const char* file_extension_;  // standard extension for generated output
+        STRING title_;                // title of document being rendered
+        int imagenum_;                // index of last image added
+
+        FILE* fout_;                  // output file pointer
+        TessResultRenderer* next_;    // Can link multiple renderers together
+        bool happy_;                  // I get grumpy when the disk fills up, etc.
+    };
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-class TESS_API TessTextRenderer : public TessResultRenderer {
- public:
-  explicit TessTextRenderer(const char *outputbase);
+    class TESS_API TessTextRenderer : public TessResultRenderer {
+    public:
+        explicit TessTextRenderer(const char *outputbase);
 
- protected:
-  virtual bool AddImageHandler(TessBaseAPI* api);
-};
+    protected:
+        virtual bool AddImageHandler(TessBaseAPI* api);
+    };
 
 /**
  * Renders tesseract output into an hocr text string
  */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
- public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
+    class TESS_API TessHOcrRenderer : public TessResultRenderer {
+    public:
+        explicit TessHOcrRenderer(const char *outputbase, bool font_info);
+        explicit TessHOcrRenderer(const char *outputbase);
 
- protected:
-  virtual bool BeginDocumentHandler();
-  virtual bool AddImageHandler(TessBaseAPI* api);
-  virtual bool EndDocumentHandler();
+    protected:
+        virtual bool BeginDocumentHandler();
+        virtual bool AddImageHandler(TessBaseAPI* api);
+        virtual bool EndDocumentHandler();
 
- private:
-  bool font_info_;  // whether to print font information
-};
+    private:
+        bool font_info_;  // whether to print font information
+    };
+
+/**
+ * Renders tesseract output into an ALTO document string
+ */
+    class TESS_API TessAltoRenderer : public TessResultRenderer {
+    public:
+        explicit TessAltoRenderer(const char *outputbase);  // outputbase: output file name without extension
+
+    protected:
+        virtual bool BeginDocumentHandler();  // overrides the BeginDocument() hook
+        virtual bool AddImageHandler(TessBaseAPI* api);  // overrides the pure virtual per-image hook
+        virtual bool EndDocumentHandler();  // overrides the EndDocument() hook
+
+    };
 
 /**
  * Renders Tesseract output into a TSV string
  */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
- public:
-  explicit TessTsvRenderer(const char* outputbase, bool font_info);
-  explicit TessTsvRenderer(const char* outputbase);
+    class TESS_API TessTsvRenderer : public TessResultRenderer {
+    public:
+        explicit TessTsvRenderer(const char* outputbase, bool font_info);
+        explicit TessTsvRenderer(const char* outputbase);
 
- protected:
-  virtual bool BeginDocumentHandler();
-  virtual bool AddImageHandler(TessBaseAPI* api);
-  virtual bool EndDocumentHandler();
+    protected:
+        virtual bool BeginDocumentHandler();
+        virtual bool AddImageHandler(TessBaseAPI* api);
+        virtual bool EndDocumentHandler();
 
- private:
-  bool font_info_;              // whether to print font information
-};
+    private:
+        bool font_info_;              // whether to print font information
+    };
 
 /**
  * Renders tesseract output into searchable PDF
  */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
- public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
-
- protected:
-  virtual bool BeginDocumentHandler();
-  virtual bool AddImageHandler(TessBaseAPI* api);
-  virtual bool EndDocumentHandler();
-
- private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                     // counter for PDF objects
-  GenericVector<long int> offsets_;  // offset of every PDF object in bytes
-  GenericVector<long int> pages_;    // object number for every /Page object
-  std::string datadir_;              // where to find the custom font
-  bool textonly_;                    // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
-                          char** pdf_object, long int* pdf_object_size, const int jpg_quality);
-};
+    class TESS_API TessPDFRenderer : public TessResultRenderer {
+    public:
+        // datadir is the location of the TESSDATA. We need it because
+        // we load a custom PDF font from this location.
+        TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
+
+    protected:
+        virtual bool BeginDocumentHandler();
+        virtual bool AddImageHandler(TessBaseAPI* api);
+        virtual bool EndDocumentHandler();
+
+    private:
+        // We don't want to have every image in memory at once,
+        // so we store some metadata as we go along producing
+        // PDFs one page at a time. At the end, that metadata is
+        // used to make everything that isn't easily handled in a
+        // streaming fashion.
+        long int obj_;                     // counter for PDF objects
+        GenericVector<long int> offsets_;  // offset of every PDF object in bytes
+        GenericVector<long int> pages_;    // object number for every /Page object
+        std::string datadir_;              // where to find the custom font
+        bool textonly_;                    // skip images if set
+        // Bookkeeping only. DIY = Do It Yourself.
+        void AppendPDFObjectDIY(size_t objectsize);
+        // Bookkeeping + emit data.
+        void AppendPDFObject(const char *data);
+        // Create the /Contents object for an entire page.
+        char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
+        // Turn an image into a PDF object. Only transcode if we have to.
+        static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
+                                  char** pdf_object, long int* pdf_object_size, const int jpg_quality);
+    };
 
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
- public:
-  explicit TessUnlvRenderer(const char *outputbase);
+    class TESS_API TessUnlvRenderer : public TessResultRenderer {
+    public:
+        explicit TessUnlvRenderer(const char *outputbase);
 
- protected:
-  virtual bool AddImageHandler(TessBaseAPI* api);
-};
+    protected:
+        virtual bool AddImageHandler(TessBaseAPI* api);
+    };
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
- public:
-  explicit TessBoxTextRenderer(const char *outputbase);
+    class TESS_API TessBoxTextRenderer : public TessResultRenderer {
+    public:
+        explicit TessBoxTextRenderer(const char *outputbase);
 
- protected:
-  virtual bool AddImageHandler(TessBaseAPI* api);
-};
+    protected:
+        virtual bool AddImageHandler(TessBaseAPI* api);
+    };
 
 #ifndef DISABLED_LEGACY_ENGINE
 
 /**
  * Renders tesseract output into an osd text string
  */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
- public:
-  explicit TessOsdRenderer(const char* outputbase);
+    class TESS_API TessOsdRenderer : public TessResultRenderer {
+    public:
+        explicit TessOsdRenderer(const char* outputbase);
 
- protected:
-  virtual bool AddImageHandler(TessBaseAPI* api);
-};
+    protected:
+        virtual bool AddImageHandler(TessBaseAPI* api);
+    };
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp
index 5fd2cf6339..a2b1c4c185 100644
--- a/src/api/tesseractmain.cpp
+++ b/src/api/tesseractmain.cpp
@@ -66,20 +66,20 @@ static void Win32WarningHandler(const char* module, const char* fmt,
 #endif   // _WIN32
 
 static void PrintVersionInfo() {
-  char* versionStrP;
+    char* versionStrP;
 
-  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
+    printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
 
-  versionStrP = getLeptonicaVersion();
-  printf(" %s\n", versionStrP);
-  lept_free(versionStrP);
+    versionStrP = getLeptonicaVersion();
+    printf(" %s\n", versionStrP);
+    lept_free(versionStrP);
 
-  versionStrP = getImagelibVersions();
-  printf("  %s\n", versionStrP);
-  lept_free(versionStrP);
+    versionStrP = getImagelibVersions();
+    printf("  %s\n", versionStrP);
+    lept_free(versionStrP);
 
 #ifdef USE_OPENCL
-  cl_platform_id platform[4];
+    cl_platform_id platform[4];
   cl_uint num_platforms;
 
   printf(" OpenCL info:\n");
@@ -118,155 +118,155 @@ static void PrintVersionInfo() {
 }
 
 static void PrintHelpForPSM() {
-  const char* msg =
-      "Page segmentation modes:\n"
-      "  0    Orientation and script detection (OSD) only.\n"
-      "  1    Automatic page segmentation with OSD.\n"
-      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
-      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
-      "  4    Assume a single column of text of variable sizes.\n"
-      "  5    Assume a single uniform block of vertically aligned text.\n"
-      "  6    Assume a single uniform block of text.\n"
-      "  7    Treat the image as a single text line.\n"
-      "  8    Treat the image as a single word.\n"
-      "  9    Treat the image as a single word in a circle.\n"
-      " 10    Treat the image as a single character.\n"
-      " 11    Sparse text. Find as much text as possible in no"
-      " particular order.\n"
-      " 12    Sparse text with OSD.\n"
-      " 13    Raw line. Treat the image as a single text line,\n"
-      "       bypassing hacks that are Tesseract-specific.\n";
+    const char* msg =
+            "Page segmentation modes:\n"
+            "  0    Orientation and script detection (OSD) only.\n"
+            "  1    Automatic page segmentation with OSD.\n"
+            "  2    Automatic page segmentation, but no OSD, or OCR.\n"
+            "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
+            "  4    Assume a single column of text of variable sizes.\n"
+            "  5    Assume a single uniform block of vertically aligned text.\n"
+            "  6    Assume a single uniform block of text.\n"
+            "  7    Treat the image as a single text line.\n"
+            "  8    Treat the image as a single word.\n"
+            "  9    Treat the image as a single word in a circle.\n"
+            " 10    Treat the image as a single character.\n"
+            " 11    Sparse text. Find as much text as possible in no"
+            " particular order.\n"
+            " 12    Sparse text with OSD.\n"
+            " 13    Raw line. Treat the image as a single text line,\n"
+            "       bypassing hacks that are Tesseract-specific.\n";
 
 #ifdef DISABLED_LEGACY_ENGINE
-  const char* disabled_osd_msg =
+    const char* disabled_osd_msg =
       "\nNOTE: The OSD modes are currently disabled.\n";
   printf("%s%s", msg, disabled_osd_msg);
 #else
-  printf("%s", msg);
+    printf("%s", msg);
 #endif
 }
 
 #ifndef DISABLED_LEGACY_ENGINE
 static void PrintHelpForOEM() {
-  const char* msg =
-      "OCR Engine modes:\n"
-      "  0    Legacy engine only.\n"
-      "  1    Neural nets LSTM engine only.\n"
-      "  2    Legacy + LSTM engines.\n"
-      "  3    Default, based on what is available.\n";
-
-  printf("%s", msg);
+    const char* msg =
+            "OCR Engine modes:\n"
+            "  0    Legacy engine only.\n"
+            "  1    Neural nets LSTM engine only.\n"
+            "  2    Legacy + LSTM engines.\n"
+            "  3    Default, based on what is available.\n";
+
+    printf("%s", msg);
 }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 static void PrintHelpExtra(const char* program) {
-  printf(
-      "Usage:\n"
-      "  %s --help | --help-extra | --help-psm | "
-#ifndef DISABLED_LEGACY_ENGINE
-      "--help-oem | "
-#endif
-      "--version\n"
-      "  %s --list-langs [--tessdata-dir PATH]\n"
-      "  %s --print-parameters [options...] [configfile...]\n"
-      "  %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
-      "\n"
-      "OCR options:\n"
-      "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
-      "  --user-words PATH     Specify the location of user words file.\n"
-      "  --user-patterns PATH  Specify the location of user patterns file.\n"
-      "  --dpi VALUE           Specify DPI for input image.\n"
-      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
-      "  -c VAR=VALUE          Set value for config variables.\n"
-      "                        Multiple -c arguments are allowed.\n"
-      "  --psm NUM             Specify page segmentation mode.\n"
-#ifndef DISABLED_LEGACY_ENGINE
-      "  --oem NUM             Specify OCR Engine mode.\n"
-#endif
-      "NOTE: These options must occur before any configfile.\n"
-      "\n",
-      program, program, program, program
-  );
-
-  PrintHelpForPSM();
+    printf(
+            "Usage:\n"
+            "  %s --help | --help-extra | --help-psm | "
+            #ifndef DISABLED_LEGACY_ENGINE
+            "--help-oem | "
+            #endif
+            "--version\n"
+            "  %s --list-langs [--tessdata-dir PATH]\n"
+            "  %s --print-parameters [options...] [configfile...]\n"
+            "  %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
+            "\n"
+            "OCR options:\n"
+            "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
+            "  --user-words PATH     Specify the location of user words file.\n"
+            "  --user-patterns PATH  Specify the location of user patterns file.\n"
+            "  --dpi VALUE           Specify DPI for input image.\n"
+            "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
+            "  -c VAR=VALUE          Set value for config variables.\n"
+            "                        Multiple -c arguments are allowed.\n"
+            "  --psm NUM             Specify page segmentation mode.\n"
+            #ifndef DISABLED_LEGACY_ENGINE
+            "  --oem NUM             Specify OCR Engine mode.\n"
+            #endif
+            "NOTE: These options must occur before any configfile.\n"
+            "\n",
+            program, program, program, program
+    );
+
+    PrintHelpForPSM();
 #ifndef DISABLED_LEGACY_ENGINE
-  printf("\n");
-  PrintHelpForOEM();
+    printf("\n");
+    PrintHelpForOEM();
 #endif
 
-  printf(
-      "\n"
-      "Single options:\n"
-      "  -h, --help            Show minimal help message.\n"
-      "  --help-extra          Show extra help for advanced users.\n"
-      "  --help-psm            Show page segmentation modes.\n"
-#ifndef DISABLED_LEGACY_ENGINE
-      "  --help-oem            Show OCR Engine modes.\n"
-#endif
-      "  -v, --version         Show version information.\n"
-      "  --list-langs          List available languages for tesseract engine.\n"
-      "  --print-parameters    Print tesseract parameters.\n"
-  );
+    printf(
+            "\n"
+            "Single options:\n"
+            "  -h, --help            Show minimal help message.\n"
+            "  --help-extra          Show extra help for advanced users.\n"
+            "  --help-psm            Show page segmentation modes.\n"
+            #ifndef DISABLED_LEGACY_ENGINE
+            "  --help-oem            Show OCR Engine modes.\n"
+            #endif
+            "  -v, --version         Show version information.\n"
+            "  --list-langs          List available languages for tesseract engine.\n"
+            "  --print-parameters    Print tesseract parameters.\n"
+    );
 }
 
 static void PrintHelpMessage(const char* program) {
-  printf(
-      "Usage:\n"
-      "  %s --help | --help-extra | --version\n"
-      "  %s --list-langs\n"
-      "  %s imagename outputbase [options...] [configfile...]\n"
-      "\n"
-      "OCR options:\n"
-      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
-      "NOTE: These options must occur before any configfile.\n"
-      "\n"
-      "Single options:\n"
-      "  --help                Show this help message.\n"
-      "  --help-extra          Show extra help for advanced users.\n"
-      "  --version             Show version information.\n"
-      "  --list-langs          List available languages for tesseract engine.\n",
-      program, program, program
-  );
+    printf(
+            "Usage:\n"
+            "  %s --help | --help-extra | --version\n"
+            "  %s --list-langs\n"
+            "  %s imagename outputbase [options...] [configfile...]\n"
+            "\n"
+            "OCR options:\n"
+            "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
+            "NOTE: These options must occur before any configfile.\n"
+            "\n"
+            "Single options:\n"
+            "  --help                Show this help message.\n"
+            "  --help-extra          Show extra help for advanced users.\n"
+            "  --version             Show version information.\n"
+            "  --list-langs          List available languages for tesseract engine.\n",
+            program, program, program
+    );
 }
 
 static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
                                    char** argv) {
-  char opt1[256], opt2[255];
-  for (int i = 0; i < argc; i++) {
-    if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
-      strncpy(opt1, argv[i + 1], 255);
-      opt1[255] = '\0';
-      char* p = strchr(opt1, '=');
-      if (!p) {
-        fprintf(stderr, "Missing = in configvar assignment\n");
-        exit(EXIT_FAILURE);
-      }
-      *p = 0;
-      strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
-      opt2[254] = 0;
-      ++i;
-
-      if (!api->SetVariable(opt1, opt2)) {
-        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
-      }
+    char opt1[256], opt2[255];  // opt1: variable name, opt2: value
+    for (int i = 0; i < argc; i++) {
+        if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {  // "-c" followed by an argument
+            strncpy(opt1, argv[i + 1], 255);
+            opt1[255] = '\0';  // strncpy does not terminate when the source has 255+ chars
+            char* p = strchr(opt1, '=');
+            if (!p) {
+                fprintf(stderr, "Missing = in configvar assignment\n");
+                exit(EXIT_FAILURE);
+            }
+            *p = 0;  // cut opt1 at '=' so it holds only the name
+            strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);  // text after the first '='
+            opt2[254] = 0;  // opt2 has 255 bytes, so values are capped at 254 chars
+            ++i;  // skip the argument just consumed
+
+            if (!api->SetVariable(opt1, opt2)) {
+                fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);  // reported, not fatal
+            }
+        }
     }
-  }
 }
 
 static void PrintLangsList(tesseract::TessBaseAPI* api) {
-  GenericVector<STRING> languages;
-  api->GetAvailableLanguagesAsVector(&languages);
-  printf("List of available languages (%d):\n", languages.size());
-  for (int index = 0; index < languages.size(); ++index) {
-    STRING& string = languages[index];
-    printf("%s\n", string.string());
-  }
-  api->End();
+    GenericVector<STRING> languages;
+    api->GetAvailableLanguagesAsVector(&languages);
+    printf("List of available languages (%d):\n", languages.size());
+    for (int index = 0; index < languages.size(); ++index) {
+        STRING& string = languages[index];
+        printf("%s\n", string.string());
+    }
+    api->End();
 }
 
 static void PrintBanner() {
-  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
-          tesseract::TessBaseAPI::Version());
+    tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
+            tesseract::TessBaseAPI::Version());
 }
 
 /**
@@ -285,15 +285,15 @@ static void PrintBanner() {
  */
 static void FixPageSegMode(tesseract::TessBaseAPI* api,
                            tesseract::PageSegMode pagesegmode) {
-  if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
-    api->SetPageSegMode(pagesegmode);
+    if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
+        api->SetPageSegMode(pagesegmode);
 }
 
 static void checkArgValues(int arg, const char* mode, int count) {
-  if (arg >= count || arg < 0) {
-    printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
-    exit(EXIT_SUCCESS);
-  }
+    if (arg >= count || arg < 0) {  // accepted values are 0 .. count - 1
+        fprintf(stderr, "Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
+        exit(EXIT_FAILURE);  // a rejected argument is an error: report on stderr, exit non-zero
+    }
 }
 
 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
@@ -304,205 +304,219 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
                       GenericVector<STRING>* vars_values, l_int32* arg_i,
                       tesseract::PageSegMode* pagesegmode,
                       tesseract::OcrEngineMode* enginemode) {
-  bool noocr = false;
-  int i;
-  for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
-    if (*image != nullptr && *outputbase == nullptr) {
-      // outputbase follows image, don't allow options at that position.
-      *outputbase = argv[i];
-    } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
-      PrintHelpMessage(argv[0]);
-      noocr = true;
-    } else if (strcmp(argv[i], "--help-extra") == 0) {
-      PrintHelpExtra(argv[0]);
-      noocr = true;
-    } else if ((strcmp(argv[i], "--help-psm") == 0)) {
-      PrintHelpForPSM();
-      noocr = true;
+    bool noocr = false;
+    int i;
+    for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
+        if (*image != nullptr && *outputbase == nullptr) {
+            // outputbase follows image, don't allow options at that position.
+            *outputbase = argv[i];
+        } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
+            PrintHelpMessage(argv[0]);
+            noocr = true;
+        } else if (strcmp(argv[i], "--help-extra") == 0) {
+            PrintHelpExtra(argv[0]);
+            noocr = true;
+        } else if ((strcmp(argv[i], "--help-psm") == 0)) {
+            PrintHelpForPSM();
+            noocr = true;
 #ifndef DISABLED_LEGACY_ENGINE
-    } else if ((strcmp(argv[i], "--help-oem") == 0)) {
-      PrintHelpForOEM();
-      noocr = true;
+        } else if ((strcmp(argv[i], "--help-oem") == 0)) {
+            PrintHelpForOEM();
+            noocr = true;
 #endif
-    } else if ((strcmp(argv[i], "-v") == 0) ||
-               (strcmp(argv[i], "--version") == 0)) {
-      PrintVersionInfo();
-      noocr = true;
-    } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
-      *lang = argv[i + 1];
-      ++i;
-    } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
-      *datapath = argv[i + 1];
-      ++i;
-    } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
-      *dpi = atoi(argv[i + 1]);
-      ++i;
-    } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
-      vars_vec->push_back("user_words_file");
-      vars_values->push_back(argv[i + 1]);
-      ++i;
-    } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
-      vars_vec->push_back("user_patterns_file");
-      vars_values->push_back(argv[i + 1]);
-      ++i;
-    } else if (strcmp(argv[i], "--list-langs") == 0) {
-      noocr = true;
-      *list_langs = true;
-    } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
-      checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
-      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
-      ++i;
-    } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
+        } else if ((strcmp(argv[i], "-v") == 0) ||
+                   (strcmp(argv[i], "--version") == 0)) {
+            PrintVersionInfo();
+            noocr = true;
+        } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
+            *lang = argv[i + 1];
+            ++i;
+        } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
+            *datapath = argv[i + 1];
+            ++i;
+        } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
+            *dpi = atoi(argv[i + 1]);
+            ++i;
+        } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
+            vars_vec->push_back("user_words_file");
+            vars_values->push_back(argv[i + 1]);
+            ++i;
+        } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
+            vars_vec->push_back("user_patterns_file");
+            vars_values->push_back(argv[i + 1]);
+            ++i;
+        } else if (strcmp(argv[i], "--list-langs") == 0) {
+            noocr = true;
+            *list_langs = true;
+        } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
+            checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
+            *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
+            ++i;
+        } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
 #ifndef DISABLED_LEGACY_ENGINE
-      int oem = atoi(argv[i + 1]);
-      checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
-      *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
+            int oem = atoi(argv[i + 1]);
+            checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
+            *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
 #endif
-      ++i;
-    } else if (strcmp(argv[i], "--print-parameters") == 0) {
-      noocr = true;
-      *print_parameters = true;
-    } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
-      // handled properly after api init
-      ++i;
-    } else if (*image == nullptr) {
-      *image = argv[i];
-    } else {
-      // Unexpected argument.
-      fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
-      exit(EXIT_FAILURE);
+            ++i;
+        } else if (strcmp(argv[i], "--print-parameters") == 0) {
+            noocr = true;
+            *print_parameters = true;
+        } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
+            // handled properly after api init
+            ++i;
+        } else if (*image == nullptr) {
+            *image = argv[i];
+        } else {
+            // Unexpected argument.
+            fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
+            exit(EXIT_FAILURE);
+        }
     }
-  }
-
-  *arg_i = i;
 
-  if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
-    // OSD = orientation and script detection.
-    if (*lang != nullptr && strcmp(*lang, "osd")) {
-      // If the user explicitly specifies a language (other than osd)
-      // or a script, only orientation can be detected.
-      fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
-    } else {
-      // That mode requires osd.traineddata to detect orientation and script.
-      *lang = "osd";
+    *arg_i = i;
+
+    if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
+        // OSD = orientation and script detection.
+        if (*lang != nullptr && strcmp(*lang, "osd")) {
+            // If the user explicitly specifies a language (other than osd)
+            // or a script, only orientation can be detected.
+            fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
+        } else {
+            // That mode requires osd.traineddata to detect orientation and script.
+            *lang = "osd";
+        }
     }
-  }
 
-  if (*outputbase == nullptr && noocr == false) {
-    PrintHelpMessage(argv[0]);
-    exit(EXIT_FAILURE);
-  }
+    if (*outputbase == nullptr && noocr == false) {
+        PrintHelpMessage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
 }
 
 static void PreloadRenderers(
-    tesseract::TessBaseAPI* api,
-    tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
-    tesseract::PageSegMode pagesegmode, const char* outputbase) {
-  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
+        tesseract::TessBaseAPI* api,
+        tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
+        tesseract::PageSegMode pagesegmode, const char* outputbase) {
+    if (pagesegmode == tesseract::PSM_OSD_ONLY) {
 #ifndef DISABLED_LEGACY_ENGINE
-    renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
+        renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
 #endif  // ndef DISABLED_LEGACY_ENGINE
-  } else {
-    bool b;
-    api->GetBoolVariable("tessedit_create_hocr", &b);
-    if (b) {
-      bool font_info;
-      api->GetBoolVariable("hocr_font_info", &font_info);
-      tesseract::TessHOcrRenderer* renderer =
-          new tesseract::TessHOcrRenderer(outputbase, font_info);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create hOCR output file: %s\n",
-                strerror(errno));
-      }
-    }
+    } else {
+        bool b;
+        api->GetBoolVariable("tessedit_create_hocr", &b);
+        if (b) {
+            bool font_info;
+            api->GetBoolVariable("hocr_font_info", &font_info);
+            tesseract::TessHOcrRenderer* renderer =
+                    new tesseract::TessHOcrRenderer(outputbase, font_info);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create hOCR output file: %s\n",
+                        strerror(errno));
+            }
+        }
 
-    api->GetBoolVariable("tessedit_create_tsv", &b);
-    if (b) {
-      bool font_info;
-      api->GetBoolVariable("hocr_font_info", &font_info);
-      tesseract::TessTsvRenderer* renderer =
-          new tesseract::TessTsvRenderer(outputbase, font_info);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create TSV output file: %s\n",
-                strerror(errno));
-      }
-    }
+        api->GetBoolVariable("tessedit_create_alto", &b);
+        if (b) {
+            bool font_info;
+            tesseract::TessAltoRenderer* renderer =
+                    new tesseract::TessAltoRenderer(outputbase);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create ALTO output file: %s\n",
+                        strerror(errno));
+            }
+        }
 
-    api->GetBoolVariable("tessedit_create_pdf", &b);
-    if (b) {
-      #ifdef WIN32
-        if (_setmode(_fileno(stdout), _O_BINARY) == -1)
+        api->GetBoolVariable("tessedit_create_tsv", &b);
+        if (b) {
+            bool font_info;
+            api->GetBoolVariable("hocr_font_info", &font_info);
+            tesseract::TessTsvRenderer* renderer =
+                    new tesseract::TessTsvRenderer(outputbase, font_info);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create TSV output file: %s\n",
+                        strerror(errno));
+            }
+        }
+
+        api->GetBoolVariable("tessedit_create_pdf", &b);
+        if (b) {
+#ifdef WIN32
+            if (_setmode(_fileno(stdout), _O_BINARY) == -1)
           tprintf("ERROR: cin to binary: %s", strerror(errno));
-      #endif  // WIN32
-      bool textonly;
-      api->GetBoolVariable("textonly_pdf", &textonly);
-      tesseract::TessPDFRenderer* renderer =
-        new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(),
-                                       textonly);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create PDF output file: %s\n",
-                strerror(errno));
-      }
-    }
+#endif  // WIN32
+            bool textonly;
+            api->GetBoolVariable("textonly_pdf", &textonly);
+            tesseract::TessPDFRenderer* renderer =
+                    new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(),
+                                                   textonly);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create PDF output file: %s\n",
+                        strerror(errno));
+            }
+        }
 
-    api->GetBoolVariable("tessedit_write_unlv", &b);
-    if (b) {
-      api->SetVariable("unlv_tilde_crunching", "true");
-      tesseract::TessUnlvRenderer* renderer =
-        new tesseract::TessUnlvRenderer(outputbase);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create UNLV output file: %s\n",
-                strerror(errno));
-      }
-    }
+        api->GetBoolVariable("tessedit_write_unlv", &b);
+        if (b) {
+            api->SetVariable("unlv_tilde_crunching", "true");
+            tesseract::TessUnlvRenderer* renderer =
+                    new tesseract::TessUnlvRenderer(outputbase);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create UNLV output file: %s\n",
+                        strerror(errno));
+            }
+        }
 
-    api->GetBoolVariable("tessedit_create_boxfile", &b);
-    if (b) {
-      tesseract::TessBoxTextRenderer* renderer =
-        new tesseract::TessBoxTextRenderer(outputbase);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create BOX output file: %s\n",
-                strerror(errno));
-      }
-    }
+        api->GetBoolVariable("tessedit_create_boxfile", &b);
+        if (b) {
+            tesseract::TessBoxTextRenderer* renderer =
+                    new tesseract::TessBoxTextRenderer(outputbase);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create BOX output file: %s\n",
+                        strerror(errno));
+            }
+        }
 
-    api->GetBoolVariable("tessedit_create_txt", &b);
-    if (b || renderers->empty()) {
-      tesseract::TessTextRenderer* renderer =
-        new tesseract::TessTextRenderer(outputbase);
-      if (renderer->happy()) {
-        renderers->push_back(renderer);
-      } else {
-        delete renderer;
-        tprintf("Error, could not create TXT output file: %s\n",
-                strerror(errno));
-      }
+        api->GetBoolVariable("tessedit_create_txt", &b);
+        if (b || renderers->empty()) {
+            tesseract::TessTextRenderer* renderer =
+                    new tesseract::TessTextRenderer(outputbase);
+            if (renderer->happy()) {
+                renderers->push_back(renderer);
+            } else {
+                delete renderer;
+                tprintf("Error, could not create TXT output file: %s\n",
+                        strerror(errno));
+            }
+        }
     }
-  }
 
-  if (!renderers->empty()) {
-    // Since the PointerVector auto-deletes, null-out the renderers that are
-    // added to the root, and leave the root in the vector.
-    for (int r = 1; r < renderers->size(); ++r) {
-      (*renderers)[0]->insert((*renderers)[r]);
-      (*renderers)[r] = nullptr;
+    if (!renderers->empty()) {
+        // Since the PointerVector auto-deletes, null-out the renderers that are
+        // added to the root, and leave the root in the vector.
+        for (int r = 1; r < renderers->size(); ++r) {
+            (*renderers)[0]->insert((*renderers)[r]);
+            (*renderers)[r] = nullptr;
+        }
     }
-  }
 }
 
 
@@ -512,135 +526,135 @@ static void PreloadRenderers(
  **********************************************************************/
 
 int main(int argc, char** argv) {
-  const char* lang = nullptr;
-  const char* image = nullptr;
-  const char* outputbase = nullptr;
-  const char* datapath = nullptr;
-  bool list_langs = false;
-  bool print_parameters = false;
-  l_int32 dpi = 0;
-  int arg_i = 1;
-  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
+    const char* lang = nullptr;
+    const char* image = nullptr;
+    const char* outputbase = nullptr;
+    const char* datapath = nullptr;
+    bool list_langs = false;
+    bool print_parameters = false;
+    l_int32 dpi = 0;
+    int arg_i = 1;
+    tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
 #ifdef DISABLED_LEGACY_ENGINE
-  auto enginemode = tesseract::OEM_LSTM_ONLY;
+    auto enginemode = tesseract::OEM_LSTM_ONLY;
 #else
-  tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
+    tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
 #endif
-  /* main() calls functions like ParseArgs which call exit().
-   * This results in memory leaks if vars_vec and vars_values are
-   * declared as auto variables (destructor is not called then). */
-  static GenericVector<STRING> vars_vec;
-  static GenericVector<STRING> vars_values;
+    /* main() calls functions like ParseArgs which call exit().
+     * This results in memory leaks if vars_vec and vars_values are
+     * declared as auto variables (destructor is not called then). */
+    static GenericVector<STRING> vars_vec;
+    static GenericVector<STRING> vars_values;
 
 #if !defined(DEBUG)
-  // Disable debugging and informational messages from Leptonica.
-  setMsgSeverity(L_SEVERITY_ERROR);
+    // Disable debugging and informational messages from Leptonica.
+    setMsgSeverity(L_SEVERITY_ERROR);
 #endif
 
 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
-  /* Show libtiff errors and warnings on console (not in GUI). */
+    /* Show libtiff errors and warnings on console (not in GUI). */
   TIFFSetErrorHandler(Win32ErrorHandler);
   TIFFSetWarningHandler(Win32WarningHandler);
 #endif // HAVE_TIFFIO_H && _WIN32
 
-  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
-            &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
-            &pagesegmode, &enginemode);
-
-  if (lang == nullptr) {
-    // Set default language if none was given.
-    lang = "eng";
-  }
-
-  if (image == nullptr && !list_langs && !print_parameters)
-    return EXIT_SUCCESS;
+    ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
+              &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
+              &pagesegmode, &enginemode);
 
-  PERF_COUNT_START("Tesseract:main")
+    if (lang == nullptr) {
+        // Set default language if none was given.
+        lang = "eng";
+    }
 
-  // Call GlobalDawgCache here to create the global DawgCache object before
-  // the TessBaseAPI object. This fixes the order of destructor calls:
-  // first TessBaseAPI must be destructed, DawgCache must be the last object.
-  tesseract::Dict::GlobalDawgCache();
+    if (image == nullptr && !list_langs && !print_parameters)
+        return EXIT_SUCCESS;
 
-  // Avoid memory leak caused by auto variable when return is called.
-  static tesseract::TessBaseAPI api;
+    PERF_COUNT_START("Tesseract:main")
 
-  api.SetOutputName(outputbase);
+    // Call GlobalDawgCache here to create the global DawgCache object before
+    // the TessBaseAPI object. This fixes the order of destructor calls:
+    // first TessBaseAPI must be destructed, DawgCache must be the last object.
+    tesseract::Dict::GlobalDawgCache();
 
-  const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
-                             argc - arg_i, &vars_vec, &vars_values, false);
+    // Avoid memory leak caused by auto variable when return is called.
+    static tesseract::TessBaseAPI api;
 
-  SetVariablesFromCLArgs(&api, argc, argv);
+    api.SetOutputName(outputbase);
 
-  if (list_langs) {
-    PrintLangsList(&api);
-    return EXIT_SUCCESS;
-  }
+    const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
+                                     argc - arg_i, &vars_vec, &vars_values, false);
 
-  if (init_failed) {
-    fprintf(stderr, "Could not initialize tesseract.\n");
-    return EXIT_FAILURE;
-  }
+    SetVariablesFromCLArgs(&api, argc, argv);
 
-  if (print_parameters) {
-    FILE* fout = stdout;
-    fprintf(stdout, "Tesseract parameters:\n");
-    api.PrintVariables(fout);
-    api.End();
-    return EXIT_SUCCESS;
-  }
+    if (list_langs) {
+        PrintLangsList(&api);
+        return EXIT_SUCCESS;
+    }
 
-  FixPageSegMode(&api, pagesegmode);
+    if (init_failed) {
+        fprintf(stderr, "Could not initialize tesseract.\n");
+        return EXIT_FAILURE;
+    }
 
-  if (dpi) {
-    char dpi_string[255];
-    snprintf(dpi_string, 254, "%d", dpi);
-    api.SetVariable("user_defined_dpi", dpi_string);
-  }
+    if (print_parameters) {
+        FILE* fout = stdout;
+        fprintf(stdout, "Tesseract parameters:\n");
+        api.PrintVariables(fout);
+        api.End();
+        return EXIT_SUCCESS;
+    }
 
-  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
-    int ret_val = EXIT_SUCCESS;
+    FixPageSegMode(&api, pagesegmode);
 
-    Pix* pixs = pixRead(image);
-    if (!pixs) {
-      fprintf(stderr, "Leptonica can't process input file: %s\n", image);
-      return 2;
+    if (dpi) {
+        char dpi_string[255];
+        snprintf(dpi_string, 254, "%d", dpi);
+        api.SetVariable("user_defined_dpi", dpi_string);
     }
 
-    api.SetImage(pixs);
+    if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
+        int ret_val = EXIT_SUCCESS;
 
-    tesseract::Orientation orientation;
-    tesseract::WritingDirection direction;
-    tesseract::TextlineOrder order;
-    float deskew_angle;
+        Pix* pixs = pixRead(image);
+        if (!pixs) {
+            fprintf(stderr, "Leptonica can't process input file: %s\n", image);
+            return 2;
+        }
 
-    const tesseract::PageIterator* it = api.AnalyseLayout();
-    if (it) {
-      it->Orientation(&orientation, &direction, &order, &deskew_angle);
-      tprintf(
-          "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
-          "Deskew angle: %.4f\n",
-          orientation, direction, order, deskew_angle);
-    } else {
-      ret_val = EXIT_FAILURE;
-    }
+        api.SetImage(pixs);
+
+        tesseract::Orientation orientation;
+        tesseract::WritingDirection direction;
+        tesseract::TextlineOrder order;
+        float deskew_angle;
+
+        const tesseract::PageIterator* it = api.AnalyseLayout();
+        if (it) {
+            it->Orientation(&orientation, &direction, &order, &deskew_angle);
+            tprintf(
+                    "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
+                    "Deskew angle: %.4f\n",
+                    orientation, direction, order, deskew_angle);
+        } else {
+            ret_val = EXIT_FAILURE;
+        }
 
-    delete it;
+        delete it;
 
-    pixDestroy(&pixs);
-    return ret_val;
-  }
+        pixDestroy(&pixs);
+        return ret_val;
+    }
 
-  // set in_training_mode to true when using one of these configs:
-  // ambigs.train, box.train, box.train.stderr, linebox, rebox
-  bool b = false;
-  bool in_training_mode =
-      (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
-      (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
-      (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
+    // set in_training_mode to true when using one of these configs:
+    // ambigs.train, box.train, box.train.stderr, linebox, rebox
+    bool b = false;
+    bool in_training_mode =
+            (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
+            (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
+            (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
 
 #ifdef DISABLED_LEGACY_ENGINE
-  auto cur_psm = api.GetPageSegMode();
+    auto cur_psm = api.GetPageSegMode();
   auto osd_warning = std::string("");
   if (cur_psm == tesseract::PSM_OSD_ONLY) {
     const char* disabled_osd_msg =
@@ -660,37 +674,37 @@ int main(int argc, char** argv) {
   }
 #endif  // def DISABLED_LEGACY_ENGINE
 
-  // Avoid memory leak caused by auto variable when exit() is called.
-  static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
+    // Avoid memory leak caused by auto variable when exit() is called.
+    static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
 
-  if (in_training_mode) {
-    renderers.push_back(nullptr);
-  } else {
-    PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
-  }
+    if (in_training_mode) {
+        renderers.push_back(nullptr);
+    } else {
+        PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
+    }
 
-  bool banner = false;
-  if (outputbase != nullptr && strcmp(outputbase, "-") &&
-      strcmp(outputbase, "stdout")) {
-    banner = true;
-  }
+    bool banner = false;
+    if (outputbase != nullptr && strcmp(outputbase, "-") &&
+        strcmp(outputbase, "stdout")) {
+        banner = true;
+    }
 
-  if (!renderers.empty()) {
-    if (banner) PrintBanner();
+    if (!renderers.empty()) {
+        if (banner) PrintBanner();
 #ifdef DISABLED_LEGACY_ENGINE
-    if (!osd_warning.empty()) {
+        if (!osd_warning.empty()) {
       fprintf(stderr, "%s",osd_warning.c_str());
     }
 #endif
-    bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]);
-    if (!succeed) {
-      fprintf(stderr, "Error during processing.\n");
-      return EXIT_FAILURE;
+        bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]);
+        if (!succeed) {
+            fprintf(stderr, "Error during processing.\n");
+            return EXIT_FAILURE;
+        }
     }
-  }
 
-  PERF_COUNT_END
+    PERF_COUNT_END
 
-  return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
 
diff --git a/tessdata/configs/alto b/tessdata/configs/alto
new file mode 100644
index 0000000000..0dd12a7a70
--- /dev/null
+++ b/tessdata/configs/alto
@@ -0,0 +1 @@
+tessedit_create_alto 1

From 38b705c660a28c9128b2b77ffa67729b12c07d41 Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Mon, 19 Nov 2018 22:12:11 -0500
Subject: [PATCH 2/9] Separate ALTO functionality into altorenderer.cpp

---
 CMakeLists.txt                |   3 +-
 android/jni/Android.mk        |   1 +
 src/api/Makefile.am           |   2 +-
 src/api/altorenderer.cpp      | 254 ++++++++++++++++++++++++++++++++++
 src/api/baseapi.cpp           | 165 ----------------------
 src/api/renderer.cpp          |  48 -------
 src/api/renderer.h            |   1 +
 src/api/tesseractmain.cpp     |   1 -
 src/ccmain/tesseractclass.cpp |   2 +
 src/ccmain/tesseractclass.h   |   1 +
 10 files changed, 262 insertions(+), 216 deletions(-)
 create mode 100644 src/api/altorenderer.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2cf2cecc6e..3076140edd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -215,6 +215,7 @@ set(tesseract_src ${tesseract_src}
     src/api/capi.cpp
     src/api/renderer.cpp
     src/api/pdfrenderer.cpp
+    src/api/altorenderer.cpp
 )
 
 if (WIN32)
@@ -223,7 +224,7 @@ if (WIN32)
         set(tesseract_hdr
             ${tesseract_hdr}
             ${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
-        set(tesseract_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc)
+        set(tesseract_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc src/api/altorenderer.cpp)
         set_source_files_properties(
             ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
             PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 170bb51344..225a2f4c97 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -31,6 +31,7 @@ LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../
 
 EXPLICIT_SRC_EXCLUDES := \
   $(LOCAL_PATH)/../../api/pdfrenderer.cpp \
+  $(LOCAL_PATH)/../../api/altorenderer.cpp \
   $(LOCAL_PATH)/../../api/tesseractmain.cpp \
 
 LOCAL_SRC_FILES := $(filter-out $(EXPLICIT_SRC_EXCLUDES), $(LOCAL_SRC_FILES))
diff --git a/src/api/Makefile.am b/src/api/Makefile.am
index bdecc6a77f..3fdf17fb19 100644
--- a/src/api/Makefile.am
+++ b/src/api/Makefile.am
@@ -32,7 +32,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS)
 if VISIBILITY
 libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS
 endif
-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp
+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp altorenderer.cpp
 
 lib_LTLIBRARIES += libtesseract.la
 libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS)
diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
new file mode 100644
index 0000000000..2b4755d046
--- /dev/null
+++ b/src/api/altorenderer.cpp
@@ -0,0 +1,254 @@
+///////////////////////////////////////////////////////////////////////
+// File:        altorenderer.cpp
+// Description: ALTO rendering interface
+//
+// (C) Copyright 2018
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "baseapi.h"
+#include "renderer.h"
+
+namespace tesseract {
+
+/**********************************************************************
+ * Alto Text Renderer interface implementation
+ **********************************************************************/
+    TessAltoRenderer::TessAltoRenderer(const char *outputbase)
+            : TessResultRenderer(outputbase, "xml") {  // "xml": presumably the output file extension - confirm in TessResultRenderer
+    }
+
+    /**
+    * Append the XML declaration, <alto> root, <Description> and opening <Layout>.
+    */
+    bool TessAltoRenderer::BeginDocumentHandler() {
+        AppendString(
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+                "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd\">\n"
+                "\t<Description>\n"
+                "\t\t<MeasurementUnit>pixel</MeasurementUnit>\n"
+                "\t\t<sourceImageInformation>\n"
+                "\t\t\t<fileName>");
+        // title() goes in unescaped; only the closing tag may follow it directly.
+        AppendString(title());
+
+        AppendString("</fileName>\n"
+                     "\t\t</sourceImageInformation>\n"
+                     "\t\t<OCRProcessing ID=\"OCR_0\">\n"
+                     "\t\t\t<ocrProcessingStep>\n"
+                     "\t\t\t\t<processingSoftware>\n"
+                     "\t\t\t\t\t<softwareName>tesseract 4.0.0</softwareName>\n"
+                     "\t\t\t\t</processingSoftware>\n"
+                     "\t\t\t</ocrProcessingStep>\n"
+                     "\t\t</OCRProcessing>\n"
+                     "\t</Description>\n"
+                     "\t<Layout>\n");
+
+        return true;
+    }
+
+    /**
+    * Append the closing </Layout> and </alto> tags; always returns true.
+    */
+    bool TessAltoRenderer::EndDocumentHandler() {
+        AppendString("\t</Layout>\n</alto>\n");
+
+        return true;
+    }
+
+    /**
+    * Append the ALTO text that api produces for the current image number.
+    */
+    bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
+        const std::unique_ptr<const char[]> page_xml(api->GetAltoText(imagenum()));
+        if (!page_xml) {
+            return false;
+        }
+        AppendString(page_xml.get());
+        return true;
+    }
+
+    /**
+    * Append ` ID="<base>_<num1>"` to alto_str; the id is cut to fit 64 bytes.
+    */
+    static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) {
+        const size_t BUFSIZE = 64;
+        char id_buffer[BUFSIZE];
+        snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
+        id_buffer[BUFSIZE - 1] = '\0';  // explicit terminator in case snprintf left none
+        *alto_str += " ID=\"";
+        *alto_str += id_buffer;
+        *alto_str += "\"";
+    }
+
+    /**
+    * Append HPOS, VPOS, WIDTH and HEIGHT for the bounding box of it at level.
+    * Words also get WC and the tag stays open; other levels are ended with ">".
+    */
+    static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
+                             STRING *alto_str) {
+        int left, top, right, bottom;
+        it->BoundingBox(level, &left, &top, &right, &bottom);
+
+        int hpos = left;
+        int vpos = top;
+        int height = bottom - top;
+        int width = right - left;
+
+        *alto_str += " HPOS=\"";
+        alto_str->add_str_int("", hpos);
+        *alto_str += "\"";
+        *alto_str += " VPOS=\"";
+        alto_str->add_str_int("", vpos);
+        *alto_str += "\"";
+        *alto_str += " WIDTH=\"";
+        alto_str->add_str_int("", width);
+        *alto_str += "\"";
+        *alto_str += " HEIGHT=\"";
+        alto_str->add_str_int("", height);
+        *alto_str += "\"";
+
+        if (level == RIL_WORD) {
+            // WC is a 0..1 fraction of the 0-100 confidence: 5 -> 0.05, 100 -> 1.00.
+            int wc = it->Confidence(RIL_WORD);
+            *alto_str += (wc >= 100) ? " WC=\"1.0" : (wc < 10) ? " WC=\"0.0" : " WC=\"0.";
+            alto_str->add_str_int("", (wc >= 100) ? 0 : wc);
+            *alto_str += "\"";
+        }
+        if (level != RIL_WORD) {
+            *alto_str += ">";
+        }
+    }
+
+    /**
+     * Convenience overload: forwards to GetAltoText(monitor, page_number)
+     * with a null monitor.
+     */
+        char *TessBaseAPI::GetAltoText(int page_number) {
+            return GetAltoText(nullptr, page_number);
+        }
+
+    /**
+     * Build the ALTO <Page> element (blocks, lines, words) for the current page.
+     * Returns a new[]-allocated string, or nullptr if recognition fails.
+     */
+        char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
+            if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+                return nullptr;
+
+            int lcnt = 0, bcnt = 0, wcnt = 0;
+            int page_id = page_number;
+
+            STRING alto_str("");
+
+            if (input_file_ == nullptr)
+                SetInputName(nullptr);
+
+    #ifdef _WIN32
+            // convert input name from ANSI encoding to utf-8
+          int str16_len =
+              MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
+          wchar_t *uni16_str = new WCHAR[str16_len];
+          str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
+                                          uni16_str, str16_len);
+          int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
+                                             nullptr, nullptr);
+          char *utf8_str = new char[utf8_len];
+          WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
+                              utf8_len, nullptr, nullptr);
+          *input_file_ = utf8_str;
+          delete[] uni16_str;
+          delete[] utf8_str;
+    #endif
+
+            alto_str += "\t\t<Page WIDTH=\"";
+            alto_str.add_str_int("", rect_width_);
+            alto_str += "\" HEIGHT=\"";
+            alto_str.add_str_int("", rect_height_);
+            alto_str += "\" PHYSICAL_IMG_NR=\"";
+            alto_str.add_str_int("", page_number);
+            alto_str += "\"";
+            AddIdToAlto(&alto_str, "page", page_id);
+            alto_str += ">\n";
+            alto_str += ("\t\t\t<PrintSpace HPOS=\"0\" "
+                         "VPOS=\"0\""
+                         " WIDTH=\"");
+            alto_str.add_str_int("", rect_width_);
+            alto_str += "\" HEIGHT=\"";
+            alto_str.add_str_int("", rect_height_);
+            alto_str += "\">\n";
+
+            ResultIterator *res_it = GetIterator();
+            while (!res_it->Empty(RIL_BLOCK)) {
+                if (res_it->Empty(RIL_WORD)) {
+                    res_it->Next(RIL_WORD);
+                    continue;
+                }
+
+                if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+                    alto_str += "\t\t\t\t<TextBlock ";
+                    AddIdToAlto(&alto_str, "block", bcnt);
+                    AddBoxToAlto(res_it, RIL_BLOCK, &alto_str);
+                    alto_str += "\n";
+                }
+
+                if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+
+                    alto_str += "\t\t\t\t\t<TextLine ";
+                    AddIdToAlto(&alto_str, "line", lcnt);
+                    AddBoxToAlto(res_it, RIL_TEXTLINE, &alto_str);
+                    alto_str += "\n";
+                }
+
+                alto_str += "\t\t\t\t\t\t<String ";
+                AddIdToAlto(&alto_str, "string", wcnt);
+                AddBoxToAlto(res_it, RIL_WORD, &alto_str);
+                alto_str += " CONTENT=\"";
+                // Record line/block ends now: the symbol loop below advances res_it.
+
+                bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
+                bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
+
+                do {
+                    const std::unique_ptr<const char[]> grapheme(
+                            res_it->GetUTF8Text(RIL_SYMBOL));
+                    if (grapheme && grapheme[0] != 0) {
+                        alto_str += HOcrEscape(grapheme.get());
+                    }
+                    res_it->Next(RIL_SYMBOL);
+                } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+
+                alto_str += "\"/>\n";
+
+                wcnt++;
+
+                if (last_word_in_line) {
+                    alto_str += "\t\t\t\t\t</TextLine>\n";
+                    lcnt++;
+                }
+
+                if (last_word_in_block) {
+                    alto_str += "\t\t\t\t</TextBlock>\n";
+                    bcnt++;
+                }
+            }
+
+            alto_str += "\t\t\t</PrintSpace>\n";
+            alto_str += "\t\t</Page>\n";
+
+            char *ret = new char[alto_str.length() + 1];
+            strcpy(ret, alto_str.string());
+            delete res_it;
+            return ret;
+        }
+
+    }
\ No newline at end of file
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 05f04a443c..52fb563b02 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1426,16 +1426,6 @@ namespace tesseract {
       *hocr_str += "'";
     }
 
-    static void AddIdToAlto(STRING* alto_str, const std::string base, int num1) {
-      const size_t BUFSIZE = 64;
-      char id_buffer[BUFSIZE];
-      snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
-      id_buffer[BUFSIZE - 1] = '\0';
-      *alto_str += " ID=\"";
-      *alto_str += id_buffer;
-      *alto_str += "\"";
-    }
-
     static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
                              STRING* hocr_str) {
       int left, top, right, bottom;
@@ -1460,41 +1450,6 @@ namespace tesseract {
       *hocr_str += "\">";
     }
 
-    static void AddBoxToAlto(const ResultIterator* it, PageIteratorLevel level,
-                             STRING* alto_str) {
-      int left, top, right, bottom;
-      it->BoundingBox(level, &left, &top, &right, &bottom);
-
-      int hpos = left;
-      int vpos = top;
-      int height = bottom - top;
-      int width = right - left;
-
-      *alto_str += " HPOS=\"";
-      alto_str->add_str_int("", hpos);
-      *alto_str += "\"";
-      *alto_str += " VPOS=\"";
-      alto_str->add_str_int("", vpos);
-      *alto_str += "\"";
-      *alto_str += " WIDTH=\"";
-      alto_str->add_str_int("", width);
-      *alto_str += "\"";
-      *alto_str += " HEIGHT=\"";
-      alto_str->add_str_int("", height);
-      *alto_str += "\"";
-
-      if (level == RIL_WORD) {
-        int wc = it->Confidence(RIL_WORD);
-        *alto_str += " WC=\"0.";
-        alto_str->add_str_int("", wc);
-        *alto_str += "\"";
-      }
-      if (level != RIL_WORD) {
-
-        *alto_str += ">";
-      }
-    }
-
     static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
                             STRING* hocr_str) {
       int left, top, right, bottom;
@@ -1518,14 +1473,6 @@ namespace tesseract {
       return GetHOCRText(nullptr, page_number);
     }
 
-/**
- * Make an XML-formatted string with ALTO markup from the internal
- * data structures.
- */
-    char* TessBaseAPI::GetAltoText(int page_number) {
-      return GetAltoText(nullptr, page_number);
-    }
-
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
  * data structures.
@@ -1752,119 +1699,7 @@ namespace tesseract {
       return ret;
     }
 
-/**
- * Make an XML-formatted string with ALTO markup from the internal
- * data structures.
- */
-    char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
-      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
-        return nullptr;
-
-      int lcnt = 0, bcnt = 0, wcnt = 0;
-      int page_id = page_number;
-
-      STRING alto_str("");
-
-      if (input_file_ == nullptr)
-        SetInputName(nullptr);
 
-#ifdef _WIN32
-      // convert input name from ANSI encoding to utf-8
-  int str16_len =
-      MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
-  wchar_t *uni16_str = new WCHAR[str16_len];
-  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
-                                  uni16_str, str16_len);
-  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
-                                     nullptr, nullptr);
-  char *utf8_str = new char[utf8_len];
-  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
-                      utf8_len, nullptr, nullptr);
-  *input_file_ = utf8_str;
-  delete[] uni16_str;
-  delete[] utf8_str;
-#endif
-
-      alto_str += "\t\t<Page WIDTH=\"";
-      alto_str.add_str_int("", rect_width_);
-      alto_str += "\" HEIGHT=\"";
-      alto_str.add_str_int("", rect_height_);
-      alto_str += "\" PHYSICAL_IMG_NR=\"";
-      alto_str.add_str_int("", rect_height_);
-      alto_str += "\"";
-      AddIdToAlto(&alto_str, "page", page_id);
-      alto_str += ">\n";
-      alto_str += ("\t\t\t<PrintSpace HPOS=\"0\" "
-                   "VPOS=\"0\""
-                   " WIDTH=\"");
-      alto_str.add_str_int("", rect_width_);
-      alto_str += "\" HEIGHT=\"";
-      alto_str.add_str_int("", rect_height_);
-      alto_str += "\">\n";
-
-      ResultIterator *res_it = GetIterator();
-      while (!res_it->Empty(RIL_BLOCK)) {
-        if (res_it->Empty(RIL_WORD)) {
-          res_it->Next(RIL_WORD);
-          continue;
-        }
-
-        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-          alto_str += "\t\t\t\t<TextBlock ";
-          AddIdToAlto(&alto_str, "block", bcnt);
-          AddBoxToAlto(res_it, RIL_BLOCK, &alto_str);
-          alto_str += "\n";
-        }
-
-        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-
-          alto_str += "\t\t\t\t\t<TextLine ";
-          AddIdToAlto(&alto_str, "line", lcnt);
-          AddBoxToAlto(res_it, RIL_TEXTLINE, &alto_str);
-          alto_str += "\n";
-        }
-
-        alto_str += "\t\t\t\t\t\t<String ";
-        AddIdToAlto(&alto_str, "string", wcnt);
-        AddBoxToAlto(res_it, RIL_WORD, &alto_str);
-        alto_str += " CONTENT=\"";
-
-
-        bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
-        bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
-
-        do {
-          const std::unique_ptr<const char[]> grapheme(
-                  res_it->GetUTF8Text(RIL_SYMBOL));
-          if (grapheme && grapheme[0] != 0) {
-            alto_str += HOcrEscape(grapheme.get());
-          }
-          res_it->Next(RIL_SYMBOL);
-        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-
-        alto_str += "\"/>\n";
-
-        wcnt++;
-
-        if (last_word_in_line) {
-          alto_str += "\t\t\t\t\t</TextLine>\n";
-          lcnt++;
-        }
-
-        if (last_word_in_block) {
-          alto_str += "\t\t\t\t</TextBlock>\n";
-          bcnt++;
-        }
-      }
-
-      alto_str += "\t\t\t</PrintSpace>\n";
-      alto_str += "\t\t</Page>\n";
-
-      char *ret = new char[alto_str.length() + 1];
-      strcpy(ret, alto_str.string());
-      delete res_it;
-      return ret;
-    }
 
 /**
  * Make a TSV-formatted string from the internal data structures.
diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp
index 277bc47389..e2cf91003e 100644
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
@@ -193,54 +193,6 @@ namespace tesseract {
       return true;
     }
 
-/**********************************************************************
- * Alto Text Renderer interface implementation
- **********************************************************************/
-    TessAltoRenderer::TessAltoRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "alto") {
-    }
-
-    bool TessAltoRenderer::BeginDocumentHandler() {
-      AppendString(
-              "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-              "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd\">\n"
-              "\t<Description>\n"
-              "\t\t<MeasurementUnit>pixel</MeasurementUnit>\n"
-              "\t\t<sourceImageInformation>\n"
-              "\t\t\t<fileName>");
-
-      AppendString(title());
-
-      AppendString("\t\t\t</fileName>\n"
-                   "\t\t</sourceImageInformation>\n"
-                   "\t\t<OCRProcessing ID=\"OCR_0\">\n"
-                   "\t\t\t<ocrProcessingStep>\n"
-                   "\t\t\t\t<processingSoftware>\n"
-                   "\t\t\t\t\t<softwareName>tesseract 4.0.0</softwareName>\n"
-                   "\t\t\t\t</processingSoftware>\n"
-                   "\t\t\t</ocrProcessingStep>\n"
-                   "\t\t</OCRProcessing>\n"
-                   "\t</Description>\n"
-                   "\t<Layout>\n");
-
-      return true;
-    }
-
-    bool TessAltoRenderer::EndDocumentHandler() {
-      AppendString("\t</Layout>\n</alto>\n");
-
-      return true;
-    }
-
-    bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> hocr(api->GetAltoText(imagenum()));
-      if (hocr == nullptr) return false;
-
-      AppendString(hocr.get());
-
-      return true;
-    }
-
 /**********************************************************************
  * TSV Text Renderer interface implementation
  **********************************************************************/
diff --git a/src/api/renderer.h b/src/api/renderer.h
index cb91f3e005..f2313c31fd 100644
--- a/src/api/renderer.h
+++ b/src/api/renderer.h
@@ -180,6 +180,7 @@ namespace tesseract {
 
     };
 
+
 /**
  * Renders Tesseract output into a TSV string
  */
diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp
index a2b1c4c185..8ec5e7b75f 100644
--- a/src/api/tesseractmain.cpp
+++ b/src/api/tesseractmain.cpp
@@ -421,7 +421,6 @@ static void PreloadRenderers(
 
         api->GetBoolVariable("tessedit_create_alto", &b);
         if (b) {
-            bool font_info;
             tesseract::TessAltoRenderer* renderer =
                     new tesseract::TessAltoRenderer(outputbase);
             if (renderer->happy()) {
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index c6aa833723..997e6a4258 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -387,6 +387,8 @@ Tesseract::Tesseract()
                   this->params()),
       BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file",
                   this->params()),
+      BOOL_MEMBER(tessedit_create_alto, false, "Write .xml ALTO file",
+                  this->params()),
       BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file",
                   this->params()),
       BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file",
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index 8b821f86ac..61ee727616 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -1037,6 +1037,7 @@ class Tesseract : public Wordrec {
   BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
   BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file");
   BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
+  BOOL_VAR_H(tessedit_create_alto, false, "Write .xml ALTO output file");
   BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file");
   BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
   BOOL_VAR_H(textonly_pdf, false,

From 3ba5a09574f21bfd58105d18b607d85c8e8748ac Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Tue, 20 Nov 2018 18:14:22 -0500
Subject: [PATCH 3/9] Clean up formatting

---
 src/api/altorenderer.cpp |   33 +-
 src/api/baseapi.cpp      | 3542 +++++++++++++++++++-------------------
 src/api/baseapi.h        |   27 +-
 src/api/capi.cpp         |    5 -
 src/api/capi.h           |    4 +-
 src/api/renderer.cpp     |  220 +--
 src/api/renderer.h       |    1 -
 7 files changed, 1913 insertions(+), 1919 deletions(-)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index 2b4755d046..a654ad818d 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -1,19 +1,20 @@
-///////////////////////////////////////////////////////////////////////
-// File:        altorenderer.cpp
-// Description: ALTO rendering interface
-//
-// (C) Copyright 2018
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
+/**********************************************************************
+ * File:        altorenderer.cpp
+ * Description: ALTO rendering interface
+ * Author:      Jake Sebright
+ *
+ * (C) Copyright 2018
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
 
 #include "baseapi.h"
 #include "renderer.h"
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 52fb563b02..5a7945154d 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -118,10 +118,10 @@ namespace tesseract {
     static void addAvailableLanguages(const STRING &datadir, const STRING &base,
                                       GenericVector<STRING>* langs)
     {
-      const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
-      const size_t extlen = sizeof(kTrainedDataSuffix);
+        const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
+        const size_t extlen = sizeof(kTrainedDataSuffix);
 #ifdef _WIN32
-      WIN32_FIND_DATA data;
+        WIN32_FIND_DATA data;
     HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data);
     if (handle != INVALID_HANDLE_VALUE) {
       BOOL result = TRUE;
@@ -146,37 +146,37 @@ namespace tesseract {
       FindClose(handle);
     }
 #else  // _WIN32
-      DIR* dir = opendir((datadir + base).string());
-      if (dir != nullptr) {
-        dirent *de;
-        while ((de = readdir(dir))) {
-          char *name = de->d_name;
-          // Skip '.', '..', and hidden files
-          if (name[0] != '.') {
-            struct stat st;
-            if (stat((datadir + base2 + name).string(), &st) == 0 &&
-                (st.st_mode & S_IFDIR) == S_IFDIR) {
-              addAvailableLanguages(datadir, base2 + name, langs);
-            } else {
-              size_t len = strlen(name);
-              if (len > extlen && name[len - extlen] == '.' &&
-                  strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
-                name[len - extlen] = '\0';
-                langs->push_back(base2 + name);
-              }
+        DIR* dir = opendir((datadir + base).string());
+        if (dir != nullptr) {
+            dirent *de;
+            while ((de = readdir(dir))) {
+                char *name = de->d_name;
+                // Skip '.', '..', and hidden files
+                if (name[0] != '.') {
+                    struct stat st;
+                    if (stat((datadir + base2 + name).string(), &st) == 0 &&
+                        (st.st_mode & S_IFDIR) == S_IFDIR) {
+                        addAvailableLanguages(datadir, base2 + name, langs);
+                    } else {
+                        size_t len = strlen(name);
+                        if (len > extlen && name[len - extlen] == '.' &&
+                            strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
+                            name[len - extlen] = '\0';
+                            langs->push_back(base2 + name);
+                        }
+                    }
+                }
             }
-          }
+            closedir(dir);
         }
-        closedir(dir);
-      }
 #endif
     }
 
 // Compare two STRING values (used for sorting).
     static int CompareSTRING(const void* p1, const void* p2) {
-      const STRING* s1 = static_cast<const STRING*>(p1);
-      const STRING* s2 = static_cast<const STRING*>(p2);
-      return strcmp(s1->c_str(), s2->c_str());
+        const STRING* s1 = static_cast<const STRING*>(p1);
+        const STRING* s2 = static_cast<const STRING*>(p2);
+        return strcmp(s1->c_str(), s2->c_str());
     }
 
     TessBaseAPI::TessBaseAPI()
@@ -204,24 +204,24 @@ namespace tesseract {
               rect_height_(0),
               image_width_(0),
               image_height_(0) {
-      const char *locale;
-      locale = std::setlocale(LC_ALL, nullptr);
-      ASSERT_HOST(!strcmp(locale, "C"));
-      locale = std::setlocale(LC_CTYPE, nullptr);
-      ASSERT_HOST(!strcmp(locale, "C"));
-      locale = std::setlocale(LC_NUMERIC, nullptr);
-      ASSERT_HOST(!strcmp(locale, "C"));
+        const char *locale;
+        locale = std::setlocale(LC_ALL, nullptr);
+        ASSERT_HOST(!strcmp(locale, "C"));
+        locale = std::setlocale(LC_CTYPE, nullptr);
+        ASSERT_HOST(!strcmp(locale, "C"));
+        locale = std::setlocale(LC_NUMERIC, nullptr);
+        ASSERT_HOST(!strcmp(locale, "C"));
     }
 
     TessBaseAPI::~TessBaseAPI() {
-      End();
+        End();
     }
 
 /**
  * Returns the version identifier as a static string. Do not delete.
  */
     const char* TessBaseAPI::Version() {
-      return PACKAGE_VERSION;
+        return PACKAGE_VERSION;
     }
 
 /**
@@ -238,7 +238,7 @@ namespace tesseract {
 #endif
     size_t TessBaseAPI::getOpenCLDevice(void **data) {
 #ifdef USE_OPENCL
-      #ifdef USE_DEVICE_SELECTION
+        #ifdef USE_DEVICE_SELECTION
   ds_device device = OpenclDevice::getDeviceSelection();
   if (device.type == DS_DEVICE_OPENCL_DEVICE) {
     *data = new cl_device_id;
@@ -248,8 +248,8 @@ namespace tesseract {
 #endif
 #endif
 
-      *data = nullptr;
-      return 0;
+        *data = nullptr;
+        return 0;
     }
 
 /**
@@ -258,7 +258,7 @@ namespace tesseract {
  */
     void TessBaseAPI::CatchSignals() {
 #ifdef __linux__
-      struct sigaction action;
+        struct sigaction action;
   memset(&action, 0, sizeof(action));
   action.sa_handler = &signal_exit;
   action.sa_flags = SA_RESETHAND;
@@ -266,8 +266,8 @@ namespace tesseract {
   sigaction(SIGFPE, &action, nullptr);
   sigaction(SIGBUS, &action, nullptr);
 #else
-      // Warn API users that an implementation is needed.
-      tprintf("CatchSignals has no non-linux implementation!\n");
+        // Warn API users that an implementation is needed.
+        tprintf("CatchSignals has no non-linux implementation!\n");
 #endif
     }
 
@@ -276,70 +276,70 @@ namespace tesseract {
  * loading a UNLV zone file.
  */
     void TessBaseAPI::SetInputName(const char* name) {
-      if (input_file_ == nullptr)
-        input_file_ = new STRING(name);
-      else
-        *input_file_ = name;
+        if (input_file_ == nullptr)
+            input_file_ = new STRING(name);
+        else
+            *input_file_ = name;
     }
 
 /** Set the name of the output files. Needed only for debugging. */
     void TessBaseAPI::SetOutputName(const char* name) {
-      if (output_file_ == nullptr)
-        output_file_ = new STRING(name);
-      else
-        *output_file_ = name;
+        if (output_file_ == nullptr)
+            output_file_ = new STRING(name);
+        else
+            *output_file_ = name;
     }
 
     bool TessBaseAPI::SetVariable(const char* name, const char* value) {
-      if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-      return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
-                                  tesseract_->params());
+        if (tesseract_ == nullptr) tesseract_ = new Tesseract;
+        return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
+                                    tesseract_->params());
     }
 
     bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
-      if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-      return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
-                                  tesseract_->params());
+        if (tesseract_ == nullptr) tesseract_ = new Tesseract;
+        return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
+                                    tesseract_->params());
     }
 
     bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
-      IntParam *p = ParamUtils::FindParam<IntParam>(
-              name, GlobalParams()->int_params, tesseract_->params()->int_params);
-      if (p == nullptr) return false;
-      *value = (int32_t)(*p);
-      return true;
+        IntParam *p = ParamUtils::FindParam<IntParam>(
+                name, GlobalParams()->int_params, tesseract_->params()->int_params);
+        if (p == nullptr) return false;
+        *value = (int32_t)(*p);
+        return true;
     }
 
     bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
-      BoolParam *p = ParamUtils::FindParam<BoolParam>(
-              name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
-      if (p == nullptr) return false;
-      *value = (BOOL8)(*p);
-      return true;
+        BoolParam *p = ParamUtils::FindParam<BoolParam>(
+                name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
+        if (p == nullptr) return false;
+        *value = (BOOL8)(*p);
+        return true;
     }
 
     const char *TessBaseAPI::GetStringVariable(const char *name) const {
-      StringParam *p = ParamUtils::FindParam<StringParam>(
-              name, GlobalParams()->string_params, tesseract_->params()->string_params);
-      return (p != nullptr) ? p->string() : nullptr;
+        StringParam *p = ParamUtils::FindParam<StringParam>(
+                name, GlobalParams()->string_params, tesseract_->params()->string_params);
+        return (p != nullptr) ? p->string() : nullptr;
     }
 
     bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
-      DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
-              name, GlobalParams()->double_params, tesseract_->params()->double_params);
-      if (p == nullptr) return false;
-      *value = (double)(*p);
-      return true;
+        DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
+                name, GlobalParams()->double_params, tesseract_->params()->double_params);
+        if (p == nullptr) return false;
+        *value = (double)(*p);
+        return true;
     }
 
 /** Get value of named variable as a string, if it exists. */
     bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
-      return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
+        return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
     }
 
 /** Print Tesseract parameters to the given file. */
     void TessBaseAPI::PrintVariables(FILE *fp) const {
-      ParamUtils::PrintParams(fp, tesseract_->params());
+        ParamUtils::PrintParams(fp, tesseract_->params());
     }
 
 /**
@@ -355,8 +355,8 @@ namespace tesseract {
                           const GenericVector<STRING> *vars_vec,
                           const GenericVector<STRING> *vars_values,
                           bool set_only_non_debug_params) {
-      return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
-                  vars_values, set_only_non_debug_params, nullptr);
+        return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
+                    vars_values, set_only_non_debug_params, nullptr);
     }
 
 // In-memory version reads the traineddata file directly from the given
@@ -367,72 +367,72 @@ namespace tesseract {
                           const GenericVector<STRING>* vars_vec,
                           const GenericVector<STRING>* vars_values,
                           bool set_only_non_debug_params, FileReader reader) {
-      PERF_COUNT_START("TessBaseAPI::Init")
-      // Default language is "eng".
-      if (language == nullptr) language = "eng";
-      STRING datapath = data_size == 0 ? data : language;
-      // If the datapath, OcrEngineMode or the language have changed - start again.
-      // Note that the language_ field stores the last requested language that was
-      // initialized successfully, while tesseract_->lang stores the language
-      // actually used. They differ only if the requested language was nullptr, in
-      // which case tesseract_->lang is set to the Tesseract default ("eng").
-      if (tesseract_ != nullptr &&
-          (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
-           last_oem_requested_ != oem ||
-           (*language_ != language && tesseract_->lang != language))) {
-        delete tesseract_;
-        tesseract_ = nullptr;
-      }
-        // PERF_COUNT_SUB("delete tesseract_")
+        PERF_COUNT_START("TessBaseAPI::Init")
+        // Default language is "eng".
+        if (language == nullptr) language = "eng";
+        STRING datapath = data_size == 0 ? data : language;
+        // If the datapath, OcrEngineMode or the language have changed - start again.
+        // Note that the language_ field stores the last requested language that was
+        // initialized successfully, while tesseract_->lang stores the language
+        // actually used. They differ only if the requested language was nullptr, in
+        // which case tesseract_->lang is set to the Tesseract default ("eng").
+        if (tesseract_ != nullptr &&
+            (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
+             last_oem_requested_ != oem ||
+             (*language_ != language && tesseract_->lang != language))) {
+            delete tesseract_;
+            tesseract_ = nullptr;
+        }
+            // PERF_COUNT_SUB("delete tesseract_")
 #ifdef USE_OPENCL
-        OpenclDevice od;
+            OpenclDevice od;
   od.InitEnv();
 #endif
-      PERF_COUNT_SUB("OD::InitEnv()")
-      bool reset_classifier = true;
-      if (tesseract_ == nullptr) {
-        reset_classifier = false;
-        tesseract_ = new Tesseract;
-        if (reader != nullptr) reader_ = reader;
-        TessdataManager mgr(reader_);
-        if (data_size != 0) {
-          mgr.LoadMemBuffer(language, data, data_size);
-        }
-        if (tesseract_->init_tesseract(
-                datapath.string(),
-                output_file_ != nullptr ? output_file_->string() : nullptr,
-                language, oem, configs, configs_size, vars_vec, vars_values,
-                set_only_non_debug_params, &mgr) != 0) {
-          return -1;
+        PERF_COUNT_SUB("OD::InitEnv()")
+        bool reset_classifier = true;
+        if (tesseract_ == nullptr) {
+            reset_classifier = false;
+            tesseract_ = new Tesseract;
+            if (reader != nullptr) reader_ = reader;
+            TessdataManager mgr(reader_);
+            if (data_size != 0) {
+                mgr.LoadMemBuffer(language, data, data_size);
+            }
+            if (tesseract_->init_tesseract(
+                    datapath.string(),
+                    output_file_ != nullptr ? output_file_->string() : nullptr,
+                    language, oem, configs, configs_size, vars_vec, vars_values,
+                    set_only_non_debug_params, &mgr) != 0) {
+                return -1;
+            }
         }
-      }
 
-      PERF_COUNT_SUB("update tesseract_")
-      // Update datapath and language requested for the last valid initialization.
-      if (datapath_ == nullptr)
-        datapath_ = new STRING(datapath);
-      else
-        *datapath_ = datapath;
-      if ((strcmp(datapath_->string(), "") == 0) &&
-          (strcmp(tesseract_->datadir.string(), "") != 0))
-        *datapath_ = tesseract_->datadir;
-
-      if (language_ == nullptr)
-        language_ = new STRING(language);
-      else
-        *language_ = language;
-      last_oem_requested_ = oem;
+        PERF_COUNT_SUB("update tesseract_")
+        // Update datapath and language requested for the last valid initialization.
+        if (datapath_ == nullptr)
+            datapath_ = new STRING(datapath);
+        else
+            *datapath_ = datapath;
+        if ((strcmp(datapath_->string(), "") == 0) &&
+            (strcmp(tesseract_->datadir.string(), "") != 0))
+            *datapath_ = tesseract_->datadir;
+
+        if (language_ == nullptr)
+            language_ = new STRING(language);
+        else
+            *language_ = language;
+        last_oem_requested_ = oem;
 
 #ifndef DISABLED_LEGACY_ENGINE
-      // PERF_COUNT_SUB("update last_oem_requested_")
-      // For same language and datapath, just reset the adaptive classifier.
-      if (reset_classifier) {
-        tesseract_->ResetAdaptiveClassifier();
-        PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
-      }
+        // PERF_COUNT_SUB("update last_oem_requested_")
+        // For same language and datapath, just reset the adaptive classifier.
+        if (reset_classifier) {
+            tesseract_->ResetAdaptiveClassifier();
+            PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
+        }
 #endif  // ndef DISABLED_LEGACY_ENGINE
-      PERF_COUNT_END
-      return 0;
+        PERF_COUNT_END
+        return 0;
     }
 
 /**
@@ -444,8 +444,8 @@ namespace tesseract {
  * The returned string should NOT be deleted.
  */
     const char* TessBaseAPI::GetInitLanguagesAsString() const {
-      return (language_ == nullptr || language_->string() == nullptr) ?
-             "" : language_->string();
+        return (language_ == nullptr || language_->string() == nullptr) ?
+               "" : language_->string();
     }
 
 /**
@@ -455,13 +455,13 @@ namespace tesseract {
  */
     void TessBaseAPI::GetLoadedLanguagesAsVector(
             GenericVector<STRING>* langs) const {
-      langs->clear();
-      if (tesseract_ != nullptr) {
-        langs->push_back(tesseract_->lang);
-        int num_subs = tesseract_->num_sub_langs();
-        for (int i = 0; i < num_subs; ++i)
-          langs->push_back(tesseract_->get_sub_lang(i)->lang);
-      }
+        langs->clear();
+        if (tesseract_ != nullptr) {
+            langs->push_back(tesseract_->lang);
+            int num_subs = tesseract_->num_sub_langs();
+            for (int i = 0; i < num_subs; ++i)
+                langs->push_back(tesseract_->get_sub_lang(i)->lang);
+        }
     }
 
 /**
@@ -469,11 +469,11 @@ namespace tesseract {
  */
     void TessBaseAPI::GetAvailableLanguagesAsVector(
             GenericVector<STRING>* langs) const {
-      langs->clear();
-      if (tesseract_ != nullptr) {
-        addAvailableLanguages(tesseract_->datadir, "", langs);
-        langs->sort(CompareSTRING);
-      }
+        langs->clear();
+        if (tesseract_ != nullptr) {
+            addAvailableLanguages(tesseract_->datadir, "", langs);
+            langs->sort(CompareSTRING);
+        }
     }
 
 //TODO(amit): Adapt to lstm
@@ -485,12 +485,12 @@ namespace tesseract {
  * in a separate API at some future time.
  */
     int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
-      if (tesseract_ == nullptr)
-        tesseract_ = new Tesseract;
-      else
-        ParamUtils::ResetToDefaults(tesseract_->params());
-      TessdataManager mgr;
-      return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
+        if (tesseract_ == nullptr)
+            tesseract_ = new Tesseract;
+        else
+            ParamUtils::ResetToDefaults(tesseract_->params());
+        TessdataManager mgr;
+        return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
@@ -499,12 +499,12 @@ namespace tesseract {
  * AnalysePage. Calls that attempt recognition will generate an error.
  */
     void TessBaseAPI::InitForAnalysePage() {
-      if (tesseract_ == nullptr) {
-        tesseract_ = new Tesseract;
+        if (tesseract_ == nullptr) {
+            tesseract_ = new Tesseract;
 #ifndef DISABLED_LEGACY_ENGINE
-        tesseract_->InitAdaptiveClassifier(nullptr);
+            tesseract_->InitAdaptiveClassifier(nullptr);
 #endif
-      }
+        }
     }
 
 /**
@@ -513,12 +513,12 @@ namespace tesseract {
  * and also accepts a relative or absolute path name.
  */
     void TessBaseAPI::ReadConfigFile(const char* filename) {
-      tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
+        tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
     }
 
 /** Same as above, but only set debug params from the given config file. */
     void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
-      tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
+        tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
     }
 
 /**
@@ -527,17 +527,17 @@ namespace tesseract {
  * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
  */
     void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
-      if (tesseract_ == nullptr)
-        tesseract_ = new Tesseract;
-      tesseract_->tessedit_pageseg_mode.set_value(mode);
+        if (tesseract_ == nullptr)
+            tesseract_ = new Tesseract;
+        tesseract_->tessedit_pageseg_mode.set_value(mode);
     }
 
 /** Return the current page segmentation mode. */
     PageSegMode TessBaseAPI::GetPageSegMode() const {
-      if (tesseract_ == nullptr)
-        return PSM_SINGLE_BLOCK;
-      return static_cast<PageSegMode>(
-              static_cast<int>(tesseract_->tessedit_pageseg_mode));
+        if (tesseract_ == nullptr)
+            return PSM_SINGLE_BLOCK;
+        return static_cast<PageSegMode>(
+                static_cast<int>(tesseract_->tessedit_pageseg_mode));
     }
 
 /**
@@ -558,17 +558,17 @@ namespace tesseract {
                                      int bytes_per_line,
                                      int left, int top,
                                      int width, int height) {
-      if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
-        return nullptr;  // Nothing worth doing.
+        if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
+            return nullptr;  // Nothing worth doing.
 
-      // Since this original api didn't give the exact size of the image,
-      // we have to invent a reasonable value.
-      int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
-      SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
-               bytes_per_pixel, bytes_per_line);
-      SetRectangle(left, top, width, height);
+        // Since this original api didn't give the exact size of the image,
+        // we have to invent a reasonable value.
+        int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
+        SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
+                 bytes_per_pixel, bytes_per_line);
+        SetRectangle(left, top, width, height);
 
-      return GetUTF8Text();
+        return GetUTF8Text();
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -577,10 +577,10 @@ namespace tesseract {
  * adaptive data.
  */
     void TessBaseAPI::ClearAdaptiveClassifier() {
-      if (tesseract_ == nullptr)
-        return;
-      tesseract_->ResetAdaptiveClassifier();
-      tesseract_->ResetDocumentDictionary();
+        if (tesseract_ == nullptr)
+            return;
+        tesseract_->ResetAdaptiveClassifier();
+        tesseract_->ResetDocumentDictionary();
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
@@ -594,18 +594,18 @@ namespace tesseract {
     void TessBaseAPI::SetImage(const unsigned char* imagedata,
                                int width, int height,
                                int bytes_per_pixel, int bytes_per_line) {
-      if (InternalSetImage()) {
-        thresholder_->SetImage(imagedata, width, height,
-                               bytes_per_pixel, bytes_per_line);
-        SetInputImage(thresholder_->GetPixRect());
-      }
+        if (InternalSetImage()) {
+            thresholder_->SetImage(imagedata, width, height,
+                                   bytes_per_pixel, bytes_per_line);
+            SetInputImage(thresholder_->GetPixRect());
+        }
     }
 
     void TessBaseAPI::SetSourceResolution(int ppi) {
-      if (thresholder_)
-        thresholder_->SetSourceYResolution(ppi);
-      else
-        tprintf("Please call SetImage before SetSourceResolution.\n");
+        if (thresholder_)
+            thresholder_->SetSourceYResolution(ppi);
+        else
+            tprintf("Please call SetImage before SetSourceResolution.\n");
     }
 
 /**
@@ -617,17 +617,17 @@ namespace tesseract {
  * and it is therefore more efficient to provide a Pix directly.
  */
     void TessBaseAPI::SetImage(Pix* pix) {
-      if (InternalSetImage()) {
-        if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
-          // remove alpha channel from png
-          PIX* p1 = pixRemoveAlpha(pix);
-          pixSetSpp(p1, 3);
-          pix = pixCopy(nullptr, p1);
-          pixDestroy(&p1);
-        }
-        thresholder_->SetImage(pix);
-        SetInputImage(thresholder_->GetPixRect());
-      }
+        if (InternalSetImage()) {
+            if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
+                // remove alpha channel from png
+                PIX* p1 = pixRemoveAlpha(pix);
+                pixSetSpp(p1, 3);
+                pix = pixCopy(nullptr, p1);
+                pixDestroy(&p1);
+            }
+            thresholder_->SetImage(pix);
+            SetInputImage(thresholder_->GetPixRect());
+        }
     }
 
 /**
@@ -636,10 +636,10 @@ namespace tesseract {
  * can be recognized with the same image.
  */
     void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
-      if (thresholder_ == nullptr)
-        return;
-      thresholder_->SetRectangle(left, top, width, height);
-      ClearResults();
+        if (thresholder_ == nullptr)
+            return;
+        thresholder_->SetRectangle(left, top, width, height);
+        ClearResults();
     }
 
 /**
@@ -647,12 +647,12 @@ namespace tesseract {
  * Get a copy of the internal thresholded image from Tesseract.
  */
     Pix* TessBaseAPI::GetThresholdedImage() {
-      if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
-      if (tesseract_->pix_binary() == nullptr &&
-          !Threshold(tesseract_->mutable_pix_binary())) {
-        return nullptr;
-      }
-      return pixClone(tesseract_->pix_binary());
+        if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
+        if (tesseract_->pix_binary() == nullptr &&
+            !Threshold(tesseract_->mutable_pix_binary())) {
+            return nullptr;
+        }
+        return pixClone(tesseract_->pix_binary());
     }
 
 /**
@@ -661,7 +661,7 @@ namespace tesseract {
  * Can be called before or after Recognize.
  */
     Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
-      return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
+        return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
     }
 
 /**
@@ -674,8 +674,8 @@ namespace tesseract {
  */
     Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
                                     Pixa** pixa, int** blockids, int** paraids) {
-      return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
-                                pixa, blockids, paraids);
+        return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
+                                  pixa, blockids, paraids);
     }
 
 /**
@@ -687,7 +687,7 @@ namespace tesseract {
  * array of one element per line. delete [] after use.
  */
     Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
-      return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
+        return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
     }
 
 /**
@@ -696,7 +696,7 @@ namespace tesseract {
  * Can be called before or after Recognize.
  */
     Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
-      return GetComponentImages(RIL_WORD, true, pixa, nullptr);
+        return GetComponentImages(RIL_WORD, true, pixa, nullptr);
     }
 
 /**
@@ -706,7 +706,7 @@ namespace tesseract {
  * Can be called before or after Recognize.
  */
     Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
-      return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
+        return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
     }
 
 /**
@@ -722,88 +722,88 @@ namespace tesseract {
                                           const int raw_padding,
                                           Pixa** pixa, int** blockids,
                                           int** paraids) {
-      PageIterator* page_it = GetIterator();
-      if (page_it == nullptr)
-        page_it = AnalyseLayout();
-      if (page_it == nullptr)
-        return nullptr;  // Failed.
-
-      // Count the components to get a size for the arrays.
-      int component_count = 0;
-      int left, top, right, bottom;
-
-      TessResultCallback<bool>* get_bbox = nullptr;
-      if (raw_image) {
-        // Get bounding box in original raw image with padding.
-        get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
-                                            level, raw_padding,
-                                            &left, &top, &right, &bottom);
-      } else {
-        // Get bounding box from binarized imaged. Note that this could be
-        // differently scaled from the original image.
-        get_bbox = NewPermanentTessCallback(page_it,
-                                            &PageIterator::BoundingBoxInternal,
-                                            level, &left, &top, &right, &bottom);
-      }
-      do {
-        if (get_bbox->Run() &&
-            (!text_only || PTIsTextType(page_it->BlockType())))
-          ++component_count;
-      } while (page_it->Next(level));
-
-      Boxa* boxa = boxaCreate(component_count);
-      if (pixa != nullptr)
-        *pixa = pixaCreate(component_count);
-      if (blockids != nullptr)
-        *blockids = new int[component_count];
-      if (paraids != nullptr)
-        *paraids = new int[component_count];
-
-      int blockid = 0;
-      int paraid = 0;
-      int component_index = 0;
-      page_it->Begin();
-      do {
-        if (get_bbox->Run() &&
-            (!text_only || PTIsTextType(page_it->BlockType()))) {
-          Box* lbox = boxCreate(left, top, right - left, bottom - top);
-          boxaAddBox(boxa, lbox, L_INSERT);
-          if (pixa != nullptr) {
-            Pix* pix = nullptr;
-            if (raw_image) {
-              pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
-                                      &top);
-            } else {
-              pix = page_it->GetBinaryImage(level);
-            }
-            pixaAddPix(*pixa, pix, L_INSERT);
-            pixaAddBox(*pixa, lbox, L_CLONE);
-          }
-          if (paraids != nullptr) {
-            (*paraids)[component_index] = paraid;
-            if (page_it->IsAtFinalElement(RIL_PARA, level))
-              ++paraid;
-          }
-          if (blockids != nullptr) {
-            (*blockids)[component_index] = blockid;
-            if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
-              ++blockid;
-              paraid = 0;
-            }
-          }
-          ++component_index;
+        PageIterator* page_it = GetIterator();
+        if (page_it == nullptr)
+            page_it = AnalyseLayout();
+        if (page_it == nullptr)
+            return nullptr;  // Failed.
+
+        // Count the components to get a size for the arrays.
+        int component_count = 0;
+        int left, top, right, bottom;
+
+        TessResultCallback<bool>* get_bbox = nullptr;
+        if (raw_image) {
+            // Get bounding box in original raw image with padding.
+            get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
+                                                level, raw_padding,
+                                                &left, &top, &right, &bottom);
+        } else {
+            // Get bounding box from binarized imaged. Note that this could be
+            // differently scaled from the original image.
+            get_bbox = NewPermanentTessCallback(page_it,
+                                                &PageIterator::BoundingBoxInternal,
+                                                level, &left, &top, &right, &bottom);
         }
-      } while (page_it->Next(level));
-      delete page_it;
-      delete get_bbox;
-      return boxa;
+        do {
+            if (get_bbox->Run() &&
+                (!text_only || PTIsTextType(page_it->BlockType())))
+                ++component_count;
+        } while (page_it->Next(level));
+
+        Boxa* boxa = boxaCreate(component_count);
+        if (pixa != nullptr)
+            *pixa = pixaCreate(component_count);
+        if (blockids != nullptr)
+            *blockids = new int[component_count];
+        if (paraids != nullptr)
+            *paraids = new int[component_count];
+
+        int blockid = 0;
+        int paraid = 0;
+        int component_index = 0;
+        page_it->Begin();
+        do {
+            if (get_bbox->Run() &&
+                (!text_only || PTIsTextType(page_it->BlockType()))) {
+                Box* lbox = boxCreate(left, top, right - left, bottom - top);
+                boxaAddBox(boxa, lbox, L_INSERT);
+                if (pixa != nullptr) {
+                    Pix* pix = nullptr;
+                    if (raw_image) {
+                        pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
+                                                &top);
+                    } else {
+                        pix = page_it->GetBinaryImage(level);
+                    }
+                    pixaAddPix(*pixa, pix, L_INSERT);
+                    pixaAddBox(*pixa, lbox, L_CLONE);
+                }
+                if (paraids != nullptr) {
+                    (*paraids)[component_index] = paraid;
+                    if (page_it->IsAtFinalElement(RIL_PARA, level))
+                        ++paraid;
+                }
+                if (blockids != nullptr) {
+                    (*blockids)[component_index] = blockid;
+                    if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
+                        ++blockid;
+                        paraid = 0;
+                    }
+                }
+                ++component_index;
+            }
+        } while (page_it->Next(level));
+        delete page_it;
+        delete get_bbox;
+        return boxa;
     }
 
     int TessBaseAPI::GetThresholdedImageScaleFactor() const {
-      if (thresholder_ == nullptr) {
-        return 0;
-      }
-      return thresholder_->GetScaleFactor();
+        if (thresholder_ == nullptr) {
+            return 0;
+        }
+        return thresholder_->GetScaleFactor();
     }
 
 /**
@@ -824,17 +824,17 @@ namespace tesseract {
     PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
 
     PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
-      if (FindLines() == 0) {
-        if (block_list_->empty())
-          return nullptr;  // The page was empty.
-        page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
-        DetectParagraphs(false);
-        return new PageIterator(
-                page_res_, tesseract_, thresholder_->GetScaleFactor(),
-                thresholder_->GetScaledYResolution(),
-                rect_left_, rect_top_, rect_width_, rect_height_);
-      }
-      return nullptr;
+        if (FindLines() == 0) {
+            if (block_list_->empty())
+                return nullptr;  // The page was empty.
+            page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
+            DetectParagraphs(false);
+            return new PageIterator(
+                    page_res_, tesseract_, thresholder_->GetScaleFactor(),
+                    thresholder_->GetScaledYResolution(),
+                    rect_left_, rect_top_, rect_width_, rect_height_);
+        }
+        return nullptr;
     }
 
 /**
@@ -842,125 +842,125 @@ namespace tesseract {
  * internal structures.
  */
     int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
-      if (tesseract_ == nullptr)
-        return -1;
-      if (FindLines() != 0)
-        return -1;
-      delete page_res_;
-      if (block_list_->empty()) {
-        page_res_ = new PAGE_RES(false, block_list_,
-                                 &tesseract_->prev_word_best_choice_);
-        return 0; // Empty page.
-      }
+        if (tesseract_ == nullptr)
+            return -1;
+        if (FindLines() != 0)
+            return -1;
+        delete page_res_;
+        if (block_list_->empty()) {
+            page_res_ = new PAGE_RES(false, block_list_,
+                                     &tesseract_->prev_word_best_choice_);
+            return 0; // Empty page.
+        }
 
-      tesseract_->SetBlackAndWhitelist();
-      recognition_done_ = true;
+        tesseract_->SetBlackAndWhitelist();
+        recognition_done_ = true;
 #ifndef DISABLED_LEGACY_ENGINE
-      if (tesseract_->tessedit_resegment_from_line_boxes) {
-        page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
-      } else if (tesseract_->tessedit_resegment_from_boxes) {
-        page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
-      } else
+        if (tesseract_->tessedit_resegment_from_line_boxes) {
+            page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
+        } else if (tesseract_->tessedit_resegment_from_boxes) {
+            page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
+        } else
 #endif  // ndef DISABLED_LEGACY_ENGINE
-      {
-        page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
-                                 block_list_, &tesseract_->prev_word_best_choice_);
-      }
+        {
+            page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
+                                     block_list_, &tesseract_->prev_word_best_choice_);
+        }
 
-      if (page_res_ == nullptr) {
-        return -1;
-      }
+        if (page_res_ == nullptr) {
+            return -1;
+        }
 
-      if (tesseract_->tessedit_train_line_recognizer) {
-        tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
-        tesseract_->CorrectClassifyWords(page_res_);
-        return 0;
-      }
+        if (tesseract_->tessedit_train_line_recognizer) {
+            tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
+            tesseract_->CorrectClassifyWords(page_res_);
+            return 0;
+        }
 #ifndef DISABLED_LEGACY_ENGINE
-      if (tesseract_->tessedit_make_boxes_from_boxes) {
-        tesseract_->CorrectClassifyWords(page_res_);
-        return 0;
-      }
+        if (tesseract_->tessedit_make_boxes_from_boxes) {
+            tesseract_->CorrectClassifyWords(page_res_);
+            return 0;
+        }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-      if (truth_cb_ != nullptr) {
-        tesseract_->wordrec_run_blamer.set_value(true);
-        PageIterator *page_it = new PageIterator(
-                page_res_, tesseract_, thresholder_->GetScaleFactor(),
-                thresholder_->GetScaledYResolution(),
-                rect_left_, rect_top_, rect_width_, rect_height_);
-        truth_cb_->Run(tesseract_->getDict().getUnicharset(),
-                       image_height_, page_it, this->tesseract()->pix_grey());
-        delete page_it;
-      }
+        if (truth_cb_ != nullptr) {
+            tesseract_->wordrec_run_blamer.set_value(true);
+            PageIterator *page_it = new PageIterator(
+                    page_res_, tesseract_, thresholder_->GetScaleFactor(),
+                    thresholder_->GetScaledYResolution(),
+                    rect_left_, rect_top_, rect_width_, rect_height_);
+            truth_cb_->Run(tesseract_->getDict().getUnicharset(),
+                           image_height_, page_it, this->tesseract()->pix_grey());
+            delete page_it;
+        }
 
-      int result = 0;
-      if (tesseract_->interactive_display_mode) {
+        int result = 0;
+        if (tesseract_->interactive_display_mode) {
 #ifndef GRAPHICS_DISABLED
-        tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
+            tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
 #endif  // GRAPHICS_DISABLED
-        // The page_res is invalid after an interactive session, so cleanup
-        // in a way that lets us continue to the next page without crashing.
-        delete page_res_;
-        page_res_ = nullptr;
-        return -1;
+            // The page_res is invalid after an interactive session, so cleanup
+            // in a way that lets us continue to the next page without crashing.
+            delete page_res_;
+            page_res_ = nullptr;
+            return -1;
 #ifndef DISABLED_LEGACY_ENGINE
-      } else if (tesseract_->tessedit_train_from_boxes) {
-        STRING fontname;
-        ExtractFontName(*output_file_, &fontname);
-        tesseract_->ApplyBoxTraining(fontname, page_res_);
-      } else if (tesseract_->tessedit_ambigs_training) {
-        FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
-        // OCR the page segmented into words by tesseract.
-        tesseract_->recog_training_segmented(
-                *input_file_, page_res_, monitor, training_output_file);
-        fclose(training_output_file);
+        } else if (tesseract_->tessedit_train_from_boxes) {
+            STRING fontname;
+            ExtractFontName(*output_file_, &fontname);
+            tesseract_->ApplyBoxTraining(fontname, page_res_);
+        } else if (tesseract_->tessedit_ambigs_training) {
+            FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
+            // OCR the page segmented into words by tesseract.
+            tesseract_->recog_training_segmented(
+                    *input_file_, page_res_, monitor, training_output_file);
+            fclose(training_output_file);
 #endif  // ndef DISABLED_LEGACY_ENGINE
-      } else {
-        // Now run the main recognition.
-        bool wait_for_text = true;
-        GetBoolVariable("paragraph_text_based", &wait_for_text);
-        if (!wait_for_text) DetectParagraphs(false);
-        if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
-          if (wait_for_text) DetectParagraphs(true);
         } else {
-          result = -1;
+            // Now run the main recognition.
+            bool wait_for_text = true;
+            GetBoolVariable("paragraph_text_based", &wait_for_text);
+            if (!wait_for_text) DetectParagraphs(false);
+            if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
+                if (wait_for_text) DetectParagraphs(true);
+            } else {
+                result = -1;
+            }
         }
-      }
-      return result;
+        return result;
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Tests the chopper by exhaustively running chop_one_blob. */
     int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
-      if (tesseract_ == nullptr)
-        return -1;
-      if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-        tprintf("Please call SetImage before attempting recognition.\n");
-        return -1;
-      }
-      if (page_res_ != nullptr)
-        ClearResults();
-      if (FindLines() != 0)
-        return -1;
-      // Additional conditions under which chopper test cannot be run
-      if (tesseract_->interactive_display_mode) return -1;
+        if (tesseract_ == nullptr)
+            return -1;
+        if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+            tprintf("Please call SetImage before attempting recognition.\n");
+            return -1;
+        }
+        if (page_res_ != nullptr)
+            ClearResults();
+        if (FindLines() != 0)
+            return -1;
+        // Additional conditions under which chopper test cannot be run
+        if (tesseract_->interactive_display_mode) return -1;
 
-      recognition_done_ = true;
+        recognition_done_ = true;
 
-      page_res_ = new PAGE_RES(false, block_list_,
-                               &(tesseract_->prev_word_best_choice_));
+        page_res_ = new PAGE_RES(false, block_list_,
+                                 &(tesseract_->prev_word_best_choice_));
 
-      PAGE_RES_IT page_res_it(page_res_);
+        PAGE_RES_IT page_res_it(page_res_);
 
-      while (page_res_it.word() != nullptr) {
-        WERD_RES *word_res = page_res_it.word();
-        GenericVector<TBOX> boxes;
-        tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
-                                      page_res_it.row()->row, word_res);
-        page_res_it.forward();
-      }
-      return 0;
+        while (page_res_it.word() != nullptr) {
+            WERD_RES *word_res = page_res_it.word();
+            GenericVector<TBOX> boxes;
+            tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
+                                          page_res_it.row()->row, word_res);
+            page_res_it.forward();
+        }
+        return 0;
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
@@ -970,17 +970,17 @@ namespace tesseract {
     Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
 
     const char * TessBaseAPI::GetInputName() {
-      if (input_file_)
-        return input_file_->c_str();
-      return nullptr;
+        if (input_file_)
+            return input_file_->c_str();
+        return nullptr;
     }
 
     const char *  TessBaseAPI::GetDatapath() {
-      return tesseract_->datadir.c_str();
+        return tesseract_->datadir.c_str();
     }
 
     int TessBaseAPI::GetSourceYResolution() {
-      return thresholder_->GetSourceYResolution();
+        return thresholder_->GetSourceYResolution();
     }
 
 // If flist exists, get data from there. Otherwise get data from buf.
@@ -993,56 +993,56 @@ namespace tesseract {
                                            int timeout_millisec,
                                            TessResultRenderer* renderer,
                                            int tessedit_page_number) {
-      if (!flist && !buf) return false;
-      int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-      char pagename[MAX_PATH];
-
-      GenericVector<STRING> lines;
-      if (!flist) {
-        buf->split('\n', &lines);
-        if (lines.empty()) return false;
-      }
+        if (!flist && !buf) return false;
+        int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+        char pagename[MAX_PATH];
+
+        GenericVector<STRING> lines;
+        if (!flist) {
+            buf->split('\n', &lines);
+            if (lines.empty()) return false;
+        }
 
-      // Skip to the requested page number.
-      for (int i = 0; i < page; i++) {
-        if (flist) {
-          if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+        // Skip to the requested page number.
+        for (int i = 0; i < page; i++) {
+            if (flist) {
+                if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+            }
         }
-      }
 
-      // Begin producing output
-      if (renderer && !renderer->BeginDocument(unknown_title_)) {
-        return false;
-      }
+        // Begin producing output
+        if (renderer && !renderer->BeginDocument(unknown_title_)) {
+            return false;
+        }
 
-      // Loop over all pages - or just the requested one
-      while (true) {
-        if (flist) {
-          if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
-        } else {
-          if (page >= lines.size()) break;
-          snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
-        }
-        chomp_string(pagename);
-        Pix *pix = pixRead(pagename);
-        if (pix == nullptr) {
-          tprintf("Image file %s cannot be read!\n", pagename);
-          return false;
-        }
-        tprintf("Page %d : %s\n", page, pagename);
-        bool r = ProcessPage(pix, page, pagename, retry_config,
-                             timeout_millisec, renderer);
-        pixDestroy(&pix);
-        if (!r) return false;
-        if (tessedit_page_number >= 0) break;
-        ++page;
-      }
+        // Loop over all pages - or just the requested one
+        while (true) {
+            if (flist) {
+                if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+            } else {
+                if (page >= lines.size()) break;
+                snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
+            }
+            chomp_string(pagename);
+            Pix *pix = pixRead(pagename);
+            if (pix == nullptr) {
+                tprintf("Image file %s cannot be read!\n", pagename);
+                return false;
+            }
+            tprintf("Page %d : %s\n", page, pagename);
+            bool r = ProcessPage(pix, page, pagename, retry_config,
+                                 timeout_millisec, renderer);
+            pixDestroy(&pix);
+            if (!r) return false;
+            if (tessedit_page_number >= 0) break;
+            ++page;
+        }
 
-      // Finish producing output
-      if (renderer && !renderer->EndDocument()) {
-        return false;
-      }
-      return true;
+        // Finish producing output
+        if (renderer && !renderer->EndDocument()) {
+            return false;
+        }
+        return true;
     }
 
     bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
@@ -1053,29 +1053,29 @@ namespace tesseract {
                                                 TessResultRenderer* renderer,
                                                 int tessedit_page_number) {
 #ifndef ANDROID_BUILD
-      Pix *pix = nullptr;
-      int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-      size_t offset = 0;
-      for (; ; ++page) {
-        if (tessedit_page_number >= 0)
-          page = tessedit_page_number;
-        pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                     : pixReadFromMultipageTiff(filename, &offset);
-        if (pix == nullptr) break;
-        tprintf("Page %d\n", page + 1);
-        char page_str[kMaxIntSize];
-        snprintf(page_str, kMaxIntSize - 1, "%d", page);
-        SetVariable("applybox_page", page_str);
-        bool r = ProcessPage(pix, page, filename, retry_config,
-                             timeout_millisec, renderer);
-        pixDestroy(&pix);
-        if (!r) return false;
-        if (tessedit_page_number >= 0) break;
-        if (!offset) break;
-      }
-      return true;
+        Pix *pix = nullptr;
+        int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+        size_t offset = 0;
+        for (; ; ++page) {
+            if (tessedit_page_number >= 0)
+                page = tessedit_page_number;
+            pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                         : pixReadFromMultipageTiff(filename, &offset);
+            if (pix == nullptr) break;
+            tprintf("Page %d\n", page + 1);
+            char page_str[kMaxIntSize];
+            snprintf(page_str, kMaxIntSize - 1, "%d", page);
+            SetVariable("applybox_page", page_str);
+            bool r = ProcessPage(pix, page, filename, retry_config,
+                                 timeout_millisec, renderer);
+            pixDestroy(&pix);
+            if (!r) return false;
+            if (tessedit_page_number >= 0) break;
+            if (!offset) break;
+        }
+        return true;
 #else
-      return false;
+        return false;
 #endif
     }
 
@@ -1084,18 +1084,18 @@ namespace tesseract {
     bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
                                    int timeout_millisec,
                                    TessResultRenderer* renderer) {
-      bool result =
-              ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
+        bool result =
+                ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
 #ifndef DISABLED_LEGACY_ENGINE
-      if (result) {
-        if (tesseract_->tessedit_train_from_boxes &&
-            !tesseract_->WriteTRFile(*output_file_)) {
-          tprintf("Write of TR file failed: %s\n", output_file_->string());
-          return false;
+        if (result) {
+            if (tesseract_->tessedit_train_from_boxes &&
+                !tesseract_->WriteTRFile(*output_file_)) {
+                tprintf("Write of TR file failed: %s\n", output_file_->string());
+                return false;
+            }
         }
-      }
 #endif  // ndef DISABLED_LEGACY_ENGINE
-      return result;
+        return result;
     }
 
 // In the ideal scenario, Tesseract will start working on data as soon
@@ -1113,166 +1113,166 @@ namespace tesseract {
                                            const char* retry_config,
                                            int timeout_millisec,
                                            TessResultRenderer* renderer) {
-      PERF_COUNT_START("ProcessPages")
-      bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
-      if (stdInput) {
+        PERF_COUNT_START("ProcessPages")
+        bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
+        if (stdInput) {
 #ifdef WIN32
-        if (_setmode(_fileno(stdin), _O_BINARY) == -1)
+            if (_setmode(_fileno(stdin), _O_BINARY) == -1)
       tprintf("ERROR: cin to binary: %s", strerror(errno));
 #endif  // WIN32
-      }
-
-      if (stream_filelist) {
-        return ProcessPagesFileList(stdin, nullptr, retry_config,
-                                    timeout_millisec, renderer,
-                                    tesseract_->tessedit_page_number);
-      }
-
-      // At this point we are officially in autodection territory.
-      // That means any data in stdin must be buffered, to make it
-      // seekable.
-      std::string buf;
-      const l_uint8 *data = nullptr;
-      if (stdInput) {
-        buf.assign((std::istreambuf_iterator<char>(std::cin)),
-                   (std::istreambuf_iterator<char>()));
-        data = reinterpret_cast<const l_uint8 *>(buf.data());
-      } else {
-        // Check whether the input file can be read.
-        if (FILE* file = fopen(filename, "rb")) {
-          fclose(file);
-        } else {
-          fprintf(stderr, "Error, cannot read input file %s: %s\n",
-                  filename, strerror(errno));
-          return false;
         }
-      }
 
-      // Here is our autodetection
-      int format;
-      int r = (stdInput) ?
-              findFileFormatBuffer(data, &format) :
-              findFileFormat(filename, &format);
+        if (stream_filelist) {
+            return ProcessPagesFileList(stdin, nullptr, retry_config,
+                                        timeout_millisec, renderer,
+                                        tesseract_->tessedit_page_number);
+        }
 
-      // Maybe we have a filelist
-      if (r != 0 || format == IFF_UNKNOWN) {
-        STRING s;
+        // At this point we are officially in autodetection territory.
+        // That means any data in stdin must be buffered, to make it
+        // seekable.
+        std::string buf;
+        const l_uint8 *data = nullptr;
         if (stdInput) {
-          s = buf.c_str();
+            buf.assign((std::istreambuf_iterator<char>(std::cin)),
+                       (std::istreambuf_iterator<char>()));
+            data = reinterpret_cast<const l_uint8 *>(buf.data());
         } else {
-          std::ifstream t(filename);
-          std::string u((std::istreambuf_iterator<char>(t)),
-                        std::istreambuf_iterator<char>());
-          s = u.c_str();
-        }
-        return ProcessPagesFileList(nullptr, &s, retry_config,
-                                    timeout_millisec, renderer,
-                                    tesseract_->tessedit_page_number);
-      }
+            // Check whether the input file can be read.
+            if (FILE* file = fopen(filename, "rb")) {
+                fclose(file);
+            } else {
+                fprintf(stderr, "Error, cannot read input file %s: %s\n",
+                        filename, strerror(errno));
+                return false;
+            }
+        }
 
-      // Maybe we have a TIFF which is potentially multipage
-      bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
-                   format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
-                   format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
-                   format == IFF_TIFF_ZIP);
+        // Here is our autodetection
+        int format;
+        int r = (stdInput) ?
+                findFileFormatBuffer(data, &format) :
+                findFileFormat(filename, &format);
+
+        // Maybe we have a filelist
+        if (r != 0 || format == IFF_UNKNOWN) {
+            STRING s;
+            if (stdInput) {
+                s = buf.c_str();
+            } else {
+                std::ifstream t(filename);
+                std::string u((std::istreambuf_iterator<char>(t)),
+                              std::istreambuf_iterator<char>());
+                s = u.c_str();
+            }
+            return ProcessPagesFileList(nullptr, &s, retry_config,
+                                        timeout_millisec, renderer,
+                                        tesseract_->tessedit_page_number);
+        }
 
-      // Fail early if we can, before producing any output
-      Pix *pix = nullptr;
-      if (!tiff) {
-        pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
-        if (pix == nullptr) {
-          return false;
+        // Maybe we have a TIFF which is potentially multipage
+        bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
+                     format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
+                     format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
+                     format == IFF_TIFF_ZIP);
+
+        // Fail early if we can, before producing any output
+        Pix *pix = nullptr;
+        if (!tiff) {
+            pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
+            if (pix == nullptr) {
+                return false;
+            }
         }
-      }
 
-      // Begin the output
-      if (renderer && !renderer->BeginDocument(unknown_title_)) {
-        pixDestroy(&pix);
-        return false;
-      }
+        // Begin the output
+        if (renderer && !renderer->BeginDocument(unknown_title_)) {
+            pixDestroy(&pix);
+            return false;
+        }
 
-      // Produce output
-      r = (tiff) ?
-          ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
-                                    timeout_millisec, renderer,
-                                    tesseract_->tessedit_page_number) :
-          ProcessPage(pix, 0, filename, retry_config,
-                      timeout_millisec, renderer);
+        // Produce output
+        r = (tiff) ?
+            ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
+                                      timeout_millisec, renderer,
+                                      tesseract_->tessedit_page_number) :
+            ProcessPage(pix, 0, filename, retry_config,
+                        timeout_millisec, renderer);
 
-      // Clean up memory as needed
-      pixDestroy(&pix);
+        // Clean up memory as needed
+        pixDestroy(&pix);
 
-      // End the output
-      if (!r || (renderer && !renderer->EndDocument())) {
-        return false;
-      }
-      PERF_COUNT_END
-      return true;
+        // End the output
+        if (!r || (renderer && !renderer->EndDocument())) {
+            return false;
+        }
+        PERF_COUNT_END
+        return true;
     }
 
     bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
                                   const char* retry_config, int timeout_millisec,
                                   TessResultRenderer* renderer) {
-      PERF_COUNT_START("ProcessPage")
-      SetInputName(filename);
-      SetImage(pix);
-      bool failed = false;
+        PERF_COUNT_START("ProcessPage")
+        SetInputName(filename);
+        SetImage(pix);
+        bool failed = false;
 
-      if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
-        // Disabled character recognition
-        PageIterator* it = AnalyseLayout();
+        if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
+            // Disabled character recognition
+            PageIterator* it = AnalyseLayout();
 
-        if (it == nullptr) {
-          failed = true;
+            if (it == nullptr) {
+                failed = true;
+            } else {
+                delete it;
+            }
+        } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
+            failed = FindLines() != 0;
+        } else if (timeout_millisec > 0) {
+            // Running with a timeout.
+            ETEXT_DESC monitor;
+            monitor.cancel = nullptr;
+            monitor.cancel_this = nullptr;
+            monitor.set_deadline_msecs(timeout_millisec);
+
+            // Now run the main recognition.
+            failed = Recognize(&monitor) < 0;
         } else {
-          delete it;
-        }
-      } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
-        failed = FindLines() != 0;
-      } else if (timeout_millisec > 0) {
-        // Running with a timeout.
-        ETEXT_DESC monitor;
-        monitor.cancel = nullptr;
-        monitor.cancel_this = nullptr;
-        monitor.set_deadline_msecs(timeout_millisec);
-
-        // Now run the main recognition.
-        failed = Recognize(&monitor) < 0;
-      } else {
-        // Normal layout and character recognition with no timeout.
-        failed = Recognize(nullptr) < 0;
-      }
+            // Normal layout and character recognition with no timeout.
+            failed = Recognize(nullptr) < 0;
+        }
 
-      if (tesseract_->tessedit_write_images) {
+        if (tesseract_->tessedit_write_images) {
 #ifndef ANDROID_BUILD
-        Pix* page_pix = GetThresholdedImage();
-        pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
+            Pix* page_pix = GetThresholdedImage();
+            pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
 #endif  // ANDROID_BUILD
-      }
+        }
 
-      if (failed && retry_config != nullptr && retry_config[0] != '\0') {
-        // Save current config variables before switching modes.
-        FILE* fp = fopen(kOldVarsFile, "wb");
-        if (fp == nullptr) {
-          tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
-        } else {
-          PrintVariables(fp);
-          fclose(fp);
+        if (failed && retry_config != nullptr && retry_config[0] != '\0') {
+            // Save current config variables before switching modes.
+            FILE* fp = fopen(kOldVarsFile, "wb");
+            if (fp == nullptr) {
+                tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
+            } else {
+                PrintVariables(fp);
+                fclose(fp);
+            }
+            // Switch to alternate mode for retry.
+            ReadConfigFile(retry_config);
+            SetImage(pix);
+            Recognize(nullptr);
+            // Restore saved config variables.
+            ReadConfigFile(kOldVarsFile);
         }
-        // Switch to alternate mode for retry.
-        ReadConfigFile(retry_config);
-        SetImage(pix);
-        Recognize(nullptr);
-        // Restore saved config variables.
-        ReadConfigFile(kOldVarsFile);
-      }
 
-      if (renderer && !failed) {
-        failed = !renderer->AddImage(this);
-      }
+        if (renderer && !failed) {
+            failed = !renderer->AddImage(this);
+        }
 
-      PERF_COUNT_END
-      return !failed;
+        PERF_COUNT_END
+        return !failed;
     }
 
 /**
@@ -1280,12 +1280,12 @@ namespace tesseract {
  * Recognize. The returned iterator must be deleted after use.
  */
     LTRResultIterator* TessBaseAPI::GetLTRIterator() {
-      if (tesseract_ == nullptr || page_res_ == nullptr)
-        return nullptr;
-      return new LTRResultIterator(
-              page_res_, tesseract_,
-              thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-              rect_left_, rect_top_, rect_width_, rect_height_);
+        if (tesseract_ == nullptr || page_res_ == nullptr)
+            return nullptr;
+        return new LTRResultIterator(
+                page_res_, tesseract_,
+                thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
+                rect_left_, rect_top_, rect_width_, rect_height_);
     }
 
 /**
@@ -1297,12 +1297,12 @@ namespace tesseract {
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
     ResultIterator* TessBaseAPI::GetIterator() {
-      if (tesseract_ == nullptr || page_res_ == nullptr)
-        return nullptr;
-      return ResultIterator::StartOfParagraph(LTRResultIterator(
-              page_res_, tesseract_,
-              thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-              rect_left_, rect_top_, rect_width_, rect_height_));
+        if (tesseract_ == nullptr || page_res_ == nullptr)
+            return nullptr;
+        return ResultIterator::StartOfParagraph(LTRResultIterator(
+                page_res_, tesseract_,
+                thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
+                rect_left_, rect_top_, rect_width_, rect_height_));
     }
 
 /**
@@ -1314,43 +1314,43 @@ namespace tesseract {
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
     MutableIterator* TessBaseAPI::GetMutableIterator() {
-      if (tesseract_ == nullptr || page_res_ == nullptr)
-        return nullptr;
-      return new MutableIterator(page_res_, tesseract_,
-                                 thresholder_->GetScaleFactor(),
-                                 thresholder_->GetScaledYResolution(),
-                                 rect_left_, rect_top_, rect_width_, rect_height_);
+        if (tesseract_ == nullptr || page_res_ == nullptr)
+            return nullptr;
+        return new MutableIterator(page_res_, tesseract_,
+                                   thresholder_->GetScaleFactor(),
+                                   thresholder_->GetScaledYResolution(),
+                                   rect_left_, rect_top_, rect_width_, rect_height_);
     }
 
 /** Make a text string from the internal data structures. */
     char* TessBaseAPI::GetUTF8Text() {
-      if (tesseract_ == nullptr ||
-          (!recognition_done_ && Recognize(nullptr) < 0))
-        return nullptr;
-      STRING text("");
-      ResultIterator *it = GetIterator();
-      do {
-        if (it->Empty(RIL_PARA)) continue;
-        const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
-        text += para_text.get();
-      } while (it->Next(RIL_PARA));
-      char* result = new char[text.length() + 1];
-      strncpy(result, text.string(), text.length() + 1);
-      delete it;
-      return result;
+        if (tesseract_ == nullptr ||
+            (!recognition_done_ && Recognize(nullptr) < 0))
+            return nullptr;
+        STRING text("");
+        ResultIterator *it = GetIterator();
+        do {
+            if (it->Empty(RIL_PARA)) continue;
+            const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
+            text += para_text.get();
+        } while (it->Next(RIL_PARA));
+        char* result = new char[text.length() + 1];
+        strncpy(result, text.string(), text.length() + 1);
+        delete it;
+        return result;
     }
 
 /**
  * Gets the block orientation at the current iterator position.
  */
     static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
-      tesseract::Orientation orientation;
-      tesseract::WritingDirection writing_direction;
-      tesseract::TextlineOrder textline_order;
-      float deskew_angle;
-      it->Orientation(&orientation, &writing_direction, &textline_order,
-                      &deskew_angle);
-      return orientation;
+        tesseract::Orientation orientation;
+        tesseract::WritingDirection writing_direction;
+        tesseract::TextlineOrder textline_order;
+        float deskew_angle;
+        it->Orientation(&orientation, &writing_direction, &textline_order,
+                        &deskew_angle);
+        return orientation;
     }
 
 /**
@@ -1364,100 +1364,100 @@ namespace tesseract {
     static void AddBaselineCoordsTohOCR(const PageIterator *it,
                                         PageIteratorLevel level,
                                         STRING* hocr_str) {
-      tesseract::Orientation orientation = GetBlockTextOrientation(it);
-      if (orientation != ORIENTATION_PAGE_UP) {
-        hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
-        return;
-      }
+        tesseract::Orientation orientation = GetBlockTextOrientation(it);
+        if (orientation != ORIENTATION_PAGE_UP) {
+            hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
+            return;
+        }
 
-      int left, top, right, bottom;
-      it->BoundingBox(level, &left, &top, &right, &bottom);
-
-      // Try to get the baseline coordinates at this level.
-      int x1, y1, x2, y2;
-      if (!it->Baseline(level, &x1, &y1, &x2, &y2))
-        return;
-      // Following the description of this field of the hOCR spec, we convert the
-      // baseline coordinates so that "the bottom left of the bounding box is the
-      // origin".
-      x1 -= left;
-      x2 -= left;
-      y1 -= bottom;
-      y2 -= bottom;
-
-      // Now fit a line through the points so we can extract coefficients for the
-      // equation:  y = p1 x + p0
-      double p1 = 0;
-      double p0 = 0;
-      if (x1 == x2) {
-        // Problem computing the polynomial coefficients.
-        return;
-      }
-      p1 = (y2 - y1) / static_cast<double>(x2 - x1);
-      p0 = y1 - static_cast<double>(p1 * x1);
+        int left, top, right, bottom;
+        it->BoundingBox(level, &left, &top, &right, &bottom);
+
+        // Try to get the baseline coordinates at this level.
+        int x1, y1, x2, y2;
+        if (!it->Baseline(level, &x1, &y1, &x2, &y2))
+            return;
+        // Following the description of this field of the hOCR spec, we convert the
+        // baseline coordinates so that "the bottom left of the bounding box is the
+        // origin".
+        x1 -= left;
+        x2 -= left;
+        y1 -= bottom;
+        y2 -= bottom;
+
+        // Now fit a line through the points so we can extract coefficients for the
+        // equation:  y = p1 x + p0
+        double p1 = 0;
+        double p0 = 0;
+        if (x1 == x2) {
+            // Problem computing the polynomial coefficients.
+            return;
+        }
+        p1 = (y2 - y1) / static_cast<double>(x2 - x1);
+        p0 = y1 - static_cast<double>(p1 * x1);
 
-      hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
-      hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
+        hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
+        hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
     }
 
     static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
                             int num2) {
-      const size_t BUFSIZE = 64;
-      char id_buffer[BUFSIZE];
-      if (num2 >= 0) {
-        snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
-      } else {
-        snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
-      }
-      id_buffer[BUFSIZE - 1] = '\0';
-      *hocr_str += " id='";
-      *hocr_str += id_buffer;
-      *hocr_str += "'";
+        const size_t BUFSIZE = 64;
+        char id_buffer[BUFSIZE];
+        if (num2 >= 0) {
+            snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
+        } else {
+            snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
+        }
+        id_buffer[BUFSIZE - 1] = '\0';
+        *hocr_str += " id='";
+        *hocr_str += id_buffer;
+        *hocr_str += "'";
     }
 
     static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
                             int num2, int num3) {
-      const size_t BUFSIZE = 64;
-      char id_buffer[BUFSIZE];
-      snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
-      id_buffer[BUFSIZE - 1] = '\0';
-      *hocr_str += " id='";
-      *hocr_str += id_buffer;
-      *hocr_str += "'";
+        const size_t BUFSIZE = 64;
+        char id_buffer[BUFSIZE];
+        snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
+        id_buffer[BUFSIZE - 1] = '\0';
+        *hocr_str += " id='";
+        *hocr_str += id_buffer;
+        *hocr_str += "'";
     }
 
     static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
                              STRING* hocr_str) {
-      int left, top, right, bottom;
-      it->BoundingBox(level, &left, &top, &right, &bottom);
-      // This is the only place we use double quotes instead of single quotes,
-      // but it may too late to change for consistency
-      hocr_str->add_str_int(" title=\"bbox ", left);
-      hocr_str->add_str_int(" ", top);
-      hocr_str->add_str_int(" ", right);
-      hocr_str->add_str_int(" ", bottom);
-      // Add baseline coordinates & heights for textlines only.
-      if (level == RIL_TEXTLINE) {
-        AddBaselineCoordsTohOCR(it, level, hocr_str);
-        // add custom height measures
-        float row_height, descenders, ascenders;  // row attributes
-        it->RowAttributes(&row_height, &descenders, &ascenders);
-        // TODO(rays): Do we want to limit these to a single decimal place?
-        hocr_str->add_str_double("; x_size ", row_height);
-        hocr_str->add_str_double("; x_descenders ", descenders * -1);
-        hocr_str->add_str_double("; x_ascenders ", ascenders);
-      }
-      *hocr_str += "\">";
+        int left, top, right, bottom;
+        it->BoundingBox(level, &left, &top, &right, &bottom);
+        // This is the only place we use double quotes instead of single quotes,
+        // but it may be too late to change for consistency
+        hocr_str->add_str_int(" title=\"bbox ", left);
+        hocr_str->add_str_int(" ", top);
+        hocr_str->add_str_int(" ", right);
+        hocr_str->add_str_int(" ", bottom);
+        // Add baseline coordinates & heights for textlines only.
+        if (level == RIL_TEXTLINE) {
+            AddBaselineCoordsTohOCR(it, level, hocr_str);
+            // add custom height measures
+            float row_height, descenders, ascenders;  // row attributes
+            it->RowAttributes(&row_height, &descenders, &ascenders);
+            // TODO(rays): Do we want to limit these to a single decimal place?
+            hocr_str->add_str_double("; x_size ", row_height);
+            hocr_str->add_str_double("; x_descenders ", descenders * -1);
+            hocr_str->add_str_double("; x_ascenders ", ascenders);
+        }
+        *hocr_str += "\">";
     }
 
     static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
                             STRING* hocr_str) {
-      int left, top, right, bottom;
-      it->BoundingBox(level, &left, &top, &right, &bottom);
-      hocr_str->add_str_int("\t", left);
-      hocr_str->add_str_int("\t", top);
-      hocr_str->add_str_int("\t", right - left);
-      hocr_str->add_str_int("\t", bottom - top);
+        int left, top, right, bottom;
+        it->BoundingBox(level, &left, &top, &right, &bottom);
+        hocr_str->add_str_int("\t", left);
+        hocr_str->add_str_int("\t", top);
+        hocr_str->add_str_int("\t", right - left);
+        hocr_str->add_str_int("\t", bottom - top);
     }
 
 /**
@@ -1470,7 +1470,7 @@ namespace tesseract {
  * Returned string must be freed with the delete [] operator.
  */
     char* TessBaseAPI::GetHOCRText(int page_number) {
-      return GetHOCRText(nullptr, page_number);
+        return GetHOCRText(nullptr, page_number);
     }
 
 /**
@@ -1483,23 +1483,23 @@ namespace tesseract {
  * Returned string must be freed with the delete [] operator.
  */
     char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
-      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
-        return nullptr;
+        if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+            return nullptr;
 
-      int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
-      int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
-      bool para_is_ltr = true;        // Default direction is LTR
-      const char* paragraph_lang = nullptr;
-      bool font_info = false;
-      GetBoolVariable("hocr_font_info", &font_info);
+        int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
+        int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
+        bool para_is_ltr = true;        // Default direction is LTR
+        const char* paragraph_lang = nullptr;
+        bool font_info = false;
+        GetBoolVariable("hocr_font_info", &font_info);
 
-      STRING hocr_str("");
+        STRING hocr_str("");
 
-      if (input_file_ == nullptr)
-        SetInputName(nullptr);
+        if (input_file_ == nullptr)
+            SetInputName(nullptr);
 
 #ifdef _WIN32
-      // convert input name from ANSI encoding to utf-8
+        // convert input name from ANSI encoding to utf-8
   int str16_len =
       MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
   wchar_t *uni16_str = new WCHAR[str16_len];
@@ -1515,302 +1515,300 @@ namespace tesseract {
   delete[] utf8_str;
 #endif
 
-      hocr_str += "  <div class='ocr_page'";
-      AddIdTohOCR(&hocr_str, "page", page_id, -1);
-      hocr_str += " title='image \"";
-      if (input_file_) {
-        hocr_str += HOcrEscape(input_file_->string());
-      } else {
-        hocr_str += "unknown";
-      }
-      hocr_str.add_str_int("\"; bbox ", rect_left_);
-      hocr_str.add_str_int(" ", rect_top_);
-      hocr_str.add_str_int(" ", rect_width_);
-      hocr_str.add_str_int(" ", rect_height_);
-      hocr_str.add_str_int("; ppageno ", page_number);
-      hocr_str += "'>\n";
-
-      ResultIterator *res_it = GetIterator();
-      while (!res_it->Empty(RIL_BLOCK)) {
-        if (res_it->Empty(RIL_WORD)) {
-          res_it->Next(RIL_WORD);
-          continue;
-        }
-
-        // Open any new block/paragraph/textline.
-        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-          para_is_ltr = true;  // reset to default direction
-          hocr_str += "   <div class='ocr_carea'";
-          AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
-          AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
-        }
-        if (res_it->IsAtBeginningOf(RIL_PARA)) {
-          hocr_str += "\n    <p class='ocr_par'";
-          para_is_ltr = res_it->ParagraphIsLtr();
-          if (!para_is_ltr) {
-            hocr_str += " dir='rtl'";
-          }
-          AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
-          paragraph_lang = res_it->WordRecognitionLanguage();
-          if (paragraph_lang) {
-            hocr_str += " lang='";
-            hocr_str += paragraph_lang;
-            hocr_str += "'";
-          }
-          AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
-        }
-        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-          hocr_str += "\n     <span class='ocr_line'";
-          AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
-          AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
+        hocr_str += "  <div class='ocr_page'";
+        AddIdTohOCR(&hocr_str, "page", page_id, -1);
+        hocr_str += " title='image \"";
+        if (input_file_) {
+            hocr_str += HOcrEscape(input_file_->string());
+        } else {
+            hocr_str += "unknown";
         }
+        hocr_str.add_str_int("\"; bbox ", rect_left_);
+        hocr_str.add_str_int(" ", rect_top_);
+        hocr_str.add_str_int(" ", rect_width_);
+        hocr_str.add_str_int(" ", rect_height_);
+        hocr_str.add_str_int("; ppageno ", page_number);
+        hocr_str += "'>\n";
+
+        ResultIterator *res_it = GetIterator();
+        while (!res_it->Empty(RIL_BLOCK)) {
+            if (res_it->Empty(RIL_WORD)) {
+                res_it->Next(RIL_WORD);
+                continue;
+            }
 
-        // Now, process the word...
-        std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
-        if (tesseract_->lstm_choice_mode) {
-          confidencemap = res_it->GetBestLSTMSymbolChoices();
-        }
-        hocr_str += "\n      <span class='ocrx_word'";
-        AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
-        int left, top, right, bottom;
-        bool bold, italic, underlined, monospace, serif, smallcaps;
-        int pointsize, font_id;
-        const char *font_name;
-        res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-        font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
-                                               &monospace, &serif, &smallcaps,
-                                               &pointsize, &font_id);
-        hocr_str.add_str_int(" title='bbox ", left);
-        hocr_str.add_str_int(" ", top);
-        hocr_str.add_str_int(" ", right);
-        hocr_str.add_str_int(" ", bottom);
-        hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
-        if (font_info) {
-          if (font_name) {
-            hocr_str += "; x_font ";
-            hocr_str += HOcrEscape(font_name);
-          }
-          hocr_str.add_str_int("; x_fsize ", pointsize);
-        }
-        hocr_str += "'";
-        const char* lang = res_it->WordRecognitionLanguage();
-        if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
-          hocr_str += " lang='";
-          hocr_str += lang;
-          hocr_str += "'";
-        }
-        switch (res_it->WordDirection()) {
-          // Only emit direction if different from current paragraph direction
-          case DIR_LEFT_TO_RIGHT:
-            if (!para_is_ltr) hocr_str += " dir='ltr'";
-                break;
-          case DIR_RIGHT_TO_LEFT:
-            if (para_is_ltr) hocr_str += " dir='rtl'";
-                break;
-          case DIR_MIX:
-          case DIR_NEUTRAL:
-          default:  // Do nothing.
-            break;
-        }
-        hocr_str += ">";
-        bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
-        bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
-        bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
-        if (bold) hocr_str += "<strong>";
-        if (italic) hocr_str += "<em>";
-        do {
-          const std::unique_ptr<const char[]> grapheme(
-                  res_it->GetUTF8Text(RIL_SYMBOL));
-          if (grapheme && grapheme[0] != 0) {
-            hocr_str += HOcrEscape(grapheme.get());
-          }
-          res_it->Next(RIL_SYMBOL);
-        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-        if (italic) hocr_str += "</em>";
-        if (bold) hocr_str += "</strong>";
-        // If the lstm choice mode is required it is added here
-        if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
-          for (size_t i = 0; i < confidencemap->size(); i++) {
-            hocr_str += "\n       <span class='ocrx_cinfo'";
-            AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
+            // Open any new block/paragraph/textline.
+            if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+                para_is_ltr = true;  // reset to default direction
+                hocr_str += "   <div class='ocr_carea'";
+                AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
+                AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
+            }
+            if (res_it->IsAtBeginningOf(RIL_PARA)) {
+                hocr_str += "\n    <p class='ocr_par'";
+                para_is_ltr = res_it->ParagraphIsLtr();
+                if (!para_is_ltr) {
+                    hocr_str += " dir='rtl'";
+                }
+                AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
+                paragraph_lang = res_it->WordRecognitionLanguage();
+                if (paragraph_lang) {
+                    hocr_str += " lang='";
+                    hocr_str += paragraph_lang;
+                    hocr_str += "'";
+                }
+                AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
+            }
+            if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+                hocr_str += "\n     <span class='ocr_line'";
+                AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
+                AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
+            }
+
+            // Now, process the word...
+            std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
+            if (tesseract_->lstm_choice_mode) {
+                confidencemap = res_it->GetBestLSTMSymbolChoices();
+            }
+            hocr_str += "\n      <span class='ocrx_word'";
+            AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
+            int left, top, right, bottom;
+            bool bold, italic, underlined, monospace, serif, smallcaps;
+            int pointsize, font_id;
+            const char *font_name;
+            res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+            font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
+                                                   &monospace, &serif, &smallcaps,
+                                                   &pointsize, &font_id);
+            hocr_str.add_str_int(" title='bbox ", left);
+            hocr_str.add_str_int(" ", top);
+            hocr_str.add_str_int(" ", right);
+            hocr_str.add_str_int(" ", bottom);
+            hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
+            if (font_info) {
+                if (font_name) {
+                    hocr_str += "; x_font ";
+                    hocr_str += HOcrEscape(font_name);
+                }
+                hocr_str.add_str_int("; x_fsize ", pointsize);
+            }
+            hocr_str += "'";
+            const char* lang = res_it->WordRecognitionLanguage();
+            if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
+                hocr_str += " lang='";
+                hocr_str += lang;
+                hocr_str += "'";
+            }
+            switch (res_it->WordDirection()) {
+                // Only emit direction if different from current paragraph direction
+                case DIR_LEFT_TO_RIGHT:
+                    if (!para_is_ltr) hocr_str += " dir='ltr'";
+                    break;
+                case DIR_RIGHT_TO_LEFT:
+                    if (para_is_ltr) hocr_str += " dir='rtl'";
+                    break;
+                case DIR_MIX:
+                case DIR_NEUTRAL:
+                default:  // Do nothing.
+                    break;
+            }
             hocr_str += ">";
-            std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-            for (std::pair<const char*, float> conf : timestep) {
-              hocr_str += "<span class='ocr_glyph'";
-              AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-              hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
-              hocr_str += "'";
-              hocr_str += ">";
-              hocr_str += conf.first;
-              hocr_str += "</span>";
-              gcnt++;
+            bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
+            bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
+            bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
+            if (bold) hocr_str += "<strong>";
+            if (italic) hocr_str += "<em>";
+            do {
+                const std::unique_ptr<const char[]> grapheme(
+                        res_it->GetUTF8Text(RIL_SYMBOL));
+                if (grapheme && grapheme[0] != 0) {
+                    hocr_str += HOcrEscape(grapheme.get());
+                }
+                res_it->Next(RIL_SYMBOL);
+            } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+            if (italic) hocr_str += "</em>";
+            if (bold) hocr_str += "</strong>";
+            // If an LSTM choice mode is enabled, emit the per-timestep symbol choices here
+            if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
+                for (size_t i = 0; i < confidencemap->size(); i++) {
+                    hocr_str += "\n       <span class='ocrx_cinfo'";
+                    AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
+                    hocr_str += ">";
+                    std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
+                    for (std::pair<const char*, float> conf : timestep) {
+                        hocr_str += "<span class='ocr_glyph'";
+                        AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+                        hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
+                        hocr_str += "'";
+                        hocr_str += ">";
+                        hocr_str += conf.first;
+                        hocr_str += "</span>";
+                        gcnt++;
+                    }
+                    hocr_str += "</span>";
+                    tcnt++;
+                }
+            } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
+                for (size_t i = 0; i < confidencemap->size(); i++) {
+                    std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
+                    if (timestep.size() > 0) {
+                        hocr_str += "\n       <span class='ocrx_cinfo'";
+                        AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
+                        hocr_str += " chosen='";
+                        hocr_str += timestep[0].first;
+                        hocr_str += "'>";
+                        for (size_t j = 1; j < timestep.size(); j++) {
+                            hocr_str += "<span class='ocr_glyph'";
+                            AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+                            hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
+                            hocr_str += "'";
+                            hocr_str += ">";
+                            hocr_str += timestep[j].first;
+                            hocr_str += "</span>";
+                            gcnt++;
+                        }
+                        hocr_str += "</span>";
+                        tcnt++;
+                    }
+                }
             }
             hocr_str += "</span>";
-            tcnt++;
-          }
-        } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
-          for (size_t i = 0; i < confidencemap->size(); i++) {
-            std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-            if (timestep.size() > 0) {
-              hocr_str += "\n       <span class='ocrx_cinfo'";
-              AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
-              hocr_str += " chosen='";
-              hocr_str += timestep[0].first;
-              hocr_str += "'>";
-              for (size_t j = 1; j < timestep.size(); j++) {
-                hocr_str += "<span class='ocr_glyph'";
-                AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-                hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
-                hocr_str += "'";
-                hocr_str += ">";
-                hocr_str += timestep[j].first;
-                hocr_str += "</span>";
-                gcnt++;
-              }
-              hocr_str += "</span>";
-              tcnt++;
+            tcnt = 1;
+            gcnt = 1;
+            wcnt++;
+            // Close any ending block/paragraph/textline.
+            if (last_word_in_line) {
+                hocr_str += "\n     </span>";
+                lcnt++;
+            }
+            if (last_word_in_para) {
+                hocr_str += "\n    </p>\n";
+                pcnt++;
+                para_is_ltr = true;  // back to default direction
+            }
+            if (last_word_in_block) {
+                hocr_str += "   </div>\n";
+                bcnt++;
             }
-          }
-        }
-        hocr_str += "</span>";
-        tcnt = 1;
-        gcnt = 1;
-        wcnt++;
-        // Close any ending block/paragraph/textline.
-        if (last_word_in_line) {
-          hocr_str += "\n     </span>";
-          lcnt++;
-        }
-        if (last_word_in_para) {
-          hocr_str += "\n    </p>\n";
-          pcnt++;
-          para_is_ltr = true;  // back to default direction
-        }
-        if (last_word_in_block) {
-          hocr_str += "   </div>\n";
-          bcnt++;
         }
-      }
-      hocr_str += "  </div>\n";
+        hocr_str += "  </div>\n";
 
-      char *ret = new char[hocr_str.length() + 1];
-      strcpy(ret, hocr_str.string());
-      delete res_it;
-      return ret;
+        char *ret = new char[hocr_str.length() + 1];
+        strcpy(ret, hocr_str.string());
+        delete res_it;
+        return ret;
     }
 
-
-
 /**
  * Make a TSV-formatted string from the internal data structures.
  * page_number is 0-based but will appear in the output as 1-based.
  * Returned string must be freed with the delete [] operator.
  */
     char* TessBaseAPI::GetTSVText(int page_number) {
-      if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
-        return nullptr;
+        if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
+            return nullptr;
 
-      int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
-      int page_id = page_number + 1;  // we use 1-based page numbers.
-
-      STRING tsv_str("");
-
-      int page_num = page_id;
-      int block_num = 0;
-      int par_num = 0;
-      int line_num = 0;
-      int word_num = 0;
-
-      tsv_str.add_str_int("1\t", page_num);  // level 1 - page
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      tsv_str.add_str_int("\t", rect_left_);
-      tsv_str.add_str_int("\t", rect_top_);
-      tsv_str.add_str_int("\t", rect_width_);
-      tsv_str.add_str_int("\t", rect_height_);
-      tsv_str += "\t-1\t\n";
-
-      ResultIterator* res_it = GetIterator();
-      while (!res_it->Empty(RIL_BLOCK)) {
-        if (res_it->Empty(RIL_WORD)) {
-          res_it->Next(RIL_WORD);
-          continue;
-        }
-
-        // Add rows for any new block/paragraph/textline.
-        if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-          block_num++;
-          par_num = 0;
-          line_num = 0;
-          word_num = 0;
-          tsv_str.add_str_int("2\t", page_num);  // level 2 - block
-          tsv_str.add_str_int("\t", block_num);
-          tsv_str.add_str_int("\t", par_num);
-          tsv_str.add_str_int("\t", line_num);
-          tsv_str.add_str_int("\t", word_num);
-          AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
-          tsv_str += "\t-1\t\n";  // end of row for block
-        }
-        if (res_it->IsAtBeginningOf(RIL_PARA)) {
-          par_num++;
-          line_num = 0;
-          word_num = 0;
-          tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
-          tsv_str.add_str_int("\t", block_num);
-          tsv_str.add_str_int("\t", par_num);
-          tsv_str.add_str_int("\t", line_num);
-          tsv_str.add_str_int("\t", word_num);
-          AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
-          tsv_str += "\t-1\t\n";  // end of row for para
-        }
-        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-          line_num++;
-          word_num = 0;
-          tsv_str.add_str_int("4\t", page_num);  // level 4 - line
-          tsv_str.add_str_int("\t", block_num);
-          tsv_str.add_str_int("\t", par_num);
-          tsv_str.add_str_int("\t", line_num);
-          tsv_str.add_str_int("\t", word_num);
-          AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
-          tsv_str += "\t-1\t\n";  // end of row for line
-        }
-
-        // Now, process the word...
-        int left, top, right, bottom;
-        res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-        word_num++;
-        tsv_str.add_str_int("5\t", page_num);  // level 5 - word
+        int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
+        int page_id = page_number + 1;  // we use 1-based page numbers.
+
+        STRING tsv_str("");
+
+        int page_num = page_id;
+        int block_num = 0;
+        int par_num = 0;
+        int line_num = 0;
+        int word_num = 0;
+
+        tsv_str.add_str_int("1\t", page_num);  // level 1 - page
         tsv_str.add_str_int("\t", block_num);
         tsv_str.add_str_int("\t", par_num);
         tsv_str.add_str_int("\t", line_num);
         tsv_str.add_str_int("\t", word_num);
-        tsv_str.add_str_int("\t", left);
-        tsv_str.add_str_int("\t", top);
-        tsv_str.add_str_int("\t", right - left);
-        tsv_str.add_str_int("\t", bottom - top);
-        tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
-        tsv_str += "\t";
-
-        // Increment counts if at end of block/paragraph/textline.
-        if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
-        if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
-        if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
+        tsv_str.add_str_int("\t", rect_left_);
+        tsv_str.add_str_int("\t", rect_top_);
+        tsv_str.add_str_int("\t", rect_width_);
+        tsv_str.add_str_int("\t", rect_height_);
+        tsv_str += "\t-1\t\n";
+
+        ResultIterator* res_it = GetIterator();
+        while (!res_it->Empty(RIL_BLOCK)) {
+            if (res_it->Empty(RIL_WORD)) {
+                res_it->Next(RIL_WORD);
+                continue;
+            }
 
-        do {
-          tsv_str +=
-                  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
-          res_it->Next(RIL_SYMBOL);
-        } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-        tsv_str += "\n";  // end of row
-        wcnt++;
-      }
+            // Add rows for any new block/paragraph/textline.
+            if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+                block_num++;
+                par_num = 0;
+                line_num = 0;
+                word_num = 0;
+                tsv_str.add_str_int("2\t", page_num);  // level 2 - block
+                tsv_str.add_str_int("\t", block_num);
+                tsv_str.add_str_int("\t", par_num);
+                tsv_str.add_str_int("\t", line_num);
+                tsv_str.add_str_int("\t", word_num);
+                AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
+                tsv_str += "\t-1\t\n";  // end of row for block
+            }
+            if (res_it->IsAtBeginningOf(RIL_PARA)) {
+                par_num++;
+                line_num = 0;
+                word_num = 0;
+                tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
+                tsv_str.add_str_int("\t", block_num);
+                tsv_str.add_str_int("\t", par_num);
+                tsv_str.add_str_int("\t", line_num);
+                tsv_str.add_str_int("\t", word_num);
+                AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
+                tsv_str += "\t-1\t\n";  // end of row for para
+            }
+            if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+                line_num++;
+                word_num = 0;
+                tsv_str.add_str_int("4\t", page_num);  // level 4 - line
+                tsv_str.add_str_int("\t", block_num);
+                tsv_str.add_str_int("\t", par_num);
+                tsv_str.add_str_int("\t", line_num);
+                tsv_str.add_str_int("\t", word_num);
+                AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
+                tsv_str += "\t-1\t\n";  // end of row for line
+            }
+
+            // Now, process the word...
+            int left, top, right, bottom;
+            res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+            word_num++;
+            tsv_str.add_str_int("5\t", page_num);  // level 5 - word
+            tsv_str.add_str_int("\t", block_num);
+            tsv_str.add_str_int("\t", par_num);
+            tsv_str.add_str_int("\t", line_num);
+            tsv_str.add_str_int("\t", word_num);
+            tsv_str.add_str_int("\t", left);
+            tsv_str.add_str_int("\t", top);
+            tsv_str.add_str_int("\t", right - left);
+            tsv_str.add_str_int("\t", bottom - top);
+            tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
+            tsv_str += "\t";
+
+            // Increment counts if at end of block/paragraph/textline.
+            if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
+            if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
+            if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
+
+            do {
+                tsv_str +=
+                        std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
+                res_it->Next(RIL_SYMBOL);
+            } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+            tsv_str += "\n";  // end of row
+            wcnt++;
+        }
 
-      char* ret = new char[tsv_str.length() + 1];
-      strcpy(ret, tsv_str.string());
-      delete res_it;
-      return ret;
+        char* ret = new char[tsv_str.length() + 1];
+        strcpy(ret, tsv_str.string());
+        delete res_it;
+        return ret;
     }
 
 /** The 5 numbers output for each box (the usual 4 and a page number.) */
@@ -1844,39 +1842,39 @@ namespace tesseract {
  * Returned string must be freed with the delete [] operator.
  */
     char* TessBaseAPI::GetBoxText(int page_number) {
-      if (tesseract_ == nullptr ||
-          (!recognition_done_ && Recognize(nullptr) < 0))
-        return nullptr;
-      int blob_count;
-      int utf8_length = TextLength(&blob_count);
-      int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
-                         kMaxBytesPerLine;
-      char* result = new char[total_length];
-      result[0] = '\0';
-      int output_length = 0;
-      LTRResultIterator* it = GetLTRIterator();
-      do {
-        int left, top, right, bottom;
-        if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
-          const std::unique_ptr</*non-const*/ char[]> text(
-                  it->GetUTF8Text(RIL_SYMBOL));
-          // Tesseract uses space for recognition failure. Fix to a reject
-          // character, kTesseractReject so we don't create illegal box files.
-          for (int i = 0; text[i] != '\0'; ++i) {
-            if (text[i] == ' ')
-              text[i] = kTesseractReject;
-          }
-          snprintf(result + output_length, total_length - output_length,
-                   "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
-                   right, image_height_ - top, page_number);
-          output_length += strlen(result + output_length);
-          // Just in case...
-          if (output_length + kMaxBytesPerLine > total_length)
-            break;
-        }
-      } while (it->Next(RIL_SYMBOL));
-      delete it;
-      return result;
+        if (tesseract_ == nullptr ||
+            (!recognition_done_ && Recognize(nullptr) < 0))
+            return nullptr;
+        int blob_count;
+        int utf8_length = TextLength(&blob_count);
+        int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
+                           kMaxBytesPerLine;
+        char* result = new char[total_length];
+        result[0] = '\0';
+        int output_length = 0;
+        LTRResultIterator* it = GetLTRIterator();
+        do {
+            int left, top, right, bottom;
+            if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
+                const std::unique_ptr</*non-const*/ char[]> text(
+                        it->GetUTF8Text(RIL_SYMBOL));
+                // Tesseract uses space for recognition failure. Fix to a reject
+                // character, kTesseractReject so we don't create illegal box files.
+                for (int i = 0; text[i] != '\0'; ++i) {
+                    if (text[i] == ' ')
+                        text[i] = kTesseractReject;
+                }
+                snprintf(result + output_length, total_length - output_length,
+                         "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
+                         right, image_height_ - top, page_number);
+                output_length += strlen(result + output_length);
+                // Just in case...
+                if (output_length + kMaxBytesPerLine > total_length)
+                    break;
+            }
+        } while (it->Next(RIL_SYMBOL));
+        delete it;
+        return result;
     }
 
 /**
@@ -1898,104 +1896,104 @@ namespace tesseract {
  * Returned string must be freed with the delete [] operator.
  */
     char* TessBaseAPI::GetUNLVText() {
-      if (tesseract_ == nullptr ||
-          (!recognition_done_ && Recognize(nullptr) < 0))
-        return nullptr;
-      bool tilde_crunch_written = false;
-      bool last_char_was_newline = true;
-      bool last_char_was_tilde = false;
-
-      int total_length = TextLength(nullptr);
-      PAGE_RES_IT   page_res_it(page_res_);
-      char* result = new char[total_length];
-      char* ptr = result;
-      for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-           page_res_it.forward()) {
-        WERD_RES *word = page_res_it.word();
-        // Process the current word.
-        if (word->unlv_crunch_mode != CR_NONE) {
-          if (word->unlv_crunch_mode != CR_DELETE &&
-              (!tilde_crunch_written ||
-               (word->unlv_crunch_mode == CR_KEEP_SPACE &&
-                word->word->space() > 0 &&
-                !word->word->flag(W_FUZZY_NON) &&
-                !word->word->flag(W_FUZZY_SP)))) {
-            if (!word->word->flag(W_BOL) &&
-                word->word->space() > 0 &&
-                !word->word->flag(W_FUZZY_NON) &&
-                !word->word->flag(W_FUZZY_SP)) {
-              /* Write a space to separate from preceding good text */
-              *ptr++ = ' ';
-              last_char_was_tilde = false;
-            }
-            if (!last_char_was_tilde) {
-              // Write a reject char.
-              last_char_was_tilde = true;
-              *ptr++ = kUNLVReject;
-              tilde_crunch_written = true;
-              last_char_was_newline = false;
-            }
-          }
-        } else {
-          // NORMAL PROCESSING of non tilde crunched words.
-          tilde_crunch_written = false;
-          tesseract_->set_unlv_suspects(word);
-          const char* wordstr = word->best_choice->unichar_string().string();
-          const STRING& lengths = word->best_choice->unichar_lengths();
-          int length = lengths.length();
-          int i = 0;
-          int offset = 0;
-
-          if (last_char_was_tilde &&
-              word->word->space() == 0 && wordstr[offset] == ' ') {
-            // Prevent adjacent tilde across words - we know that adjacent tildes
-            // within words have been removed.
-            // Skip the first character.
-            offset = lengths[i++];
-          }
-          if (i < length && wordstr[offset] != 0) {
-            if (!last_char_was_newline)
-              *ptr++ = ' ';
-            else
-              last_char_was_newline = false;
-            for (; i < length; offset += lengths[i++]) {
-              if (wordstr[offset] == ' ' ||
-                  wordstr[offset] == kTesseractReject) {
-                *ptr++ = kUNLVReject;
-                last_char_was_tilde = true;
-              } else {
-                if (word->reject_map[i].rejected())
-                  *ptr++ = kUNLVSuspect;
-                UNICHAR ch(wordstr + offset, lengths[i]);
-                int uni_ch = ch.first_uni();
-                for (int j = 0; kUniChs[j] != 0; ++j) {
-                  if (kUniChs[j] == uni_ch) {
-                    uni_ch = kLatinChs[j];
-                    break;
-                  }
+        if (tesseract_ == nullptr ||
+            (!recognition_done_ && Recognize(nullptr) < 0))
+            return nullptr;
+        bool tilde_crunch_written = false;
+        bool last_char_was_newline = true;
+        bool last_char_was_tilde = false;
+
+        int total_length = TextLength(nullptr);  // Sizes the output buffer; the blob-count out-param is not requested.
+        PAGE_RES_IT   page_res_it(page_res_);
+        char* result = new char[total_length];
+        char* ptr = result;
+        for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+             page_res_it.forward()) {
+            WERD_RES *word = page_res_it.word();
+            // Process the current word.
+            if (word->unlv_crunch_mode != CR_NONE) {
+                if (word->unlv_crunch_mode != CR_DELETE &&
+                    (!tilde_crunch_written ||
+                     (word->unlv_crunch_mode == CR_KEEP_SPACE &&
+                      word->word->space() > 0 &&
+                      !word->word->flag(W_FUZZY_NON) &&
+                      !word->word->flag(W_FUZZY_SP)))) {
+                    if (!word->word->flag(W_BOL) &&
+                        word->word->space() > 0 &&
+                        !word->word->flag(W_FUZZY_NON) &&
+                        !word->word->flag(W_FUZZY_SP)) {
+                        /* Write a space to separate from preceding good text */
+                        *ptr++ = ' ';
+                        last_char_was_tilde = false;
+                    }
+                    if (!last_char_was_tilde) {
+                        // Write a reject char.
+                        last_char_was_tilde = true;
+                        *ptr++ = kUNLVReject;
+                        tilde_crunch_written = true;
+                        last_char_was_newline = false;
+                    }
                 }
-                if (uni_ch <= 0xff) {
-                  *ptr++ = static_cast<char>(uni_ch);
-                  last_char_was_tilde = false;
-                } else {
-                  *ptr++ = kUNLVReject;
-                  last_char_was_tilde = true;
+            } else {
+                // NORMAL PROCESSING of non tilde crunched words.
+                tilde_crunch_written = false;
+                tesseract_->set_unlv_suspects(word);
+                const char* wordstr = word->best_choice->unichar_string().string();
+                const STRING& lengths = word->best_choice->unichar_lengths();
+                int length = lengths.length();
+                int i = 0;
+                int offset = 0;
+
+                if (last_char_was_tilde &&
+                    word->word->space() == 0 && wordstr[offset] == ' ') {
+                    // Prevent adjacent tilde across words - we know that adjacent tildes
+                    // within words have been removed.
+                    // Skip the first character.
+                    offset = lengths[i++];
+                }
+                if (i < length && wordstr[offset] != 0) {
+                    if (!last_char_was_newline)
+                        *ptr++ = ' ';
+                    else
+                        last_char_was_newline = false;
+                    for (; i < length; offset += lengths[i++]) {
+                        if (wordstr[offset] == ' ' ||
+                            wordstr[offset] == kTesseractReject) {
+                            *ptr++ = kUNLVReject;
+                            last_char_was_tilde = true;
+                        } else {
+                            if (word->reject_map[i].rejected())
+                                *ptr++ = kUNLVSuspect;
+                            UNICHAR ch(wordstr + offset, lengths[i]);
+                            int uni_ch = ch.first_uni();
+                            for (int j = 0; kUniChs[j] != 0; ++j) {  // kUniChs is 0-terminated; kLatinChs holds the replacement at the same index.
+                                if (kUniChs[j] == uni_ch) {
+                                    uni_ch = kLatinChs[j];
+                                    break;
+                                }
+                            }
+                            if (uni_ch <= 0xff) {  // Only code points that fit in one byte are emitted; anything larger becomes a reject.
+                                *ptr++ = static_cast<char>(uni_ch);
+                                last_char_was_tilde = false;
+                            } else {
+                                *ptr++ = kUNLVReject;
+                                last_char_was_tilde = true;
+                            }
+                        }
+                    }
                 }
-              }
             }
-          }
-        }
-        if (word->word->flag(W_EOL) && !last_char_was_newline) {
-          /* Add a new line output */
-          *ptr++ = '\n';
-          tilde_crunch_written = false;
-          last_char_was_newline = true;
-          last_char_was_tilde = false;
+            if (word->word->flag(W_EOL) && !last_char_was_newline) {
+                /* Add a new line output */
+                *ptr++ = '\n';
+                tilde_crunch_written = false;
+                last_char_was_newline = true;
+                last_char_was_tilde = false;
+            }
         }
-      }
-      *ptr++ = '\n';
-      *ptr = '\0';
-      return result;
+        *ptr++ = '\n';  // The output always ends with a newline followed by the NUL terminator.
+        *ptr = '\0';
+        return result;
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -2012,27 +2010,27 @@ namespace tesseract {
     bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
                                               const char** script_name,
                                               float* script_conf) {
-      OSResults osr;
+        OSResults osr;
 
-      bool osd = DetectOS(&osr);
-      if (!osd) {
-        return false;
-      }
+        bool osd = DetectOS(&osr);
+        if (!osd) {
+            return false;  // DetectOS failed; none of the output pointers has been written.
+        }
 
-      int orient_id = osr.best_result.orientation_id;
-      int script_id = osr.get_best_script(orient_id);
-      if (orient_conf) *orient_conf = osr.best_result.oconfidence;
-      if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
+        int orient_id = osr.best_result.orientation_id;
+        int script_id = osr.get_best_script(orient_id);
+        if (orient_conf) *orient_conf = osr.best_result.oconfidence;
+        if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
 
-      if (script_name) {
-        const char* script = osr.unicharset->get_script_from_script_id(script_id);
+        if (script_name) {  // Like the other outputs, this pointer is optional and skipped when null.
+            const char* script = osr.unicharset->get_script_from_script_id(script_id);
 
-        *script_name = script;
-      }
+            *script_name = script;
+        }
 
-      if (script_conf) *script_conf = osr.best_result.sconfidence;
+        if (script_conf) *script_conf = osr.best_result.sconfidence;
 
-      return true;
+        return true;
     }
 
 /**
@@ -2041,70 +2039,70 @@ namespace tesseract {
  * page_number is a 0-based page index that will appear in the osd file.
  */
     char* TessBaseAPI::GetOsdText(int page_number) {
-      int orient_deg;
-      float orient_conf;
-      const char* script_name;
-      float script_conf;
+        int orient_deg;
+        float orient_conf;
+        const char* script_name;
+        float script_conf;
 
-      if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
-                                   &script_conf))
-        return nullptr;
+        if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
+                                     &script_conf))
+            return nullptr;  // Detection failed; no buffer is allocated.
 
-      // clockwise rotation needed to make the page upright
-      int rotate = OrientationIdToValue(orient_deg / 90);
+        // clockwise rotation needed to make the page upright
+        int rotate = OrientationIdToValue(orient_deg / 90);
 
-      const int kOsdBufsize = 255;
-      char* osd_buf = new char[kOsdBufsize];
-      snprintf(osd_buf, kOsdBufsize,
-               "Page number: %d\n"
-               "Orientation in degrees: %d\n"
-               "Rotate: %d\n"
-               "Orientation confidence: %.2f\n"
-               "Script: %s\n"
-               "Script confidence: %.2f\n",
-               page_number, orient_deg, rotate, orient_conf, script_name,
-               script_conf);
+        const int kOsdBufsize = 255;
+        char* osd_buf = new char[kOsdBufsize];  // Allocated with new[] and returned to the caller as-is.
+        snprintf(osd_buf, kOsdBufsize,  // Bounded by kOsdBufsize, so over-long output is truncated rather than overflowing.
+                 "Page number: %d\n"
+                 "Orientation in degrees: %d\n"
+                 "Rotate: %d\n"
+                 "Orientation confidence: %.2f\n"
+                 "Script: %s\n"
+                 "Script confidence: %.2f\n",
+                 page_number, orient_deg, rotate, orient_conf, script_name,
+                 script_conf);
 
-      return osd_buf;
+        return osd_buf;
     }
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
 /** Returns the average word confidence for Tesseract page result. */
     int TessBaseAPI::MeanTextConf() {
-      int* conf = AllWordConfidences();
-      if (!conf) return 0;
-      int sum = 0;
-      int *pt = conf;
-      while (*pt >= 0) sum += *pt++;
-      if (pt != conf) sum /= pt - conf;
-      delete [] conf;
-      return sum;
+        int* conf = AllWordConfidences();
+        if (!conf) return 0;  // No confidences available: report 0.
+        int sum = 0;
+        int *pt = conf;
+        while (*pt >= 0) sum += *pt++;  // Accumulate until the negative terminator entry.
+        if (pt != conf) sum /= pt - conf;  // Integer mean; skipped for an empty list to avoid dividing by zero.
+        delete [] conf;  // The array was allocated with new[] by AllWordConfidences.
+        return sum;
     }
 
 /** Returns an array of all word confidences, terminated by -1. */
     int* TessBaseAPI::AllWordConfidences() {
-      if (tesseract_ == nullptr ||
-          (!recognition_done_ && Recognize(nullptr) < 0))
-        return nullptr;
-      int n_word = 0;
-      PAGE_RES_IT res_it(page_res_);
-      for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
-        n_word++;
-
-      int* conf = new int[n_word+1];
-      n_word = 0;
-      for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
-        WERD_RES *word = res_it.word();
-        WERD_CHOICE* choice = word->best_choice;
-        int w_conf = static_cast<int>(100 + 5 * choice->certainty());
-        // This is the eq for converting Tesseract confidence to 1..100
-        if (w_conf < 0) w_conf = 0;
-        if (w_conf > 100) w_conf = 100;
-        conf[n_word++] = w_conf;
-      }
-      conf[n_word] = -1;
-      return conf;
+        if (tesseract_ == nullptr ||
+            (!recognition_done_ && Recognize(nullptr) < 0))
+            return nullptr;
+        int n_word = 0;
+        PAGE_RES_IT res_it(page_res_);
+        for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
+            n_word++;  // First pass: count the words so the array can be sized.
+
+        int* conf = new int[n_word+1];  // One extra slot for the -1 terminator.
+        n_word = 0;
+        for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+            WERD_RES *word = res_it.word();
+            WERD_CHOICE* choice = word->best_choice;  // NOTE(review): dereferenced below without a null check - confirm best_choice is always set here.
+            int w_conf = static_cast<int>(100 + 5 * choice->certainty());
+            // Maps Tesseract certainty to a 0..100 confidence; clamped just below.
+            if (w_conf < 0) w_conf = 0;
+            if (w_conf > 100) w_conf = 100;
+            conf[n_word++] = w_conf;
+        }
+        conf[n_word] = -1;
+        return conf;
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -2119,59 +2117,59 @@ namespace tesseract {
  * Returns false if adaption was not possible for some reason.
  */
     bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
-      int debug = 0;
-      GetIntVariable("applybox_debug", &debug);
-      bool success = true;
-      PageSegMode current_psm = GetPageSegMode();
-      SetPageSegMode(mode);
-      SetVariable("classify_enable_learning", "0");
-      const std::unique_ptr<const char[]> text(GetUTF8Text());
-      if (debug) {
-        tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
-      }
-      if (text != nullptr) {
-        PAGE_RES_IT it(page_res_);
-        WERD_RES* word_res = it.word();
-        if (word_res != nullptr) {
-          word_res->word->set_text(wordstr);
-          // Check to see if text matches wordstr.
-          int w = 0;
-          int t;
-          for (t = 0; text[t] != '\0'; ++t) {
-            if (text[t] == '\n' || text[t] == ' ')
-              continue;
-            while (wordstr[w] == ' ') ++w;
-            if (text[t] != wordstr[w])
-              break;
-            ++w;
-          }
-          if (text[t] != '\0' || wordstr[w] != '\0') {
-            // No match.
-            delete page_res_;
-            GenericVector<TBOX> boxes;
-            page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
-            tesseract_->ReSegmentByClassification(page_res_);
-            tesseract_->TidyUp(page_res_);
-            PAGE_RES_IT pr_it(page_res_);
-            if (pr_it.word() == nullptr)
-              success = false;
-            else
-              word_res = pr_it.word();
-          } else {
-            word_res->BestChoiceToCorrectText();
-          }
-          if (success) {
-            tesseract_->EnableLearning = true;
-            tesseract_->LearnWord(nullptr, word_res);
-          }
+        int debug = 0;
+        GetIntVariable("applybox_debug", &debug);
+        bool success = true;
+        PageSegMode current_psm = GetPageSegMode();
+        SetPageSegMode(mode);  // Temporary switch; the previous mode is restored before returning.
+        SetVariable("classify_enable_learning", "0");
+        const std::unique_ptr<const char[]> text(GetUTF8Text());
+        if (debug) {
+            tprintf("Trying to adapt \"%s\" to \"%s\"\n", text != nullptr ? text.get() : "(null)", wordstr);  // text may be null here; never hand a null pointer to %s.
+        }
+        if (text != nullptr) {
+            PAGE_RES_IT it(page_res_);
+            WERD_RES* word_res = it.word();
+            if (word_res != nullptr) {
+                word_res->word->set_text(wordstr);
+                // Check to see if text matches wordstr.
+                int w = 0;
+                int t;
+                for (t = 0; text[t] != '\0'; ++t) {
+                    if (text[t] == '\n' || text[t] == ' ')
+                        continue;
+                    while (wordstr[w] == ' ') ++w;
+                    if (text[t] != wordstr[w])
+                        break;
+                    ++w;
+                }
+                if (text[t] != '\0' || wordstr[w] != '\0') {  // Either string still has unmatched characters.
+                    // No match.
+                    delete page_res_;
+                    GenericVector<TBOX> boxes;
+                    page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
+                    tesseract_->ReSegmentByClassification(page_res_);
+                    tesseract_->TidyUp(page_res_);
+                    PAGE_RES_IT pr_it(page_res_);
+                    if (pr_it.word() == nullptr)
+                        success = false;
+                    else
+                        word_res = pr_it.word();
+                } else {
+                    word_res->BestChoiceToCorrectText();
+                }
+                if (success) {
+                    tesseract_->EnableLearning = true;
+                    tesseract_->LearnWord(nullptr, word_res);
+                }
+            } else {
+                success = false;
+            }
         } else {
-          success = false;
+            success = false;
         }
-      } else {
-        success = false;
-      }
-      SetPageSegMode(current_psm);
-      return success;
+        SetPageSegMode(current_psm);  // Restore the page segmentation mode that was active on entry.
+        return success;
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
@@ -2182,10 +2180,10 @@ namespace tesseract {
  * any Recognize or Get* operation.
  */
     void TessBaseAPI::Clear() {
-      if (thresholder_ != nullptr)
-        thresholder_->Clear();
-      ClearResults();
-      if (tesseract_ != nullptr) SetInputImage(nullptr);
+        if (thresholder_ != nullptr)
+            thresholder_->Clear();
+        ClearResults();
+        if (tesseract_ != nullptr) SetInputImage(nullptr);  // The input image is reset to null only when tesseract_ exists.
     }
 
 /**
@@ -2195,33 +2193,33 @@ namespace tesseract {
  * other than Init and anything declared above it in the class definition.
  */
     void TessBaseAPI::End() {
-      Clear();
-      delete thresholder_;
-      thresholder_ = nullptr;
-      delete page_res_;
-      page_res_ = nullptr;
-      delete block_list_;
-      block_list_ = nullptr;
-      if (paragraph_models_ != nullptr) {
-        paragraph_models_->delete_data_pointers();
-        delete paragraph_models_;
-        paragraph_models_ = nullptr;
-      }
-      if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
-      delete tesseract_;
-      tesseract_ = nullptr;
-      delete osd_tesseract_;
-      osd_tesseract_ = nullptr;
-      delete equ_detect_;
-      equ_detect_ = nullptr;
-      delete input_file_;
-      input_file_ = nullptr;
-      delete output_file_;
-      output_file_ = nullptr;
-      delete datapath_;
-      datapath_ = nullptr;
-      delete language_;
-      language_ = nullptr;
+        Clear();  // Runs while thresholder_ and tesseract_ are still alive, since Clear() uses both.
+        delete thresholder_;
+        thresholder_ = nullptr;
+        delete page_res_;
+        page_res_ = nullptr;
+        delete block_list_;
+        block_list_ = nullptr;
+        if (paragraph_models_ != nullptr) {
+            paragraph_models_->delete_data_pointers();
+            delete paragraph_models_;
+            paragraph_models_ = nullptr;
+        }
+        if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;  // Both may alias one engine; null one so it is not deleted twice.
+        delete tesseract_;
+        tesseract_ = nullptr;
+        delete osd_tesseract_;
+        osd_tesseract_ = nullptr;
+        delete equ_detect_;
+        equ_detect_ = nullptr;
+        delete input_file_;
+        input_file_ = nullptr;
+        delete output_file_;
+        output_file_ = nullptr;
+        delete datapath_;
+        datapath_ = nullptr;
+        delete language_;
+        language_ = nullptr;
     }
 
 // Clear any library-level memory caches.
@@ -2230,7 +2228,7 @@ namespace tesseract {
 // and End() of individual TessBaseAPI's.  This function allows the clearing
 // of these caches.
     void TessBaseAPI::ClearPersistentCache() {
-      Dict::GlobalDawgCache()->DeleteUnusedDawgs();
+        Dict::GlobalDawgCache()->DeleteUnusedDawgs();  // Delegates to the global dawg cache exposed by Dict.
     }
 
 /**
@@ -2238,55 +2236,55 @@ namespace tesseract {
  * returns 0 if the word is invalid, non-zero if valid
  */
     int TessBaseAPI::IsValidWord(const char *word) {
-      return tesseract_->getDict().valid_word(word);
+        return tesseract_ != nullptr ? tesseract_->getDict().valid_word(word) : 0;  // With no engine, report "invalid" (0) instead of dereferencing null.
     }
 // Returns true if utf8_character is defined in the UniCharset.
     bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
-      return tesseract_->unicharset.contains_unichar(utf8_character);
+        return tesseract_ != nullptr && tesseract_->unicharset.contains_unichar(utf8_character);  // With no engine, no character is valid; avoids a null dereference.
     }
 
 
 // TODO(rays) Obsolete this function and replace with a more aptly named
 // function that returns image coordinates rather than tesseract coordinates.
     bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
-      PageIterator* it = AnalyseLayout();
-      if (it == nullptr) {
-        return false;
-      }
-      int x1, x2, y1, y2;
-      it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
-      // Calculate offset and slope (NOTE: Kind of ugly)
-      if (x2 <= x1) x2 = x1 + 1;
-      // Convert the point pair to slope/offset of the baseline (in image coords.)
-      *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
-      *out_offset = static_cast<int>(y1 - *out_slope * x1);
-      // Get the y-coord of the baseline at the left and right edges of the
-      // textline's bounding box.
-      int left, top, right, bottom;
-      if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
+        PageIterator* it = AnalyseLayout();
+        if (it == nullptr) {
+            return false;
+        }
+        int x1, x2, y1, y2;
+        if (!it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2)) { delete it; return false; }  // No baseline: x1..y2 were not written, so bail out instead of reading them.
+        // Calculate offset and slope (NOTE: Kind of ugly)
+        if (x2 <= x1) x2 = x1 + 1;
+        // Convert the point pair to slope/offset of the baseline (in image coords.)
+        *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
+        *out_offset = static_cast<int>(y1 - *out_slope * x1);
+        // Get the y-coord of the baseline at the left and right edges of the
+        // textline's bounding box.
+        int left, top, right, bottom;
+        if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
+            delete it;
+            return false;
+        }
+        int left_y = IntCastRounded(*out_slope * left + *out_offset);
+        int right_y = IntCastRounded(*out_slope * right + *out_offset);
+        // Shift the baseline down so it passes through the nearest bottom-corner
+        // of the textline's bounding box. This is the difference between the y
+        // at the lowest (max) edge of the box and the actual box bottom.
+        *out_offset += bottom - std::max(left_y, right_y);
+        // Switch back to bottom-up tesseract coordinates. Requires negation of
+        // the slope and height - offset for the offset.
+        *out_slope = -*out_slope;
+        *out_offset = rect_height_ - *out_offset;
         delete it;
-        return false;
-      }
-      int left_y = IntCastRounded(*out_slope * left + *out_offset);
-      int right_y = IntCastRounded(*out_slope * right + *out_offset);
-      // Shift the baseline down so it passes through the nearest bottom-corner
-      // of the textline's bounding box. This is the difference between the y
-      // at the lowest (max) edge of the box and the actual box bottom.
-      *out_offset += bottom - std::max(left_y, right_y);
-      // Switch back to bottom-up tesseract coordinates. Requires negation of
-      // the slope and height - offset for the offset.
-      *out_slope = -*out_slope;
-      *out_offset = rect_height_ - *out_offset;
-      delete it;
 
-      return true;
+        return true;
     }
 
 /** Sets Dict::letter_is_okay_ function to point to the given function. */
     void TessBaseAPI::SetDictFunc(DictFunc f) {
-      if (tesseract_ != nullptr) {
-        tesseract_->getDict().letter_is_okay_ = f;
-      }
+        if (tesseract_ != nullptr) {  // Silently does nothing when tesseract_ is null.
+            tesseract_->getDict().letter_is_okay_ = f;
+        }
     }
 
 /**
@@ -2298,33 +2296,33 @@ namespace tesseract {
  * utf-8 string.
  */
     void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
-      if (tesseract_ != nullptr) {
-        tesseract_->getDict().probability_in_context_ = f;
-        // Set it for the sublangs too.
-        int num_subs = tesseract_->num_sub_langs();
-        for (int i = 0; i < num_subs; ++i) {
-          tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
+        if (tesseract_ != nullptr) {  // Silently does nothing when tesseract_ is null.
+            tesseract_->getDict().probability_in_context_ = f;
+            // Set it for the sublangs too.
+            int num_subs = tesseract_->num_sub_langs();
+            for (int i = 0; i < num_subs; ++i) {
+                tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
+            }
         }
-      }
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Sets Wordrec::fill_lattice_ function to point to the given function. */
     void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
-      if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
+        if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;  // Silently does nothing when tesseract_ is null.
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /** Common code for setting the image. */
     bool TessBaseAPI::InternalSetImage() {
-      if (tesseract_ == nullptr) {
-        tprintf("Please call Init before attempting to set an image.\n");
-        return false;
-      }
-      if (thresholder_ == nullptr)
-        thresholder_ = new ImageThresholder;
-      ClearResults();
-      return true;
+        if (tesseract_ == nullptr) {
+            tprintf("Please call Init before attempting to set an image.\n");
+            return false;
+        }
+        if (thresholder_ == nullptr)
+            thresholder_ = new ImageThresholder;  // Created only when no thresholder exists yet.
+        ClearResults();  // Called on every successful path, before reporting success.
+        return true;
     }
 
 /**
@@ -2334,153 +2332,153 @@ namespace tesseract {
  * The usual argument to Threshold is Tesseract::mutable_pix_binary().
  */
     bool TessBaseAPI::Threshold(Pix** pix) {
-      ASSERT_HOST(pix != nullptr);
-      if (*pix != nullptr)
-        pixDestroy(pix);
-      // Zero resolution messes up the algorithms, so make sure it is credible.
-      int user_dpi = 0;
-      bool a = GetIntVariable("user_defined_dpi", &user_dpi);
-      int y_res = thresholder_->GetScaledYResolution();
-      if (user_dpi && (user_dpi < kMinCredibleResolution ||
-                       user_dpi > kMaxCredibleResolution)) {
-        tprintf("Warning: User defined image dpi is outside of expected range "
-                "(%d - %d)!\n",
-                kMinCredibleResolution, kMaxCredibleResolution);
-      }
-      // Always use user defined dpi
-      if (user_dpi) {
-        thresholder_->SetSourceYResolution(user_dpi);
-      } else if (y_res < kMinCredibleResolution ||
-                 y_res > kMaxCredibleResolution) {
-        tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
-                y_res, kMinCredibleResolution);
-        thresholder_->SetSourceYResolution(kMinCredibleResolution);
-      }
-      PageSegMode pageseg_mode =
-              static_cast<PageSegMode>(
-                      static_cast<int>(tesseract_->tessedit_pageseg_mode));
-      if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
-      thresholder_->GetImageSizes(&rect_left_, &rect_top_,
-                                  &rect_width_, &rect_height_,
-                                  &image_width_, &image_height_);
-      if (!thresholder_->IsBinary()) {
-        tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
-        tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
-      } else {
-        tesseract_->set_pix_thresholds(nullptr);
-        tesseract_->set_pix_grey(nullptr);
-      }
-      // Set the internal resolution that is used for layout parameters from the
-      // estimated resolution, rather than the image resolution, which may be
-      // fabricated, but we will use the image resolution, if there is one, to
-      // report output point sizes.
-      int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
-                                      kMinCredibleResolution,
-                                      kMaxCredibleResolution);
-      if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
-        tprintf("Estimated internal resolution %d out of range! "
-                "Corrected to %d.\n",
-                thresholder_->GetScaledEstimatedResolution(), estimated_res);
-      }
-      tesseract_->set_source_resolution(estimated_res);
-      SavePixForCrash(estimated_res, *pix);
-      return true;
+        ASSERT_HOST(pix != nullptr);
+        if (*pix != nullptr)
+            pixDestroy(pix);
+        // Zero resolution messes up the algorithms, so make sure it is credible.
+        int user_dpi = 0;
+        bool a = GetIntVariable("user_defined_dpi", &user_dpi);
+        int y_res = thresholder_->GetScaledYResolution();
+        if (user_dpi && (user_dpi < kMinCredibleResolution ||
+                         user_dpi > kMaxCredibleResolution)) {
+            tprintf("Warning: User defined image dpi is outside of expected range "
+                    "(%d - %d)!\n",
+                    kMinCredibleResolution, kMaxCredibleResolution);
+        }
+        // Always use user defined dpi
+        if (user_dpi) {
+            thresholder_->SetSourceYResolution(user_dpi);
+        } else if (y_res < kMinCredibleResolution ||
+                   y_res > kMaxCredibleResolution) {
+            tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
+                    y_res, kMinCredibleResolution);
+            thresholder_->SetSourceYResolution(kMinCredibleResolution);
+        }
+        PageSegMode pageseg_mode =
+                static_cast<PageSegMode>(
+                        static_cast<int>(tesseract_->tessedit_pageseg_mode));
+        if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
+        thresholder_->GetImageSizes(&rect_left_, &rect_top_,
+                                    &rect_width_, &rect_height_,
+                                    &image_width_, &image_height_);
+        if (!thresholder_->IsBinary()) {
+            tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
+            tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
+        } else {
+            tesseract_->set_pix_thresholds(nullptr);
+            tesseract_->set_pix_grey(nullptr);
+        }
+        // Set the internal resolution that is used for layout parameters from the
+        // estimated resolution, rather than the image resolution, which may be
+        // fabricated, but we will use the image resolution, if there is one, to
+        // report output point sizes.
+        int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
+                                        kMinCredibleResolution,
+                                        kMaxCredibleResolution);
+        if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
+            tprintf("Estimated internal resolution %d out of range! "
+                    "Corrected to %d.\n",
+                    thresholder_->GetScaledEstimatedResolution(), estimated_res);
+        }
+        tesseract_->set_source_resolution(estimated_res);
+        SavePixForCrash(estimated_res, *pix);
+        return true;
     }
 
 /** Find lines from the image making the BLOCK_LIST. */
     int TessBaseAPI::FindLines() {
-      if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-        tprintf("Please call SetImage before attempting recognition.\n");
-        return -1;
-      }
-      if (recognition_done_)
-        ClearResults();
-      if (!block_list_->empty()) {
-        return 0;
-      }
-      if (tesseract_ == nullptr) {
-        tesseract_ = new Tesseract;
+        if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+            tprintf("Please call SetImage before attempting recognition.\n");
+            return -1;
+        }
+        if (recognition_done_)
+            ClearResults();
+        if (!block_list_->empty()) {
+            return 0;
+        }
+        if (tesseract_ == nullptr) {
+            tesseract_ = new Tesseract;
 #ifndef DISABLED_LEGACY_ENGINE
-        tesseract_->InitAdaptiveClassifier(nullptr);
+            tesseract_->InitAdaptiveClassifier(nullptr);
 #endif
-      }
-      if (tesseract_->pix_binary() == nullptr &&
-          !Threshold(tesseract_->mutable_pix_binary())) {
-        return -1;
-      }
+        }
+        if (tesseract_->pix_binary() == nullptr &&
+            !Threshold(tesseract_->mutable_pix_binary())) {
+            return -1;
+        }
 
-      tesseract_->PrepareForPageseg();
+        tesseract_->PrepareForPageseg();
 
 #ifndef DISABLED_LEGACY_ENGINE
-      if (tesseract_->textord_equation_detect) {
-        if (equ_detect_ == nullptr && datapath_ != nullptr) {
-          equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
-        }
-        if (equ_detect_ == nullptr) {
-          tprintf("Warning: Could not set equation detector\n");
-        } else {
-          tesseract_->SetEquationDetect(equ_detect_);
+        if (tesseract_->textord_equation_detect) {
+            if (equ_detect_ == nullptr && datapath_ != nullptr) {
+                equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
+            }
+            if (equ_detect_ == nullptr) {
+                tprintf("Warning: Could not set equation detector\n");
+            } else {
+                tesseract_->SetEquationDetect(equ_detect_);
+            }
         }
-      }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-      Tesseract* osd_tess = osd_tesseract_;
-      OSResults osr;
-      if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
-          osd_tess == nullptr) {
-        if (strcmp(language_->string(), "osd") == 0) {
-          osd_tess = tesseract_;
-        } else {
-          osd_tesseract_ = new Tesseract;
-          TessdataManager mgr(reader_);
-          if (datapath_ == nullptr) {
-            tprintf("Warning: Auto orientation and script detection requested,"
-                    " but data path is undefined\n");
-            delete osd_tesseract_;
-            osd_tesseract_ = nullptr;
-          } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
-                                                    "osd", OEM_TESSERACT_ONLY,
-                                                    nullptr, 0, nullptr, nullptr,
-                                                    false, &mgr) == 0) {
-            osd_tess = osd_tesseract_;
-            osd_tesseract_->set_source_resolution(
-                    thresholder_->GetSourceYResolution());
-          } else {
-            tprintf("Warning: Auto orientation and script detection requested,"
-                    " but osd language failed to load\n");
-            delete osd_tesseract_;
-            osd_tesseract_ = nullptr;
-          }
+        Tesseract* osd_tess = osd_tesseract_;
+        OSResults osr;
+        if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
+            osd_tess == nullptr) {
+            if (strcmp(language_->string(), "osd") == 0) {
+                osd_tess = tesseract_;
+            } else {
+                osd_tesseract_ = new Tesseract;
+                TessdataManager mgr(reader_);
+                if (datapath_ == nullptr) {
+                    tprintf("Warning: Auto orientation and script detection requested,"
+                            " but data path is undefined\n");
+                    delete osd_tesseract_;
+                    osd_tesseract_ = nullptr;
+                } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
+                                                          "osd", OEM_TESSERACT_ONLY,
+                                                          nullptr, 0, nullptr, nullptr,
+                                                          false, &mgr) == 0) {
+                    osd_tess = osd_tesseract_;
+                    osd_tesseract_->set_source_resolution(
+                            thresholder_->GetSourceYResolution());
+                } else {
+                    tprintf("Warning: Auto orientation and script detection requested,"
+                            " but osd language failed to load\n");
+                    delete osd_tesseract_;
+                    osd_tesseract_ = nullptr;
+                }
+            }
         }
-      }
 
-      if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
-        return -1;
+        if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
+            return -1;
 
-      // If Devanagari is being recognized, we use different images for page seg
-      // and for OCR.
-      tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
-      return 0;
+        // If Devanagari is being recognized, we use different images for page seg
+        // and for OCR.
+        tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
+        return 0;
     }
 
 /** Delete the pageres and clear the block list ready for a new page. */
     void TessBaseAPI::ClearResults() {
-      if (tesseract_ != nullptr) {
-        tesseract_->Clear();
-      }
-      delete page_res_;
-      page_res_ = nullptr;
-      recognition_done_ = false;
-      if (block_list_ == nullptr)
-        block_list_ = new BLOCK_LIST;
-      else
-        block_list_->clear();
-      if (paragraph_models_ != nullptr) {
-        paragraph_models_->delete_data_pointers();
-        delete paragraph_models_;
-        paragraph_models_ = nullptr;
-      }
-      SavePixForCrash(0, nullptr);
+        if (tesseract_ != nullptr) {
+            tesseract_->Clear();
+        }
+        delete page_res_;
+        page_res_ = nullptr;
+        recognition_done_ = false;
+        if (block_list_ == nullptr)
+            block_list_ = new BLOCK_LIST;
+        else
+            block_list_->clear();
+        if (paragraph_models_ != nullptr) {
+            paragraph_models_->delete_data_pointers();
+            delete paragraph_models_;
+            paragraph_models_ = nullptr;
+        }
+        SavePixForCrash(0, nullptr);
     }
 
 /**
@@ -2491,29 +2489,29 @@ namespace tesseract {
  * Also return the number of recognized blobs in blob_count.
  */
     int TessBaseAPI::TextLength(int* blob_count) {
-      if (tesseract_ == nullptr || page_res_ == nullptr)
-        return 0;
-
-      PAGE_RES_IT   page_res_it(page_res_);
-      int total_length = 2;
-      int total_blobs = 0;
-      // Iterate over the data structures to extract the recognition result.
-      for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-           page_res_it.forward()) {
-        WERD_RES *word = page_res_it.word();
-        WERD_CHOICE* choice = word->best_choice;
-        if (choice != nullptr) {
-          total_blobs += choice->length() + 2;
-          total_length += choice->unichar_string().length() + 2;
-          for (int i = 0; i < word->reject_map.length(); ++i) {
-            if (word->reject_map[i].rejected())
-              ++total_length;
-          }
+        if (tesseract_ == nullptr || page_res_ == nullptr)
+            return 0;
+
+        PAGE_RES_IT   page_res_it(page_res_);
+        int total_length = 2;
+        int total_blobs = 0;
+        // Iterate over the data structures to extract the recognition result.
+        for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+             page_res_it.forward()) {
+            WERD_RES *word = page_res_it.word();
+            WERD_CHOICE* choice = word->best_choice;
+            if (choice != nullptr) {
+                total_blobs += choice->length() + 2;
+                total_length += choice->unichar_string().length() + 2;
+                for (int i = 0; i < word->reject_map.length(); ++i) {
+                    if (word->reject_map[i].rejected())
+                        ++total_length;
+                }
+            }
         }
-      }
-      if (blob_count != nullptr)
-        *blob_count = total_blobs;
-      return total_length;
+        if (blob_count != nullptr)
+            *blob_count = total_blobs;
+        return total_length;
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -2522,22 +2520,22 @@ namespace tesseract {
  * Returns true if the image was processed successfully.
  */
     bool TessBaseAPI::DetectOS(OSResults* osr) {
-      if (tesseract_ == nullptr)
-        return false;
-      ClearResults();
-      if (tesseract_->pix_binary() == nullptr &&
-          !Threshold(tesseract_->mutable_pix_binary())) {
-        return false;
-      }
+        if (tesseract_ == nullptr)
+            return false;
+        ClearResults();
+        if (tesseract_->pix_binary() == nullptr &&
+            !Threshold(tesseract_->mutable_pix_binary())) {
+            return false;
+        }
 
-      if (input_file_ == nullptr)
-        input_file_ = new STRING(kInputFile);
-      return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
+        if (input_file_ == nullptr)
+            input_file_ = new STRING(kInputFile);
+        return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
     void TessBaseAPI::set_min_orientation_margin(double margin) {
-      tesseract_->min_orientation_margin.set_value(margin);
+        tesseract_->min_orientation_margin.set_value(margin);
     }
 
 /**
@@ -2556,95 +2554,95 @@ namespace tesseract {
  */
     void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
                                                bool** vertical_writing) {
-      delete[] *block_orientation;
-      *block_orientation = nullptr;
-      delete[] *vertical_writing;
-      *vertical_writing = nullptr;
-      BLOCK_IT block_it(block_list_);
-
-      block_it.move_to_first();
-      int num_blocks = 0;
-      for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
-        if (!block_it.data()->pdblk.poly_block()->IsText()) {
-          continue;
-        }
-        ++num_blocks;
-      }
-      if (!num_blocks) {
-        tprintf("WARNING: Found no blocks\n");
-        return;
-      }
-      *block_orientation = new int[num_blocks];
-      *vertical_writing = new bool[num_blocks];
-      block_it.move_to_first();
-      int i = 0;
-      for (block_it.mark_cycle_pt(); !block_it.cycled_list();
-           block_it.forward()) {
-        if (!block_it.data()->pdblk.poly_block()->IsText()) {
-          continue;
-        }
-        FCOORD re_rotation = block_it.data()->re_rotation();
-        float re_theta = re_rotation.angle();
-        FCOORD classify_rotation = block_it.data()->classify_rotation();
-        float classify_theta = classify_rotation.angle();
-        double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
-        if (rot_theta < 0) rot_theta += 4;
-        int num_rotations = static_cast<int>(rot_theta + 0.5);
-        (*block_orientation)[i] = num_rotations;
-        // The classify_rotation is non-zero only if the text has vertical
-        // writing direction.
-        (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
-        ++i;
-      }
+        delete[] *block_orientation;
+        *block_orientation = nullptr;
+        delete[] *vertical_writing;
+        *vertical_writing = nullptr;
+        BLOCK_IT block_it(block_list_);
+
+        block_it.move_to_first();
+        int num_blocks = 0;
+        for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+            if (!block_it.data()->pdblk.poly_block()->IsText()) {
+                continue;
+            }
+            ++num_blocks;
+        }
+        if (!num_blocks) {
+            tprintf("WARNING: Found no blocks\n");
+            return;
+        }
+        *block_orientation = new int[num_blocks];
+        *vertical_writing = new bool[num_blocks];
+        block_it.move_to_first();
+        int i = 0;
+        for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+             block_it.forward()) {
+            if (!block_it.data()->pdblk.poly_block()->IsText()) {
+                continue;
+            }
+            FCOORD re_rotation = block_it.data()->re_rotation();
+            float re_theta = re_rotation.angle();
+            FCOORD classify_rotation = block_it.data()->classify_rotation();
+            float classify_theta = classify_rotation.angle();
+            double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
+            if (rot_theta < 0) rot_theta += 4;
+            int num_rotations = static_cast<int>(rot_theta + 0.5);
+            (*block_orientation)[i] = num_rotations;
+            // The classify_rotation is non-zero only if the text has vertical
+            // writing direction.
+            (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
+            ++i;
+        }
     }
 
 
     void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
-      int debug_level = 0;
-      GetIntVariable("paragraph_debug_level", &debug_level);
-      if (paragraph_models_ == nullptr)
-        paragraph_models_ = new GenericVector<ParagraphModel*>;
-      MutableIterator *result_it = GetMutableIterator();
-      do {  // Detect paragraphs for this block
-        GenericVector<ParagraphModel *> models;
-        ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
-                                      result_it, &models);
-        *paragraph_models_ += models;
-      } while (result_it->Next(RIL_BLOCK));
-      delete result_it;
+        int debug_level = 0;
+        GetIntVariable("paragraph_debug_level", &debug_level);
+        if (paragraph_models_ == nullptr)
+            paragraph_models_ = new GenericVector<ParagraphModel*>;
+        MutableIterator *result_it = GetMutableIterator();
+        do {  // Detect paragraphs for this block
+            GenericVector<ParagraphModel *> models;
+            ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
+                                          result_it, &models);
+            *paragraph_models_ += models;
+        } while (result_it->Next(RIL_BLOCK));
+        delete result_it;
     }
 
 /** This method returns the string form of the specified unichar. */
     const char* TessBaseAPI::GetUnichar(int unichar_id) {
-      return tesseract_->unicharset.id_to_unichar(unichar_id);
+        return tesseract_->unicharset.id_to_unichar(unichar_id);
     }
 
 /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
     const Dawg *TessBaseAPI::GetDawg(int i) const {
-      if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
-      return tesseract_->getDict().GetDawg(i);
+        if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
+        return tesseract_->getDict().GetDawg(i);
     }
 
 /** Return the number of dawgs loaded into tesseract_ object. */
     int TessBaseAPI::NumDawgs() const {
-      return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
+        return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
     }
 
 /** Escape a char string - remove <>&"' with HTML codes. */
     STRING HOcrEscape(const char* text) {
-      STRING ret;
-      const char *ptr;
-      for (ptr = text; *ptr; ptr++) {
-        switch (*ptr) {
-          case '<': ret += "&lt;"; break;
-          case '>': ret += "&gt;"; break;
-          case '&': ret += "&amp;"; break;
-          case '"': ret += "&quot;"; break;
-          case '\'': ret += "&#39;"; break;
-          default: ret += *ptr;
+        STRING ret;
+        const char *ptr;
+        for (ptr = text; *ptr; ptr++) {
+            switch (*ptr) {
+                case '<': ret += "&lt;"; break;
+                case '>': ret += "&gt;"; break;
+                case '&': ret += "&amp;"; break;
+                case '"': ret += "&quot;"; break;
+                case '\'': ret += "&#39;"; break;
+                default: ret += *ptr;
+            }
         }
-      }
-      return ret;
+        return ret;
     }
 
 
@@ -2656,10 +2654,10 @@ namespace tesseract {
 
 /** Find lines from the image making the BLOCK_LIST. */
     BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
-      ASSERT_HOST(FindLines() == 0);
-      BLOCK_LIST* result = block_list_;
-      block_list_ = nullptr;
-      return result;
+        ASSERT_HOST(FindLines() == 0);
+        BLOCK_LIST* result = block_list_;
+        block_list_ = nullptr;
+        return result;
     }
 
 /**
@@ -2668,7 +2666,7 @@ namespace tesseract {
  * and let go of including the other headers.
  */
     void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
-      delete block_list;
+        delete block_list;
     }
 
 
@@ -2676,42 +2674,42 @@ namespace tesseract {
                                      float xheight,
                                      float descender,
                                      float ascender) {
-      int32_t xstarts[] = {-32000};
-      double quad_coeffs[] = {0, 0, baseline};
-      return new ROW(1,
-                     xstarts,
-                     quad_coeffs,
-                     xheight,
-                     ascender - (baseline + xheight),
-                     descender - baseline,
-                     0,
-                     0);
+        int32_t xstarts[] = {-32000};
+        double quad_coeffs[] = {0, 0, baseline};
+        return new ROW(1,
+                       xstarts,
+                       quad_coeffs,
+                       xheight,
+                       ascender - (baseline + xheight),
+                       descender - baseline,
+                       0,
+                       0);
     }
 
 /** Creates a TBLOB* from the whole pix. */
     TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
-      int width = pixGetWidth(pix);
-      int height = pixGetHeight(pix);
-      BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
-
-      // Create C_BLOBs from the page
-      extract_edges(pix, &block);
-
-      // Merge all C_BLOBs
-      C_BLOB_LIST *list = block.blob_list();
-      C_BLOB_IT c_blob_it(list);
-      if (c_blob_it.empty())
-        return nullptr;
-      // Move all the outlines to the first blob.
-      C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
-      for (c_blob_it.forward();
-           !c_blob_it.at_first();
-           c_blob_it.forward()) {
-        C_BLOB *c_blob = c_blob_it.data();
-        ol_it.add_list_after(c_blob->out_list());
-      }
-      // Convert the first blob to the output TBLOB.
-      return TBLOB::PolygonalCopy(false, c_blob_it.data());
+        int width = pixGetWidth(pix);
+        int height = pixGetHeight(pix);
+        BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
+
+        // Create C_BLOBs from the page
+        extract_edges(pix, &block);
+
+        // Merge all C_BLOBs
+        C_BLOB_LIST *list = block.blob_list();
+        C_BLOB_IT c_blob_it(list);
+        if (c_blob_it.empty())
+            return nullptr;
+        // Move all the outlines to the first blob.
+        C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
+        for (c_blob_it.forward();
+             !c_blob_it.at_first();
+             c_blob_it.forward()) {
+            C_BLOB *c_blob = c_blob_it.data();
+            ol_it.add_list_after(c_blob->out_list());
+        }
+        // Convert the first blob to the output TBLOB.
+        return TBLOB::PolygonalCopy(false, c_blob_it.data());
     }
 
 /**
@@ -2720,12 +2718,12 @@ namespace tesseract {
  * normalization-antidote is returned.
  */
     void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
-      TBOX box = tblob->bounding_box();
-      float x_center = (box.left() + box.right()) / 2.0f;
-      float baseline = row->base_line(x_center);
-      float scale = kBlnXHeight / row->x_height();
-      tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
-                       0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
+        TBOX box = tblob->bounding_box();
+        float x_center = (box.left() + box.right()) / 2.0f;
+        float baseline = row->base_line(x_center);
+        float scale = kBlnXHeight / row->x_height();
+        tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
+                         0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
     }
 
 /**
@@ -2735,14 +2733,14 @@ namespace tesseract {
     static TBLOB *make_tesseract_blob(float baseline, float xheight,
                                       float descender, float ascender,
                                       bool numeric_mode, Pix* pix) {
-      TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
+        TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
 
-      // Normalize TBLOB
-      ROW *row =
-              TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
-      TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
-      delete row;
-      return tblob;
+        // Normalize TBLOB
+        ROW *row =
+                TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
+        TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
+        delete row;
+        return tblob;
     }
 
 /**
@@ -2756,49 +2754,49 @@ namespace tesseract {
                                        float xheight,
                                        float descender,
                                        float ascender) {
-      UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
-      TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
-                                        tesseract_->classify_bln_numeric_mode,
-                                        tesseract_->pix_binary());
-      float threshold;
-      float best_rating = -100;
-
-
-      // Classify to get a raw choice.
-      BLOB_CHOICE_LIST choices;
-      tesseract_->AdaptiveClassifier(blob, &choices);
-      BLOB_CHOICE_IT choice_it;
-      choice_it.set_to_list(&choices);
-      for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
-           choice_it.forward()) {
-        if (choice_it.data()->rating() > best_rating) {
-          best_rating = choice_it.data()->rating();
+        UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
+        TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
+                                          tesseract_->classify_bln_numeric_mode,
+                                          tesseract_->pix_binary());
+        float threshold;
+        float best_rating = -100;
+
+
+        // Classify to get a raw choice.
+        BLOB_CHOICE_LIST choices;
+        tesseract_->AdaptiveClassifier(blob, &choices);
+        BLOB_CHOICE_IT choice_it;
+        choice_it.set_to_list(&choices);
+        for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
+             choice_it.forward()) {
+            if (choice_it.data()->rating() > best_rating) {
+                best_rating = choice_it.data()->rating();
+            }
         }
-      }
 
-      threshold = tesseract_->matcher_good_threshold;
+        threshold = tesseract_->matcher_good_threshold;
 
-      if (blob->outlines)
-        tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
-                                tesseract_->AdaptedTemplates);
-      delete blob;
+        if (blob->outlines)
+            tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
+                                    tesseract_->AdaptedTemplates);
+        delete blob;
     }
 
 
     PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
-      PAGE_RES *page_res = new PAGE_RES(false, block_list,
-                                        &(tesseract_->prev_word_best_choice_));
-      tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
-      return page_res;
+        PAGE_RES *page_res = new PAGE_RES(false, block_list,
+                                          &(tesseract_->prev_word_best_choice_));
+        tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
+        return page_res;
     }
 
     PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
                                             PAGE_RES* pass1_result) {
-      if (!pass1_result)
-        pass1_result = new PAGE_RES(false, block_list,
-                                    &(tesseract_->prev_word_best_choice_));
-      tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
-      return pass1_result;
+        if (!pass1_result)
+            pass1_result = new PAGE_RES(false, block_list,
+                                        &(tesseract_->prev_word_best_choice_));
+        tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
+        return pass1_result;
     }
 
     struct TESS_CHAR : ELIST_LINK {
@@ -2808,9 +2806,9 @@ namespace tesseract {
         TBOX box;
 
         TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
-          length = (len == -1 ? strlen(repr) : len);
-          unicode_repr = new char[length + 1];
-          strncpy(unicode_repr, repr, length);
+            length = (len == -1 ? strlen(repr) : len);
+            unicode_repr = new char[length + 1];
+            strncpy(unicode_repr, repr, length);
         }
 
         TESS_CHAR()
@@ -2820,7 +2818,7 @@ namespace tesseract {
         {  // Satisfies ELISTIZE.
         }
         ~TESS_CHAR() {
-          delete [] unicode_repr;
+            delete [] unicode_repr;
         }
     };
 
@@ -2828,18 +2826,18 @@ namespace tesseract {
     ELISTIZE(TESS_CHAR)
 
     static void add_space(TESS_CHAR_IT* it) {
-      TESS_CHAR *t = new TESS_CHAR(0, " ");
-      it->add_after_then_move(t);
+        TESS_CHAR *t = new TESS_CHAR(0, " ");
+        it->add_after_then_move(t);
     }
 
 
     static float rating_to_cost(float rating) {
-      rating = 100 + rating;
-      // cuddled that to save from coverage profiler
-      // (I have never seen ratings worse than -100,
-      //  but the check won't hurt)
-      if (rating < 0) rating = 0;
-      return rating;
+        rating = 100 + rating;
+        // cuddled that to save from coverage profiler
+        // (I have never seen ratings worse than -100,
+        //  but the check won't hurt)
+        if (rating < 0) rating = 0;
+        return rating;
     }
 
 /**
@@ -2848,28 +2846,28 @@ namespace tesseract {
  */
     static void extract_result(TESS_CHAR_IT* out,
                                PAGE_RES* page_res) {
-      PAGE_RES_IT page_res_it(page_res);
-      int word_count = 0;
-      while (page_res_it.word() != nullptr) {
-        WERD_RES *word = page_res_it.word();
-        const char *str = word->best_choice->unichar_string().string();
-        const char *len = word->best_choice->unichar_lengths().string();
-        TBOX real_rect = word->word->bounding_box();
-
-        if (word_count)
-          add_space(out);
-        int n = strlen(len);
-        for (int i = 0; i < n; i++) {
-          TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
-                                        str, *len);
-          tc->box = real_rect.intersection(word->box_word->BlobBox(i));
-          out->add_after_then_move(tc);
-          str += *len;
-          len++;
-        }
-        page_res_it.forward();
-        word_count++;
-      }
+        PAGE_RES_IT page_res_it(page_res);
+        int word_count = 0;
+        while (page_res_it.word() != nullptr) {
+            WERD_RES *word = page_res_it.word();
+            const char *str = word->best_choice->unichar_string().string();
+            const char *len = word->best_choice->unichar_lengths().string();
+            TBOX real_rect = word->word->bounding_box();
+
+            if (word_count)
+                add_space(out);
+            int n = strlen(len);
+            for (int i = 0; i < n; i++) {
+                TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
+                                              str, *len);
+                tc->box = real_rect.intersection(word->box_word->BlobBox(i));
+                out->add_after_then_move(tc);
+                str += *len;
+                len++;
+            }
+            page_res_it.forward();
+            word_count++;
+        }
     }
 
 /**
@@ -2884,41 +2882,41 @@ namespace tesseract {
                                             int** x1,
                                             int** y1,
                                             PAGE_RES* page_res) {
-      TESS_CHAR_LIST tess_chars;
-      TESS_CHAR_IT tess_chars_it(&tess_chars);
-      extract_result(&tess_chars_it, page_res);
-      tess_chars_it.move_to_first();
-      int n = tess_chars.length();
-      int text_len = 0;
-      *lengths = new int[n];
-      *costs = new float[n];
-      *x0 = new int[n];
-      *y0 = new int[n];
-      *x1 = new int[n];
-      *y1 = new int[n];
-      int i = 0;
-      for (tess_chars_it.mark_cycle_pt();
-           !tess_chars_it.cycled_list();
-           tess_chars_it.forward(), i++) {
-        TESS_CHAR *tc = tess_chars_it.data();
-        text_len += (*lengths)[i] = tc->length;
-        (*costs)[i] = tc->cost;
-        (*x0)[i] = tc->box.left();
-        (*y0)[i] = tc->box.bottom();
-        (*x1)[i] = tc->box.right();
-        (*y1)[i] = tc->box.top();
-      }
-      char *p = *text = new char[text_len];
-
-      tess_chars_it.move_to_first();
-      for (tess_chars_it.mark_cycle_pt();
-           !tess_chars_it.cycled_list();
-           tess_chars_it.forward()) {
-        TESS_CHAR *tc = tess_chars_it.data();
-        strncpy(p, tc->unicode_repr, tc->length);
-        p += tc->length;
-      }
-      return n;
+        TESS_CHAR_LIST tess_chars;
+        TESS_CHAR_IT tess_chars_it(&tess_chars);
+        extract_result(&tess_chars_it, page_res);
+        tess_chars_it.move_to_first();
+        int n = tess_chars.length();
+        int text_len = 0;
+        *lengths = new int[n];
+        *costs = new float[n];
+        *x0 = new int[n];
+        *y0 = new int[n];
+        *x1 = new int[n];
+        *y1 = new int[n];
+        int i = 0;
+        for (tess_chars_it.mark_cycle_pt();
+             !tess_chars_it.cycled_list();
+             tess_chars_it.forward(), i++) {
+            TESS_CHAR *tc = tess_chars_it.data();
+            text_len += (*lengths)[i] = tc->length;
+            (*costs)[i] = tc->cost;
+            (*x0)[i] = tc->box.left();
+            (*y0)[i] = tc->box.bottom();
+            (*x1)[i] = tc->box.right();
+            (*y1)[i] = tc->box.top();
+        }
+        char *p = *text = new char[text_len];
+
+        tess_chars_it.move_to_first();
+        for (tess_chars_it.mark_cycle_pt();
+             !tess_chars_it.cycled_list();
+             tess_chars_it.forward()) {
+            TESS_CHAR *tc = tess_chars_it.data();
+            strncpy(p, tc->unicode_repr, tc->length);
+            p += tc->length;
+        }
+        return n;
     }
 
 /** This method returns the features associated with the input blob. */
@@ -2932,52 +2930,52 @@ namespace tesseract {
                                          INT_FEATURE_STRUCT* int_features,
                                          int* num_features,
                                          int* feature_outline_index) {
-      GenericVector<int> outline_counts;
-      GenericVector<INT_FEATURE_STRUCT> bl_features;
-      GenericVector<INT_FEATURE_STRUCT> cn_features;
-      INT_FX_RESULT_STRUCT fx_info;
-      tesseract_->ExtractFeatures(*blob, false, &bl_features,
-                                  &cn_features, &fx_info, &outline_counts);
-      if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
-        *num_features = 0;
-        return;  // Feature extraction failed.
-      }
-      *num_features = cn_features.size();
-      memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
-      // TODO(rays) Pass outline_counts back and simplify the calling code.
-      if (feature_outline_index != nullptr) {
-        int f = 0;
-        for (int i = 0; i < outline_counts.size(); ++i) {
-          while (f < outline_counts[i])
-            feature_outline_index[f++] = i;
+        GenericVector<int> outline_counts;
+        GenericVector<INT_FEATURE_STRUCT> bl_features;
+        GenericVector<INT_FEATURE_STRUCT> cn_features;
+        INT_FX_RESULT_STRUCT fx_info;
+        tesseract_->ExtractFeatures(*blob, false, &bl_features,
+                                    &cn_features, &fx_info, &outline_counts);
+        if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
+            *num_features = 0;
+            return;  // Feature extraction failed.
+        }
+        *num_features = cn_features.size();
+        memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
+        // TODO(rays) Pass outline_counts back and simplify the calling code.
+        if (feature_outline_index != nullptr) {
+            int f = 0;
+            for (int i = 0; i < outline_counts.size(); ++i) {
+                while (f < outline_counts[i])
+                    feature_outline_index[f++] = i;
+            }
         }
-      }
     }
 
 // This method returns the row to which a box of specified dimensions would
 // belong. If no good match is found, it returns nullptr.
     ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
                                     int left, int top, int right, int bottom) {
-      TBOX box(left, bottom, right, top);
-      BLOCK_IT b_it(blocks);
-      for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
-        BLOCK* block = b_it.data();
-        if (!box.major_overlap(block->pdblk.bounding_box()))
-          continue;
-        ROW_IT r_it(block->row_list());
-        for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
-          ROW* row = r_it.data();
-          if (!box.major_overlap(row->bounding_box()))
-            continue;
-          WERD_IT w_it(row->word_list());
-          for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
-            WERD* word = w_it.data();
-            if (box.major_overlap(word->bounding_box()))
-              return row;
-          }
+        TBOX box(left, bottom, right, top);
+        BLOCK_IT b_it(blocks);
+        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+            BLOCK* block = b_it.data();
+            if (!box.major_overlap(block->pdblk.bounding_box()))
+                continue;
+            ROW_IT r_it(block->row_list());
+            for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
+                ROW* row = r_it.data();
+                if (!box.major_overlap(row->bounding_box()))
+                    continue;
+                WERD_IT w_it(row->word_list());
+                for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
+                    WERD* word = w_it.data();
+                    if (box.major_overlap(word->bounding_box()))
+                        return row;
+                }
+            }
         }
-      }
-      return nullptr;
+        return nullptr;
     }
 
 /** Method to run adaptive classifier on a blob. */
@@ -2986,21 +2984,21 @@ namespace tesseract {
                                             int* unichar_ids,
                                             float* ratings,
                                             int* num_matches_returned) {
-      BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
-      tesseract_->AdaptiveClassifier(blob, choices);
-      BLOB_CHOICE_IT choices_it(choices);
-      int& index = *num_matches_returned;
-      index = 0;
-      for (choices_it.mark_cycle_pt();
-           !choices_it.cycled_list() && index < num_max_matches;
-           choices_it.forward()) {
-        BLOB_CHOICE* choice = choices_it.data();
-        unichar_ids[index] = choice->unichar_id();
-        ratings[index] = choice->rating();
-        ++index;
-      }
-      *num_matches_returned = index;
-      delete choices;
+        BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
+        tesseract_->AdaptiveClassifier(blob, choices);
+        BLOB_CHOICE_IT choices_it(choices);
+        int& index = *num_matches_returned;
+        index = 0;
+        for (choices_it.mark_cycle_pt();
+             !choices_it.cycled_list() && index < num_max_matches;
+             choices_it.forward()) {
+            BLOB_CHOICE* choice = choices_it.data();
+            unichar_ids[index] = choice->unichar_id();
+            ratings[index] = choice->rating();
+            ++index;
+        }
+        *num_matches_returned = index;
+        delete choices;
     }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
diff --git a/src/api/baseapi.h b/src/api/baseapi.h
index 6da486011a..f82dfa2d56 100644
--- a/src/api/baseapi.h
+++ b/src/api/baseapi.h
@@ -229,10 +229,10 @@ namespace tesseract {
                  const GenericVector<STRING> *vars_values,
                  bool set_only_non_debug_params);
         int Init(const char* datapath, const char* language, OcrEngineMode oem) {
-          return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
+            return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
         }
         int Init(const char* datapath, const char* language) {
-          return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
+            return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
         }
         // In-memory version reads the traineddata file directly from the given
         // data[data_size] array, and/or reads data via a FileReader.
@@ -374,9 +374,9 @@ namespace tesseract {
          * delete it when it it is replaced or the API is destructed.
          */
         void SetThresholder(ImageThresholder* thresholder) {
-          delete thresholder_;
-          thresholder_ = thresholder;
-          ClearResults();
+            delete thresholder_;
+            thresholder_ = thresholder;
+            ClearResults();
         }
 
         /**
@@ -410,7 +410,7 @@ namespace tesseract {
            Helper method to extract from the thresholded image. (most common usage)
         */
         Boxa* GetTextlines(Pixa** pixa, int** blockids) {
-          return GetTextlines(false, 0, pixa, blockids, nullptr);
+            return GetTextlines(false, 0, pixa, blockids, nullptr);
         }
 
         /**
@@ -461,7 +461,7 @@ namespace tesseract {
         Boxa* GetComponentImages(const PageIteratorLevel level,
                                  const bool text_only,
                                  Pixa** pixa, int** blockids) {
-          return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
+            return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
         }
 
         /**
@@ -586,12 +586,6 @@ namespace tesseract {
          */
         char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
 
-        /**
-       * Make an XML-formatted string with Alto markup from the internal
-       * data structures.
-       */
-        char* GetAltoText(ETEXT_DESC* monitor, int page_number);
-
         /**
          * Make a HTML-formatted string with hOCR markup from the internal
          * data structures.
@@ -600,6 +594,13 @@ namespace tesseract {
          */
         char* GetHOCRText(int page_number);
 
+        /**
+        * Make an XML-formatted string with Alto markup from the internal
+        * data structures.
+        */
+        char* GetAltoText(ETEXT_DESC* monitor, int page_number);
+
+
         /**
        * Make an XML-formatted string with Alto markup from the internal
        * data structures.
diff --git a/src/api/capi.cpp b/src/api/capi.cpp
index c9216b8ede..1bbf621c25 100644
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
@@ -71,11 +71,6 @@ TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* output
     return new TessHOcrRenderer(outputbase);
 }
 
-TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info)
-{
-    return new TessHOcrRenderer(outputbase, font_info);
-}
-
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
                                                              BOOL textonly)
 {
diff --git a/src/api/capi.h b/src/api/capi.h
index 85908c78b0..8f999e8536 100644
--- a/src/api/capi.h
+++ b/src/api/capi.h
@@ -126,9 +126,8 @@ TESS_API void  TESS_CALL TessDeleteIntArray(int* arr);
 /* Renderer API */
 TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
-TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
-TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate2(const char* outputbase, BOOL font_info);
+TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
                                                              BOOL textonly);
 TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
@@ -279,6 +278,7 @@ TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
 
 TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
 TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
+
 TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number);
 
 TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp
index e2cf91003e..c4c24e032f 100644
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
@@ -37,82 +37,82 @@ namespace tesseract {
               fout_(stdout),
               next_(nullptr),
               happy_(true) {
-      if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
-        STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
-        fout_ = fopen(outfile.string(), "wb");
-        if (fout_ == nullptr) {
-          happy_ = false;
+        if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
+            STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
+            fout_ = fopen(outfile.string(), "wb");
+            if (fout_ == nullptr) {
+                happy_ = false;
+            }
         }
-      }
     }
 
     TessResultRenderer::~TessResultRenderer() {
-      if (fout_ != nullptr) {
-        if (fout_ != stdout)
-          fclose(fout_);
-        else
-          clearerr(fout_);
-      }
-      delete next_;
+        if (fout_ != nullptr) {
+            if (fout_ != stdout)
+                fclose(fout_);
+            else
+                clearerr(fout_);
+        }
+        delete next_;
     }
 
     void TessResultRenderer::insert(TessResultRenderer* next) {
-      if (next == nullptr) return;
-
-      TessResultRenderer* remainder = next_;
-      next_ = next;
-      if (remainder) {
-        while (next->next_ != nullptr) {
-          next = next->next_;
+        if (next == nullptr) return;
+
+        TessResultRenderer* remainder = next_;
+        next_ = next;
+        if (remainder) {
+            while (next->next_ != nullptr) {
+                next = next->next_;
+            }
+            next->next_ = remainder;
         }
-        next->next_ = remainder;
-      }
     }
 
     bool TessResultRenderer::BeginDocument(const char* title) {
-      if (!happy_) return false;
-      title_ = title;
-      imagenum_ = -1;
-      bool ok = BeginDocumentHandler();
-      if (next_) {
-        ok = next_->BeginDocument(title) && ok;
-      }
-      return ok;
+        if (!happy_) return false;
+        title_ = title;
+        imagenum_ = -1;
+        bool ok = BeginDocumentHandler();
+        if (next_) {
+            ok = next_->BeginDocument(title) && ok;
+        }
+        return ok;
     }
 
     bool TessResultRenderer::AddImage(TessBaseAPI* api) {
-      if (!happy_) return false;
-      ++imagenum_;
-      bool ok = AddImageHandler(api);
-      if (next_) {
-        ok = next_->AddImage(api) && ok;
-      }
-      return ok;
+        if (!happy_) return false;
+        ++imagenum_;
+        bool ok = AddImageHandler(api);
+        if (next_) {
+            ok = next_->AddImage(api) && ok;
+        }
+        return ok;
     }
 
     bool TessResultRenderer::EndDocument() {
-      if (!happy_) return false;
-      bool ok = EndDocumentHandler();
-      if (next_) {
-        ok = next_->EndDocument() && ok;
-      }
-      return ok;
+        if (!happy_) return false;
+        bool ok = EndDocumentHandler();
+        if (next_) {
+            ok = next_->EndDocument() && ok;
+        }
+        return ok;
     }
 
     void TessResultRenderer::AppendString(const char* s) {
-      AppendData(s, strlen(s));
+        AppendData(s, strlen(s));
     }
 
     void TessResultRenderer::AppendData(const char* s, int len) {
-      if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
+        if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
     }
 
     bool TessResultRenderer::BeginDocumentHandler() {
-      return happy_;
+        return happy_;
     }
 
     bool TessResultRenderer::EndDocumentHandler() {
-      return happy_;
+        return happy_;
     }
 
 
@@ -124,19 +124,19 @@ namespace tesseract {
     }
 
     bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
-      if (utf8 == nullptr) {
-        return false;
-      }
+        const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
+        if (utf8 == nullptr) {
+            return false;
+        }
 
-      AppendString(utf8.get());
+        AppendString(utf8.get());
 
-      const char* pageSeparator = api->GetStringVariable("page_separator");
-      if (pageSeparator != nullptr && *pageSeparator != '\0') {
-        AppendString(pageSeparator);
-      }
+        const char* pageSeparator = api->GetStringVariable("page_separator");
+        if (pageSeparator != nullptr && *pageSeparator != '\0') {
+            AppendString(pageSeparator);
+        }
 
-      return true;
+        return true;
     }
 
 /**********************************************************************
@@ -144,53 +144,53 @@ namespace tesseract {
  **********************************************************************/
     TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
             : TessResultRenderer(outputbase, "hocr") {
-      font_info_ = false;
+        font_info_ = false;
     }
 
     TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
             : TessResultRenderer(outputbase, "hocr") {
-      font_info_ = font_info;
+        font_info_ = font_info;
     }
 
     bool TessHOcrRenderer::BeginDocumentHandler() {
-      AppendString(
-              "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-              "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
-              "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
-              "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
-              "lang=\"en\">\n <head>\n  <title>");
-      AppendString(title());
-      AppendString(
-              "</title>\n"
-              "<meta http-equiv=\"Content-Type\" content=\"text/html;"
-              "charset=utf-8\" />\n"
-              "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
-              "' />\n"
-              "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
-              " ocr_line ocrx_word ocrp_wconf");
-      if (font_info_)
         AppendString(
-                " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
-      AppendString(
-              "'/>\n"
-              "</head>\n<body>\n");
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+                "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
+                "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+                "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
+                "lang=\"en\">\n <head>\n  <title>");
+        AppendString(title());
+        AppendString(
+                "</title>\n"
+                "<meta http-equiv=\"Content-Type\" content=\"text/html;"
+                "charset=utf-8\" />\n"
+                "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
+                "' />\n"
+                "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
+                " ocr_line ocrx_word ocrp_wconf");
+        if (font_info_)
+            AppendString(
+                    " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
+        AppendString(
+                "'/>\n"
+                "</head>\n<body>\n");
 
-      return true;
+        return true;
     }
 
     bool TessHOcrRenderer::EndDocumentHandler() {
-      AppendString(" </body>\n</html>\n");
+        AppendString(" </body>\n</html>\n");
 
-      return true;
+        return true;
     }
 
     bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
-      if (hocr == nullptr) return false;
+        const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
+        if (hocr == nullptr) return false;
 
-      AppendString(hocr.get());
+        AppendString(hocr.get());
 
-      return true;
+        return true;
     }
 
 /**********************************************************************
@@ -198,31 +198,31 @@ namespace tesseract {
  **********************************************************************/
     TessTsvRenderer::TessTsvRenderer(const char* outputbase)
             : TessResultRenderer(outputbase, "tsv") {
-      font_info_ = false;
+        font_info_ = false;
     }
 
     TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
             : TessResultRenderer(outputbase, "tsv") {
-      font_info_ = font_info;
+        font_info_ = font_info;
     }
 
     bool TessTsvRenderer::BeginDocumentHandler() {
-      // Output TSV column headings
-      AppendString(
-              "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
-              "num\tleft\ttop\twidth\theight\tconf\ttext\n");
-      return true;
+        // Output TSV column headings
+        AppendString(
+                "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
+                "num\tleft\ttop\twidth\theight\tconf\ttext\n");
+        return true;
     }
 
     bool TessTsvRenderer::EndDocumentHandler() { return true; }
 
     bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
-      if (tsv == nullptr) return false;
+        const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
+        if (tsv == nullptr) return false;
 
-      AppendString(tsv.get());
+        AppendString(tsv.get());
 
-      return true;
+        return true;
     }
 
 /**********************************************************************
@@ -233,12 +233,12 @@ namespace tesseract {
     }
 
     bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
-      if (unlv == nullptr) return false;
+        const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
+        if (unlv == nullptr) return false;
 
-      AppendString(unlv.get());
+        AppendString(unlv.get());
 
-      return true;
+        return true;
     }
 
 /**********************************************************************
@@ -249,12 +249,12 @@ namespace tesseract {
     }
 
     bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
-      const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
-      if (text == nullptr) return false;
+        const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
+        if (text == nullptr) return false;
 
-      AppendString(text.get());
+        AppendString(text.get());
 
-      return true;
+        return true;
     }
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -266,13 +266,13 @@ namespace tesseract {
             : TessResultRenderer(outputbase, "osd") {}
 
     bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
-      char* osd = api->GetOsdText(imagenum());
-      if (osd == nullptr) return false;
+        char* osd = api->GetOsdText(imagenum());
+        if (osd == nullptr) return false;
 
-      AppendString(osd);
-      delete[] osd;
+        AppendString(osd);
+        delete[] osd;
 
-      return true;
+        return true;
     }
 
 #endif // ndef DISABLED_LEGACY_ENGINE
diff --git a/src/api/renderer.h b/src/api/renderer.h
index f2313c31fd..cb91f3e005 100644
--- a/src/api/renderer.h
+++ b/src/api/renderer.h
@@ -180,7 +180,6 @@ namespace tesseract {
 
     };
 
-
 /**
  * Renders Tesseract output into a TSV string
  */

From 2a9137941abfabf78b53253bebcf5b3e5e61c5bf Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Sat, 24 Nov 2018 09:27:43 -0500
Subject: [PATCH 4/9] Remove changes unrelated to ALTO

---
 src/api/baseapi.cpp       | 4408 ++++++++++++++++++-------------------
 src/api/baseapi.h         | 1706 +++++++-------
 src/api/capi.cpp          |   34 +-
 src/api/capi.h            |   92 +-
 src/api/renderer.cpp      |  392 ++--
 src/api/renderer.h        |  348 +--
 src/api/tesseractmain.cpp |  872 ++++----
 7 files changed, 3926 insertions(+), 3926 deletions(-)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 5a7945154d..4caf4428f8 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -94,34 +94,34 @@ BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin");
 namespace tesseract {
 
 /** Minimum sensible image size to be worth running tesseract. */
-    const int kMinRectSize = 10;
+const int kMinRectSize = 10;
 /** Character returned when Tesseract couldn't recognize as anything. */
-    const char kTesseractReject = '~';
+const char kTesseractReject = '~';
 /** Character used by UNLV error counter as a reject. */
-    const char kUNLVReject = '~';
+const char kUNLVReject = '~';
 /** Character used by UNLV as a suspect marker. */
-    const char kUNLVSuspect = '^';
+const char kUNLVSuspect = '^';
 /**
  * Filename used for input image file, from which to derive a name to search
  * for a possible UNLV zone file, if none is specified by SetInputName.
  */
-    const char* kInputFile = "noname.tif";
+const char* kInputFile = "noname.tif";
 /**
  * Temp file used for storing current parameters before applying retry values.
  */
-    const char* kOldVarsFile = "failed_vars.txt";
+const char* kOldVarsFile = "failed_vars.txt";
 /** Max string length of an int.  */
-    const int kMaxIntSize = 22;
+const int kMaxIntSize = 22;
 
 /* Add all available languages recursively.
 */
-    static void addAvailableLanguages(const STRING &datadir, const STRING &base,
-                                      GenericVector<STRING>* langs)
-    {
-        const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
-        const size_t extlen = sizeof(kTrainedDataSuffix);
+static void addAvailableLanguages(const STRING &datadir, const STRING &base,
+                                  GenericVector<STRING>* langs)
+{
+  const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
+  const size_t extlen = sizeof(kTrainedDataSuffix);
 #ifdef _WIN32
-        WIN32_FIND_DATA data;
+    WIN32_FIND_DATA data;
     HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data);
     if (handle != INVALID_HANDLE_VALUE) {
       BOOL result = TRUE;
@@ -146,83 +146,83 @@ namespace tesseract {
       FindClose(handle);
     }
 #else  // _WIN32
-        DIR* dir = opendir((datadir + base).string());
-        if (dir != nullptr) {
-            dirent *de;
-            while ((de = readdir(dir))) {
-                char *name = de->d_name;
-                // Skip '.', '..', and hidden files
-                if (name[0] != '.') {
-                    struct stat st;
-                    if (stat((datadir + base2 + name).string(), &st) == 0 &&
-                        (st.st_mode & S_IFDIR) == S_IFDIR) {
-                        addAvailableLanguages(datadir, base2 + name, langs);
-                    } else {
-                        size_t len = strlen(name);
-                        if (len > extlen && name[len - extlen] == '.' &&
-                            strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
-                            name[len - extlen] = '\0';
-                            langs->push_back(base2 + name);
-                        }
-                    }
-                }
-            }
-            closedir(dir);
+  DIR* dir = opendir((datadir + base).string());
+  if (dir != nullptr) {
+    dirent *de;
+    while ((de = readdir(dir))) {
+      char *name = de->d_name;
+      // Skip '.', '..', and hidden files
+      if (name[0] != '.') {
+        struct stat st;
+        if (stat((datadir + base2 + name).string(), &st) == 0 &&
+            (st.st_mode & S_IFDIR) == S_IFDIR) {
+          addAvailableLanguages(datadir, base2 + name, langs);
+        } else {
+          size_t len = strlen(name);
+          if (len > extlen && name[len - extlen] == '.' &&
+              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
+            name[len - extlen] = '\0';
+            langs->push_back(base2 + name);
+          }
         }
-#endif
+      }
     }
+    closedir(dir);
+  }
+#endif
+}
 
 // Compare two STRING values (used for sorting).
-    static int CompareSTRING(const void* p1, const void* p2) {
-        const STRING* s1 = static_cast<const STRING*>(p1);
-        const STRING* s2 = static_cast<const STRING*>(p2);
-        return strcmp(s1->c_str(), s2->c_str());
-    }
-
-    TessBaseAPI::TessBaseAPI()
-            : tesseract_(nullptr),
-              osd_tesseract_(nullptr),
-              equ_detect_(nullptr),
-              reader_(nullptr),
-            // Thresholder is initialized to nullptr here, but will be set before use by:
-            // A constructor of a derived API,  SetThresholder(), or
-            // created implicitly when used in InternalSetImage.
-              thresholder_(nullptr),
-              paragraph_models_(nullptr),
-              block_list_(nullptr),
-              page_res_(nullptr),
-              input_file_(nullptr),
-              output_file_(nullptr),
-              datapath_(nullptr),
-              language_(nullptr),
-              last_oem_requested_(OEM_DEFAULT),
-              recognition_done_(false),
-              truth_cb_(nullptr),
-              rect_left_(0),
-              rect_top_(0),
-              rect_width_(0),
-              rect_height_(0),
-              image_width_(0),
-              image_height_(0) {
-        const char *locale;
-        locale = std::setlocale(LC_ALL, nullptr);
-        ASSERT_HOST(!strcmp(locale, "C"));
-        locale = std::setlocale(LC_CTYPE, nullptr);
-        ASSERT_HOST(!strcmp(locale, "C"));
-        locale = std::setlocale(LC_NUMERIC, nullptr);
-        ASSERT_HOST(!strcmp(locale, "C"));
-    }
-
-    TessBaseAPI::~TessBaseAPI() {
-        End();
-    }
+static int CompareSTRING(const void* p1, const void* p2) {
+  const STRING* s1 = static_cast<const STRING*>(p1);
+  const STRING* s2 = static_cast<const STRING*>(p2);
+  return strcmp(s1->c_str(), s2->c_str());
+}
+
+TessBaseAPI::TessBaseAPI()
+    : tesseract_(nullptr),
+      osd_tesseract_(nullptr),
+      equ_detect_(nullptr),
+      reader_(nullptr),
+      // Thresholder is initialized to nullptr here, but will be set before use by:
+      // A constructor of a derived API,  SetThresholder(), or
+      // created implicitly when used in InternalSetImage.
+      thresholder_(nullptr),
+      paragraph_models_(nullptr),
+      block_list_(nullptr),
+      page_res_(nullptr),
+      input_file_(nullptr),
+      output_file_(nullptr),
+      datapath_(nullptr),
+      language_(nullptr),
+      last_oem_requested_(OEM_DEFAULT),
+      recognition_done_(false),
+      truth_cb_(nullptr),
+      rect_left_(0),
+      rect_top_(0),
+      rect_width_(0),
+      rect_height_(0),
+      image_width_(0),
+      image_height_(0) {
+  const char *locale;
+  locale = std::setlocale(LC_ALL, nullptr);
+  ASSERT_HOST(!strcmp(locale, "C"));
+  locale = std::setlocale(LC_CTYPE, nullptr);
+  ASSERT_HOST(!strcmp(locale, "C"));
+  locale = std::setlocale(LC_NUMERIC, nullptr);
+  ASSERT_HOST(!strcmp(locale, "C"));
+}
+
+TessBaseAPI::~TessBaseAPI() {
+  End();
+}
 
 /**
  * Returns the version identifier as a static string. Do not delete.
  */
-    const char* TessBaseAPI::Version() {
-        return PACKAGE_VERSION;
-    }
+const char* TessBaseAPI::Version() {
+  return PACKAGE_VERSION;
+}
 
 /**
  * If compiled with OpenCL AND an available OpenCL
@@ -232,13 +232,13 @@ namespace tesseract {
  * otherwise *device=nullptr and returns 0.
  */
 #ifdef USE_OPENCL
-    #ifdef USE_DEVICE_SELECTION
+#ifdef USE_DEVICE_SELECTION
 #include "opencl_device_selection.h"
 #endif
 #endif
-    size_t TessBaseAPI::getOpenCLDevice(void **data) {
+size_t TessBaseAPI::getOpenCLDevice(void **data) {
 #ifdef USE_OPENCL
-        #ifdef USE_DEVICE_SELECTION
+#ifdef USE_DEVICE_SELECTION
   ds_device device = OpenclDevice::getDeviceSelection();
   if (device.type == DS_DEVICE_OPENCL_DEVICE) {
     *data = new cl_device_id;
@@ -248,17 +248,17 @@ namespace tesseract {
 #endif
 #endif
 
-        *data = nullptr;
-        return 0;
-    }
+  *data = nullptr;
+  return 0;
+}
 
 /**
  * Writes the thresholded image to stderr as a PBM file on receipt of a
  * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
  */
-    void TessBaseAPI::CatchSignals() {
+void TessBaseAPI::CatchSignals() {
 #ifdef __linux__
-        struct sigaction action;
+  struct sigaction action;
   memset(&action, 0, sizeof(action));
   action.sa_handler = &signal_exit;
   action.sa_flags = SA_RESETHAND;
@@ -266,81 +266,81 @@ namespace tesseract {
   sigaction(SIGFPE, &action, nullptr);
   sigaction(SIGBUS, &action, nullptr);
 #else
-        // Warn API users that an implementation is needed.
-        tprintf("CatchSignals has no non-linux implementation!\n");
+  // Warn API users that an implementation is needed.
+  tprintf("CatchSignals has no non-linux implementation!\n");
 #endif
-    }
+}
 
 /**
  * Set the name of the input file. Needed only for training and
  * loading a UNLV zone file.
  */
-    void TessBaseAPI::SetInputName(const char* name) {
-        if (input_file_ == nullptr)
-            input_file_ = new STRING(name);
-        else
-            *input_file_ = name;
-    }
+void TessBaseAPI::SetInputName(const char* name) {
+  if (input_file_ == nullptr)
+    input_file_ = new STRING(name);
+  else
+    *input_file_ = name;
+}
 
 /** Set the name of the output files. Needed only for debugging. */
-    void TessBaseAPI::SetOutputName(const char* name) {
-        if (output_file_ == nullptr)
-            output_file_ = new STRING(name);
-        else
-            *output_file_ = name;
-    }
-
-    bool TessBaseAPI::SetVariable(const char* name, const char* value) {
-        if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-        return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
-                                    tesseract_->params());
-    }
-
-    bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
-        if (tesseract_ == nullptr) tesseract_ = new Tesseract;
-        return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
-                                    tesseract_->params());
-    }
-
-    bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
-        IntParam *p = ParamUtils::FindParam<IntParam>(
-                name, GlobalParams()->int_params, tesseract_->params()->int_params);
-        if (p == nullptr) return false;
-        *value = (int32_t)(*p);
-        return true;
-    }
-
-    bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
-        BoolParam *p = ParamUtils::FindParam<BoolParam>(
-                name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
-        if (p == nullptr) return false;
-        *value = (BOOL8)(*p);
-        return true;
-    }
-
-    const char *TessBaseAPI::GetStringVariable(const char *name) const {
-        StringParam *p = ParamUtils::FindParam<StringParam>(
-                name, GlobalParams()->string_params, tesseract_->params()->string_params);
-        return (p != nullptr) ? p->string() : nullptr;
-    }
-
-    bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
-        DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
-                name, GlobalParams()->double_params, tesseract_->params()->double_params);
-        if (p == nullptr) return false;
-        *value = (double)(*p);
-        return true;
-    }
+void TessBaseAPI::SetOutputName(const char* name) {
+  if (output_file_ == nullptr)
+    output_file_ = new STRING(name);
+  else
+    *output_file_ = name;
+}
+
+bool TessBaseAPI::SetVariable(const char* name, const char* value) {
+  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
+  return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
+                              tesseract_->params());
+}
+
+bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
+  if (tesseract_ == nullptr) tesseract_ = new Tesseract;
+  return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
+                              tesseract_->params());
+}
+
+bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
+  IntParam *p = ParamUtils::FindParam<IntParam>(
+      name, GlobalParams()->int_params, tesseract_->params()->int_params);
+  if (p == nullptr) return false;
+  *value = (int32_t)(*p);
+  return true;
+}
+
+bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
+  BoolParam *p = ParamUtils::FindParam<BoolParam>(
+      name, GlobalParams()->bool_params, tesseract_->params()->bool_params);
+  if (p == nullptr) return false;
+  *value = (BOOL8)(*p);
+  return true;
+}
+
+const char *TessBaseAPI::GetStringVariable(const char *name) const {
+  StringParam *p = ParamUtils::FindParam<StringParam>(
+      name, GlobalParams()->string_params, tesseract_->params()->string_params);
+  return (p != nullptr) ? p->string() : nullptr;
+}
+
+bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
+  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
+      name, GlobalParams()->double_params, tesseract_->params()->double_params);
+  if (p == nullptr) return false;
+  *value = (double)(*p);
+  return true;
+}
 
 /** Get value of named variable as a string, if it exists. */
-    bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
-        return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
-    }
+bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
+  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
+}
 
 /** Print Tesseract parameters to the given file. */
-    void TessBaseAPI::PrintVariables(FILE *fp) const {
-        ParamUtils::PrintParams(fp, tesseract_->params());
-    }
+void TessBaseAPI::PrintVariables(FILE *fp) const {
+  ParamUtils::PrintParams(fp, tesseract_->params());
+}
 
 /**
  * The datapath must be the name of the data directory (no ending /) or
@@ -350,90 +350,90 @@ namespace tesseract {
  * be returned.
  * @return: 0 on success and -1 on initialization failure.
  */
-    int TessBaseAPI::Init(const char* datapath, const char* language,
-                          OcrEngineMode oem, char **configs, int configs_size,
-                          const GenericVector<STRING> *vars_vec,
-                          const GenericVector<STRING> *vars_values,
-                          bool set_only_non_debug_params) {
-        return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
-                    vars_values, set_only_non_debug_params, nullptr);
-    }
+int TessBaseAPI::Init(const char* datapath, const char* language,
+                      OcrEngineMode oem, char **configs, int configs_size,
+                      const GenericVector<STRING> *vars_vec,
+                      const GenericVector<STRING> *vars_values,
+                      bool set_only_non_debug_params) {
+  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
+              vars_values, set_only_non_debug_params, nullptr);
+}
 
 // In-memory version reads the traineddata file directly from the given
 // data[data_size] array. Also implements the version with a datapath in data,
 // flagged by data_size = 0.
-    int TessBaseAPI::Init(const char* data, int data_size, const char* language,
-                          OcrEngineMode oem, char** configs, int configs_size,
-                          const GenericVector<STRING>* vars_vec,
-                          const GenericVector<STRING>* vars_values,
-                          bool set_only_non_debug_params, FileReader reader) {
-        PERF_COUNT_START("TessBaseAPI::Init")
-        // Default language is "eng".
-        if (language == nullptr) language = "eng";
-        STRING datapath = data_size == 0 ? data : language;
-        // If the datapath, OcrEngineMode or the language have changed - start again.
-        // Note that the language_ field stores the last requested language that was
-        // initialized successfully, while tesseract_->lang stores the language
-        // actually used. They differ only if the requested language was nullptr, in
-        // which case tesseract_->lang is set to the Tesseract default ("eng").
-        if (tesseract_ != nullptr &&
-            (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
-             last_oem_requested_ != oem ||
-             (*language_ != language && tesseract_->lang != language))) {
-            delete tesseract_;
-            tesseract_ = nullptr;
-        }
-            // PERF_COUNT_SUB("delete tesseract_")
+int TessBaseAPI::Init(const char* data, int data_size, const char* language,
+                      OcrEngineMode oem, char** configs, int configs_size,
+                      const GenericVector<STRING>* vars_vec,
+                      const GenericVector<STRING>* vars_values,
+                      bool set_only_non_debug_params, FileReader reader) {
+  PERF_COUNT_START("TessBaseAPI::Init")
+  // Default language is "eng".
+  if (language == nullptr) language = "eng";
+  STRING datapath = data_size == 0 ? data : language;
+  // If the datapath, OcrEngineMode or the language have changed - start again.
+  // Note that the language_ field stores the last requested language that was
+  // initialized successfully, while tesseract_->lang stores the language
+  // actually used. They differ only if the requested language was nullptr, in
+  // which case tesseract_->lang is set to the Tesseract default ("eng").
+  if (tesseract_ != nullptr &&
+      (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
+       last_oem_requested_ != oem ||
+       (*language_ != language && tesseract_->lang != language))) {
+    delete tesseract_;
+    tesseract_ = nullptr;
+  }
+  // PERF_COUNT_SUB("delete tesseract_")
 #ifdef USE_OPENCL
-            OpenclDevice od;
+  OpenclDevice od;
   od.InitEnv();
 #endif
-        PERF_COUNT_SUB("OD::InitEnv()")
-        bool reset_classifier = true;
-        if (tesseract_ == nullptr) {
-            reset_classifier = false;
-            tesseract_ = new Tesseract;
-            if (reader != nullptr) reader_ = reader;
-            TessdataManager mgr(reader_);
-            if (data_size != 0) {
-                mgr.LoadMemBuffer(language, data, data_size);
-            }
-            if (tesseract_->init_tesseract(
-                    datapath.string(),
-                    output_file_ != nullptr ? output_file_->string() : nullptr,
-                    language, oem, configs, configs_size, vars_vec, vars_values,
-                    set_only_non_debug_params, &mgr) != 0) {
-                return -1;
-            }
-        }
-
-        PERF_COUNT_SUB("update tesseract_")
-        // Update datapath and language requested for the last valid initialization.
-        if (datapath_ == nullptr)
-            datapath_ = new STRING(datapath);
-        else
-            *datapath_ = datapath;
-        if ((strcmp(datapath_->string(), "") == 0) &&
-            (strcmp(tesseract_->datadir.string(), "") != 0))
-            *datapath_ = tesseract_->datadir;
+  PERF_COUNT_SUB("OD::InitEnv()")
+  bool reset_classifier = true;
+  if (tesseract_ == nullptr) {
+    reset_classifier = false;
+    tesseract_ = new Tesseract;
+    if (reader != nullptr) reader_ = reader;
+    TessdataManager mgr(reader_);
+    if (data_size != 0) {
+      mgr.LoadMemBuffer(language, data, data_size);
+    }
+    if (tesseract_->init_tesseract(
+            datapath.string(),
+            output_file_ != nullptr ? output_file_->string() : nullptr,
+            language, oem, configs, configs_size, vars_vec, vars_values,
+            set_only_non_debug_params, &mgr) != 0) {
+      return -1;
+    }
+  }
 
-        if (language_ == nullptr)
-            language_ = new STRING(language);
-        else
-            *language_ = language;
-        last_oem_requested_ = oem;
+  PERF_COUNT_SUB("update tesseract_")
+  // Update datapath and language requested for the last valid initialization.
+  if (datapath_ == nullptr)
+    datapath_ = new STRING(datapath);
+  else
+    *datapath_ = datapath;
+  if ((strcmp(datapath_->string(), "") == 0) &&
+      (strcmp(tesseract_->datadir.string(), "") != 0))
+     *datapath_ = tesseract_->datadir;
+
+  if (language_ == nullptr)
+    language_ = new STRING(language);
+  else
+    *language_ = language;
+  last_oem_requested_ = oem;
 
 #ifndef DISABLED_LEGACY_ENGINE
-        // PERF_COUNT_SUB("update last_oem_requested_")
-        // For same language and datapath, just reset the adaptive classifier.
-        if (reset_classifier) {
-            tesseract_->ResetAdaptiveClassifier();
-            PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
-        }
+  // PERF_COUNT_SUB("update last_oem_requested_")
+  // For same language and datapath, just reset the adaptive classifier.
+  if (reset_classifier) {
+    tesseract_->ResetAdaptiveClassifier();
+    PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
+  }
 #endif  // ndef DISABLED_LEGACY_ENGINE
-        PERF_COUNT_END
-        return 0;
-    }
+  PERF_COUNT_END
+  return 0;
+}
 
 /**
  * Returns the languages string used in the last valid initialization.
@@ -443,38 +443,38 @@ namespace tesseract {
  * loaded use GetLoadedLanguagesAsVector.
  * The returned string should NOT be deleted.
  */
-    const char* TessBaseAPI::GetInitLanguagesAsString() const {
-        return (language_ == nullptr || language_->string() == nullptr) ?
-               "" : language_->string();
-    }
+const char* TessBaseAPI::GetInitLanguagesAsString() const {
+  return (language_ == nullptr || language_->string() == nullptr) ?
+      "" : language_->string();
+}
 
 /**
  * Returns the loaded languages in the vector of STRINGs.
  * Includes all languages loaded by the last Init, including those loaded
  * as dependencies of other loaded languages.
  */
-    void TessBaseAPI::GetLoadedLanguagesAsVector(
-            GenericVector<STRING>* langs) const {
-        langs->clear();
-        if (tesseract_ != nullptr) {
-            langs->push_back(tesseract_->lang);
-            int num_subs = tesseract_->num_sub_langs();
-            for (int i = 0; i < num_subs; ++i)
-                langs->push_back(tesseract_->get_sub_lang(i)->lang);
-        }
-    }
+void TessBaseAPI::GetLoadedLanguagesAsVector(
+    GenericVector<STRING>* langs) const {
+  langs->clear();
+  if (tesseract_ != nullptr) {
+    langs->push_back(tesseract_->lang);
+    int num_subs = tesseract_->num_sub_langs();
+    for (int i = 0; i < num_subs; ++i)
+      langs->push_back(tesseract_->get_sub_lang(i)->lang);
+  }
+}
 
 /**
  * Returns the available languages in the sorted vector of STRINGs.
  */
-    void TessBaseAPI::GetAvailableLanguagesAsVector(
-            GenericVector<STRING>* langs) const {
-        langs->clear();
-        if (tesseract_ != nullptr) {
-            addAvailableLanguages(tesseract_->datadir, "", langs);
-            langs->sort(CompareSTRING);
-        }
-    }
+void TessBaseAPI::GetAvailableLanguagesAsVector(
+    GenericVector<STRING>* langs) const {
+  langs->clear();
+  if (tesseract_ != nullptr) {
+    addAvailableLanguages(tesseract_->datadir, "", langs);
+    langs->sort(CompareSTRING);
+  }
+}
 
 //TODO(amit): Adapt to lstm
 #ifndef DISABLED_LEGACY_ENGINE
@@ -484,61 +484,61 @@ namespace tesseract {
  * WARNING: temporary! This function will be removed from here and placed
  * in a separate API at some future time.
  */
-    int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
-        if (tesseract_ == nullptr)
-            tesseract_ = new Tesseract;
-        else
-            ParamUtils::ResetToDefaults(tesseract_->params());
-        TessdataManager mgr;
-        return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
-    }
+int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
+  if (tesseract_ == nullptr)
+    tesseract_ = new Tesseract;
+  else
+    ParamUtils::ResetToDefaults(tesseract_->params());
+  TessdataManager mgr;
+  return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
  * Init only for page layout analysis. Use only for calls to SetImage and
  * AnalysePage. Calls that attempt recognition will generate an error.
  */
-    void TessBaseAPI::InitForAnalysePage() {
-        if (tesseract_ == nullptr) {
-            tesseract_ = new Tesseract;
-#ifndef DISABLED_LEGACY_ENGINE
-            tesseract_->InitAdaptiveClassifier(nullptr);
-#endif
-        }
-    }
+void TessBaseAPI::InitForAnalysePage() {
+  if (tesseract_ == nullptr) {
+    tesseract_ = new Tesseract;
+    #ifndef DISABLED_LEGACY_ENGINE
+    tesseract_->InitAdaptiveClassifier(nullptr);
+    #endif
+  }
+}
 
 /**
  * Read a "config" file containing a set of parameter name, value pairs.
  * Searches the standard places: tessdata/configs, tessdata/tessconfigs
  * and also accepts a relative or absolute path name.
  */
-    void TessBaseAPI::ReadConfigFile(const char* filename) {
-        tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
-    }
+void TessBaseAPI::ReadConfigFile(const char* filename) {
+  tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
+}
 
 /** Same as above, but only set debug params from the given config file. */
-    void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
-        tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
-    }
+void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
+  tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
+}
 
 /**
  * Set the current page segmentation mode. Defaults to PSM_AUTO.
  * The mode is stored as an IntParam so it can also be modified by
  * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
  */
-    void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
-        if (tesseract_ == nullptr)
-            tesseract_ = new Tesseract;
-        tesseract_->tessedit_pageseg_mode.set_value(mode);
-    }
+void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
+  if (tesseract_ == nullptr)
+    tesseract_ = new Tesseract;
+  tesseract_->tessedit_pageseg_mode.set_value(mode);
+}
 
 /** Return the current page segmentation mode. */
-    PageSegMode TessBaseAPI::GetPageSegMode() const {
-        if (tesseract_ == nullptr)
-            return PSM_SINGLE_BLOCK;
-        return static_cast<PageSegMode>(
-                static_cast<int>(tesseract_->tessedit_pageseg_mode));
-    }
+PageSegMode TessBaseAPI::GetPageSegMode() const {
+  if (tesseract_ == nullptr)
+    return PSM_SINGLE_BLOCK;
+  return static_cast<PageSegMode>(
+    static_cast<int>(tesseract_->tessedit_pageseg_mode));
+}
 
 /**
  * Recognize a rectangle from an image and return the result as a string.
@@ -553,35 +553,35 @@ namespace tesseract {
  * The recognized text is returned as a char* which is coded
  * as UTF8 and must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
-                                     int bytes_per_pixel,
-                                     int bytes_per_line,
-                                     int left, int top,
-                                     int width, int height) {
-        if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
-            return nullptr;  // Nothing worth doing.
-
-        // Since this original api didn't give the exact size of the image,
-        // we have to invent a reasonable value.
-        int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
-        SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
-                 bytes_per_pixel, bytes_per_line);
-        SetRectangle(left, top, width, height);
-
-        return GetUTF8Text();
-    }
+char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
+                                 int bytes_per_pixel,
+                                 int bytes_per_line,
+                                 int left, int top,
+                                 int width, int height) {
+  if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
+    return nullptr;  // Nothing worth doing.
+
+  // Since this original api didn't give the exact size of the image,
+  // we have to invent a reasonable value.
+  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
+  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
+           bytes_per_pixel, bytes_per_line);
+  SetRectangle(left, top, width, height);
+
+  return GetUTF8Text();
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
  * Call between pages or documents etc to free up memory and forget
  * adaptive data.
  */
-    void TessBaseAPI::ClearAdaptiveClassifier() {
-        if (tesseract_ == nullptr)
-            return;
-        tesseract_->ResetAdaptiveClassifier();
-        tesseract_->ResetDocumentDictionary();
-    }
+void TessBaseAPI::ClearAdaptiveClassifier() {
+  if (tesseract_ == nullptr)
+    return;
+  tesseract_->ResetAdaptiveClassifier();
+  tesseract_->ResetDocumentDictionary();
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
@@ -591,22 +591,22 @@ namespace tesseract {
  * full image, so it may be followed immediately by a GetUTF8Text, and it
  * will automatically perform recognition.
  */
-    void TessBaseAPI::SetImage(const unsigned char* imagedata,
-                               int width, int height,
-                               int bytes_per_pixel, int bytes_per_line) {
-        if (InternalSetImage()) {
-            thresholder_->SetImage(imagedata, width, height,
-                                   bytes_per_pixel, bytes_per_line);
-            SetInputImage(thresholder_->GetPixRect());
-        }
-    }
+void TessBaseAPI::SetImage(const unsigned char* imagedata,
+                           int width, int height,
+                           int bytes_per_pixel, int bytes_per_line) {
+  if (InternalSetImage()) {
+    thresholder_->SetImage(imagedata, width, height,
+                           bytes_per_pixel, bytes_per_line);
+    SetInputImage(thresholder_->GetPixRect());
+  }
+}
 
-    void TessBaseAPI::SetSourceResolution(int ppi) {
-        if (thresholder_)
-            thresholder_->SetSourceYResolution(ppi);
-        else
-            tprintf("Please call SetImage before SetSourceResolution.\n");
-    }
+void TessBaseAPI::SetSourceResolution(int ppi) {
+  if (thresholder_)
+    thresholder_->SetSourceYResolution(ppi);
+  else
+    tprintf("Please call SetImage before SetSourceResolution.\n");
+}
 
 /**
  * Provide an image for Tesseract to recognize. As with SetImage above,
@@ -616,53 +616,53 @@ namespace tesseract {
  * Use Pix where possible. Tesseract uses Pix as its internal representation
  * and it is therefore more efficient to provide a Pix directly.
  */
-    void TessBaseAPI::SetImage(Pix* pix) {
-        if (InternalSetImage()) {
-            if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
-                // remove alpha channel from png
-                PIX* p1 = pixRemoveAlpha(pix);
-                pixSetSpp(p1, 3);
-                pix = pixCopy(nullptr, p1);
-                pixDestroy(&p1);
-            }
-            thresholder_->SetImage(pix);
-            SetInputImage(thresholder_->GetPixRect());
-        }
-    }
+void TessBaseAPI::SetImage(Pix* pix) {
+  if (InternalSetImage()) {
+    if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
+      // remove alpha channel from png
+      PIX* p1 = pixRemoveAlpha(pix);
+      pixSetSpp(p1, 3);
+      pix = pixCopy(nullptr, p1);
+      pixDestroy(&p1);
+    }
+    thresholder_->SetImage(pix);
+    SetInputImage(thresholder_->GetPixRect());
+  }
+}
 
 /**
  * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
  * Each SetRectangle clears the recogntion results so multiple rectangles
  * can be recognized with the same image.
  */
-    void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
-        if (thresholder_ == nullptr)
-            return;
-        thresholder_->SetRectangle(left, top, width, height);
-        ClearResults();
-    }
+void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
+  if (thresholder_ == nullptr)
+    return;
+  thresholder_->SetRectangle(left, top, width, height);
+  ClearResults();
+}
 
 /**
  * ONLY available after SetImage if you have Leptonica installed.
  * Get a copy of the internal thresholded image from Tesseract.
  */
-    Pix* TessBaseAPI::GetThresholdedImage() {
-        if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
-        if (tesseract_->pix_binary() == nullptr &&
-            !Threshold(tesseract_->mutable_pix_binary())) {
-            return nullptr;
-        }
-        return pixClone(tesseract_->pix_binary());
-    }
+Pix* TessBaseAPI::GetThresholdedImage() {
+  if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr;
+  if (tesseract_->pix_binary() == nullptr &&
+      !Threshold(tesseract_->mutable_pix_binary())) {
+    return nullptr;
+  }
+  return pixClone(tesseract_->pix_binary());
+}
 
 /**
  * Get the result of page layout analysis as a leptonica-style
  * Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-    Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
-        return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
-    }
+Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
+  return GetComponentImages(RIL_BLOCK, false, pixa, nullptr);
+}
 
 /**
  * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order.
@@ -672,11 +672,11 @@ namespace tesseract {
  * If paraids is not nullptr, the paragraph-id of each line within its block is
  * also returned as an array of one element per line. delete [] after use.
  */
-    Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
-                                    Pixa** pixa, int** blockids, int** paraids) {
-        return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
-                                  pixa, blockids, paraids);
-    }
+Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
+                                Pixa** pixa, int** blockids, int** paraids) {
+  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
+                            pixa, blockids, paraids);
+}
 
 /**
  * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
@@ -686,18 +686,18 @@ namespace tesseract {
  * If blockids is not nullptr, the block-id of each line is also returned as an
  * array of one element per line. delete [] after use.
  */
-    Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
-        return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
-    }
+Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
+  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
+}
 
 /**
  * Get the words as a leptonica-style
  * Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-    Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
-        return GetComponentImages(RIL_WORD, true, pixa, nullptr);
-    }
+Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
+  return GetComponentImages(RIL_WORD, true, pixa, nullptr);
+}
 
 /**
  * Gets the individual connected (text) components (created
@@ -705,9 +705,9 @@ namespace tesseract {
  * as a leptonica-style Boxa, Pixa pair, in reading order.
  * Can be called before or after Recognize.
  */
-    Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
-        return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
-    }
+Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
+  return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr);
+}
 
 /**
  * Get the given level kind of components (block, textline, word etc.) as a
@@ -717,94 +717,94 @@ namespace tesseract {
  * as an array of one element per component. delete [] after use.
  * If text_only is true, then only text components are returned.
  */
-    Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
-                                          bool text_only, bool raw_image,
-                                          const int raw_padding,
-                                          Pixa** pixa, int** blockids,
-                                          int** paraids) {
-        PageIterator* page_it = GetIterator();
-        if (page_it == nullptr)
-            page_it = AnalyseLayout();
-        if (page_it == nullptr)
-            return nullptr;  // Failed.
-
-        // Count the components to get a size for the arrays.
-        int component_count = 0;
-        int left, top, right, bottom;
-
-        TessResultCallback<bool>* get_bbox = nullptr;
+Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
+                                      bool text_only, bool raw_image,
+                                      const int raw_padding,
+                                      Pixa** pixa, int** blockids,
+                                      int** paraids) {
+  PageIterator* page_it = GetIterator();
+  if (page_it == nullptr)
+    page_it = AnalyseLayout();
+  if (page_it == nullptr)
+    return nullptr;  // Failed.
+
+  // Count the components to get a size for the arrays.
+  int component_count = 0;
+  int left, top, right, bottom;
+
+  TessResultCallback<bool>* get_bbox = nullptr;
+  if (raw_image) {
+    // Get bounding box in original raw image with padding.
+    get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
+                                        level, raw_padding,
+                                        &left, &top, &right, &bottom);
+  } else {
+    // Get bounding box from binarized image. Note that this could be
+    // differently scaled from the original image.
+    get_bbox = NewPermanentTessCallback(page_it,
+                                        &PageIterator::BoundingBoxInternal,
+                                        level, &left, &top, &right, &bottom);
+  }
+  do {
+    if (get_bbox->Run() &&
+        (!text_only || PTIsTextType(page_it->BlockType())))
+      ++component_count;
+  } while (page_it->Next(level));
+
+  Boxa* boxa = boxaCreate(component_count);
+  if (pixa != nullptr)
+    *pixa = pixaCreate(component_count);
+  if (blockids != nullptr)
+    *blockids = new int[component_count];
+  if (paraids != nullptr)
+    *paraids = new int[component_count];
+
+  int blockid = 0;
+  int paraid = 0;
+  int component_index = 0;
+  page_it->Begin();
+  do {
+    if (get_bbox->Run() &&
+        (!text_only || PTIsTextType(page_it->BlockType()))) {
+      Box* lbox = boxCreate(left, top, right - left, bottom - top);
+      boxaAddBox(boxa, lbox, L_INSERT);
+      if (pixa != nullptr) {
+        Pix* pix = nullptr;
         if (raw_image) {
-            // Get bounding box in original raw image with padding.
-            get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox,
-                                                level, raw_padding,
-                                                &left, &top, &right, &bottom);
+          pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
+                                  &top);
         } else {
-            // Get bounding box from binarized imaged. Note that this could be
-            // differently scaled from the original image.
-            get_bbox = NewPermanentTessCallback(page_it,
-                                                &PageIterator::BoundingBoxInternal,
-                                                level, &left, &top, &right, &bottom);
+          pix = page_it->GetBinaryImage(level);
         }
-        do {
-            if (get_bbox->Run() &&
-                (!text_only || PTIsTextType(page_it->BlockType())))
-                ++component_count;
-        } while (page_it->Next(level));
-
-        Boxa* boxa = boxaCreate(component_count);
-        if (pixa != nullptr)
-            *pixa = pixaCreate(component_count);
-        if (blockids != nullptr)
-            *blockids = new int[component_count];
-        if (paraids != nullptr)
-            *paraids = new int[component_count];
-
-        int blockid = 0;
-        int paraid = 0;
-        int component_index = 0;
-        page_it->Begin();
-        do {
-            if (get_bbox->Run() &&
-                (!text_only || PTIsTextType(page_it->BlockType()))) {
-                Box* lbox = boxCreate(left, top, right - left, bottom - top);
-                boxaAddBox(boxa, lbox, L_INSERT);
-                if (pixa != nullptr) {
-                    Pix* pix = nullptr;
-                    if (raw_image) {
-                        pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
-                                                &top);
-                    } else {
-                        pix = page_it->GetBinaryImage(level);
-                    }
-                    pixaAddPix(*pixa, pix, L_INSERT);
-                    pixaAddBox(*pixa, lbox, L_CLONE);
-                }
-                if (paraids != nullptr) {
-                    (*paraids)[component_index] = paraid;
-                    if (page_it->IsAtFinalElement(RIL_PARA, level))
-                        ++paraid;
-                }
-                if (blockids != nullptr) {
-                    (*blockids)[component_index] = blockid;
-                    if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
-                        ++blockid;
-                        paraid = 0;
-                    }
-                }
-                ++component_index;
-            }
-        } while (page_it->Next(level));
-        delete page_it;
-        delete get_bbox;
-        return boxa;
-    }
-
-    int TessBaseAPI::GetThresholdedImageScaleFactor() const {
-        if (thresholder_ == nullptr) {
-            return 0;
+        pixaAddPix(*pixa, pix, L_INSERT);
+        pixaAddBox(*pixa, lbox, L_CLONE);
+      }
+      if (paraids != nullptr) {
+        (*paraids)[component_index] = paraid;
+        if (page_it->IsAtFinalElement(RIL_PARA, level))
+          ++paraid;
+      }
+      if (blockids != nullptr) {
+        (*blockids)[component_index] = blockid;
+        if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
+          ++blockid;
+          paraid = 0;
         }
-        return thresholder_->GetScaleFactor();
+      }
+      ++component_index;
     }
+  } while (page_it->Next(level));
+  delete page_it;
+  delete get_bbox;
+  return boxa;
+}
+
+int TessBaseAPI::GetThresholdedImageScaleFactor() const {
+  if (thresholder_ == nullptr) {
+    return 0;
+  }
+  return thresholder_->GetScaleFactor();
+}
 
 /**
  * Runs page layout analysis in the mode set by SetPageSegMode.
@@ -821,282 +821,282 @@ namespace tesseract {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-    PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
-
-    PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
-        if (FindLines() == 0) {
-            if (block_list_->empty())
-                return nullptr;  // The page was empty.
-            page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
-            DetectParagraphs(false);
-            return new PageIterator(
-                    page_res_, tesseract_, thresholder_->GetScaleFactor(),
-                    thresholder_->GetScaledYResolution(),
-                    rect_left_, rect_top_, rect_width_, rect_height_);
-        }
-        return nullptr;
-    }
+PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
+
+PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
+  if (FindLines() == 0) {
+    if (block_list_->empty())
+      return nullptr;  // The page was empty.
+    page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
+    DetectParagraphs(false);
+    return new PageIterator(
+        page_res_, tesseract_, thresholder_->GetScaleFactor(),
+        thresholder_->GetScaledYResolution(),
+        rect_left_, rect_top_, rect_width_, rect_height_);
+  }
+  return nullptr;
+}
 
 /**
  * Recognize the tesseract global image and return the result as Tesseract
  * internal structures.
  */
-    int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
-        if (tesseract_ == nullptr)
-            return -1;
-        if (FindLines() != 0)
-            return -1;
-        delete page_res_;
-        if (block_list_->empty()) {
-            page_res_ = new PAGE_RES(false, block_list_,
-                                     &tesseract_->prev_word_best_choice_);
-            return 0; // Empty page.
-        }
+int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
+  if (tesseract_ == nullptr)
+    return -1;
+  if (FindLines() != 0)
+    return -1;
+  delete page_res_;
+  if (block_list_->empty()) {
+    page_res_ = new PAGE_RES(false, block_list_,
+                             &tesseract_->prev_word_best_choice_);
+    return 0; // Empty page.
+  }
 
-        tesseract_->SetBlackAndWhitelist();
-        recognition_done_ = true;
+  tesseract_->SetBlackAndWhitelist();
+  recognition_done_ = true;
 #ifndef DISABLED_LEGACY_ENGINE
-        if (tesseract_->tessedit_resegment_from_line_boxes) {
-            page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
-        } else if (tesseract_->tessedit_resegment_from_boxes) {
-            page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
-        } else
+  if (tesseract_->tessedit_resegment_from_line_boxes) {
+    page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
+  } else if (tesseract_->tessedit_resegment_from_boxes) {
+    page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
+  } else
 #endif  // ndef DISABLED_LEGACY_ENGINE
-        {
-            page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
-                                     block_list_, &tesseract_->prev_word_best_choice_);
-        }
+  {
+    page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
+                             block_list_, &tesseract_->prev_word_best_choice_);
+  }
 
-        if (page_res_ == nullptr) {
-            return -1;
-        }
+  if (page_res_ == nullptr) {
+    return -1;
+  }
 
-        if (tesseract_->tessedit_train_line_recognizer) {
-            tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
-            tesseract_->CorrectClassifyWords(page_res_);
-            return 0;
-        }
+  if (tesseract_->tessedit_train_line_recognizer) {
+    tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
+    tesseract_->CorrectClassifyWords(page_res_);
+    return 0;
+  }
 #ifndef DISABLED_LEGACY_ENGINE
-        if (tesseract_->tessedit_make_boxes_from_boxes) {
-            tesseract_->CorrectClassifyWords(page_res_);
-            return 0;
-        }
+  if (tesseract_->tessedit_make_boxes_from_boxes) {
+    tesseract_->CorrectClassifyWords(page_res_);
+    return 0;
+  }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-        if (truth_cb_ != nullptr) {
-            tesseract_->wordrec_run_blamer.set_value(true);
-            PageIterator *page_it = new PageIterator(
-                    page_res_, tesseract_, thresholder_->GetScaleFactor(),
-                    thresholder_->GetScaledYResolution(),
-                    rect_left_, rect_top_, rect_width_, rect_height_);
-            truth_cb_->Run(tesseract_->getDict().getUnicharset(),
-                           image_height_, page_it, this->tesseract()->pix_grey());
-            delete page_it;
-        }
+  if (truth_cb_ != nullptr) {
+    tesseract_->wordrec_run_blamer.set_value(true);
+    PageIterator *page_it = new PageIterator(
+            page_res_, tesseract_, thresholder_->GetScaleFactor(),
+            thresholder_->GetScaledYResolution(),
+            rect_left_, rect_top_, rect_width_, rect_height_);
+    truth_cb_->Run(tesseract_->getDict().getUnicharset(),
+                   image_height_, page_it, this->tesseract()->pix_grey());
+    delete page_it;
+  }
 
-        int result = 0;
-        if (tesseract_->interactive_display_mode) {
-#ifndef GRAPHICS_DISABLED
-            tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
-#endif  // GRAPHICS_DISABLED
-            // The page_res is invalid after an interactive session, so cleanup
-            // in a way that lets us continue to the next page without crashing.
-            delete page_res_;
-            page_res_ = nullptr;
-            return -1;
-#ifndef DISABLED_LEGACY_ENGINE
-        } else if (tesseract_->tessedit_train_from_boxes) {
-            STRING fontname;
-            ExtractFontName(*output_file_, &fontname);
-            tesseract_->ApplyBoxTraining(fontname, page_res_);
-        } else if (tesseract_->tessedit_ambigs_training) {
-            FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
-            // OCR the page segmented into words by tesseract.
-            tesseract_->recog_training_segmented(
-                    *input_file_, page_res_, monitor, training_output_file);
-            fclose(training_output_file);
-#endif  // ndef DISABLED_LEGACY_ENGINE
-        } else {
-            // Now run the main recognition.
-            bool wait_for_text = true;
-            GetBoolVariable("paragraph_text_based", &wait_for_text);
-            if (!wait_for_text) DetectParagraphs(false);
-            if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
-                if (wait_for_text) DetectParagraphs(true);
-            } else {
-                result = -1;
-            }
-        }
-        return result;
+  int result = 0;
+  if (tesseract_->interactive_display_mode) {
+    #ifndef GRAPHICS_DISABLED
+    tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
+    #endif  // GRAPHICS_DISABLED
+    // The page_res is invalid after an interactive session, so cleanup
+    // in a way that lets us continue to the next page without crashing.
+    delete page_res_;
+    page_res_ = nullptr;
+    return -1;
+  #ifndef DISABLED_LEGACY_ENGINE
+  } else if (tesseract_->tessedit_train_from_boxes) {
+    STRING fontname;
+    ExtractFontName(*output_file_, &fontname);
+    tesseract_->ApplyBoxTraining(fontname, page_res_);
+  } else if (tesseract_->tessedit_ambigs_training) {
+    FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
+    // OCR the page segmented into words by tesseract.
+    tesseract_->recog_training_segmented(
+        *input_file_, page_res_, monitor, training_output_file);
+    fclose(training_output_file);
+  #endif  // ndef DISABLED_LEGACY_ENGINE
+  } else {
+    // Now run the main recognition.
+    bool wait_for_text = true;
+    GetBoolVariable("paragraph_text_based", &wait_for_text);
+    if (!wait_for_text) DetectParagraphs(false);
+    if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
+      if (wait_for_text) DetectParagraphs(true);
+    } else {
+      result = -1;
     }
+  }
+  return result;
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Tests the chopper by exhaustively running chop_one_blob. */
-    int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
-        if (tesseract_ == nullptr)
-            return -1;
-        if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-            tprintf("Please call SetImage before attempting recognition.\n");
-            return -1;
-        }
-        if (page_res_ != nullptr)
-            ClearResults();
-        if (FindLines() != 0)
-            return -1;
-        // Additional conditions under which chopper test cannot be run
-        if (tesseract_->interactive_display_mode) return -1;
-
-        recognition_done_ = true;
-
-        page_res_ = new PAGE_RES(false, block_list_,
-                                 &(tesseract_->prev_word_best_choice_));
-
-        PAGE_RES_IT page_res_it(page_res_);
-
-        while (page_res_it.word() != nullptr) {
-            WERD_RES *word_res = page_res_it.word();
-            GenericVector<TBOX> boxes;
-            tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
-                                          page_res_it.row()->row, word_res);
-            page_res_it.forward();
-        }
-        return 0;
-    }
+int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
+  if (tesseract_ == nullptr)
+    return -1;
+  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+    tprintf("Please call SetImage before attempting recognition.\n");
+    return -1;
+  }
+  if (page_res_ != nullptr)
+    ClearResults();
+  if (FindLines() != 0)
+    return -1;
+  // Additional conditions under which chopper test cannot be run
+  if (tesseract_->interactive_display_mode) return -1;
+
+  recognition_done_ = true;
+
+  page_res_ = new PAGE_RES(false, block_list_,
+                           &(tesseract_->prev_word_best_choice_));
+
+  PAGE_RES_IT page_res_it(page_res_);
+
+  while (page_res_it.word() != nullptr) {
+    WERD_RES *word_res = page_res_it.word();
+    GenericVector<TBOX> boxes;
+    tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
+                                  page_res_it.row()->row, word_res);
+    page_res_it.forward();
+  }
+  return 0;
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 // Takes ownership of the input pix.
-    void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
+void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
 
-    Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
+Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
 
-    const char * TessBaseAPI::GetInputName() {
-        if (input_file_)
-            return input_file_->c_str();
-        return nullptr;
-    }
+const char * TessBaseAPI::GetInputName() {
+  if (input_file_)
+    return input_file_->c_str();
+  return nullptr;
+}
 
-    const char *  TessBaseAPI::GetDatapath() {
-        return tesseract_->datadir.c_str();
-    }
+const char *  TessBaseAPI::GetDatapath() {
+  return tesseract_->datadir.c_str();
+}
 
-    int TessBaseAPI::GetSourceYResolution() {
-        return thresholder_->GetSourceYResolution();
-    }
+int TessBaseAPI::GetSourceYResolution() {  // -1 if there is no thresholder.
+  return thresholder_ == nullptr ? -1 : thresholder_->GetSourceYResolution();
+}
 
 // If flist exists, get data from there. Otherwise get data from buf.
 // Seems convoluted, but is the easiest way I know of to meet multiple
 // goals. Support streaming from stdin, and also work on platforms
 // lacking fmemopen.
-    bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
-                                           STRING *buf,
-                                           const char* retry_config,
-                                           int timeout_millisec,
-                                           TessResultRenderer* renderer,
-                                           int tessedit_page_number) {
-        if (!flist && !buf) return false;
-        int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-        char pagename[MAX_PATH];
-
-        GenericVector<STRING> lines;
-        if (!flist) {
-            buf->split('\n', &lines);
-            if (lines.empty()) return false;
-        }
-
-        // Skip to the requested page number.
-        for (int i = 0; i < page; i++) {
-            if (flist) {
-                if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
-            }
-        }
+bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
+                                       STRING *buf,
+                                       const char* retry_config,
+                                       int timeout_millisec,
+                                       TessResultRenderer* renderer,
+                                       int tessedit_page_number) {
+  if (!flist && !buf) return false;
+  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+  char pagename[MAX_PATH];
+
+  GenericVector<STRING> lines;
+  if (!flist) {
+    buf->split('\n', &lines);
+    if (lines.empty()) return false;
+  }
 
-        // Begin producing output
-        if (renderer && !renderer->BeginDocument(unknown_title_)) {
-            return false;
-        }
+  // Skip to the requested page number.
+  for (int i = 0; i < page; i++) {
+    if (flist) {
+      if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+    }
+  }
 
-        // Loop over all pages - or just the requested one
-        while (true) {
-            if (flist) {
-                if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
-            } else {
-                if (page >= lines.size()) break;
-                snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
-            }
-            chomp_string(pagename);
-            Pix *pix = pixRead(pagename);
-            if (pix == nullptr) {
-                tprintf("Image file %s cannot be read!\n", pagename);
-                return false;
-            }
-            tprintf("Page %d : %s\n", page, pagename);
-            bool r = ProcessPage(pix, page, pagename, retry_config,
-                                 timeout_millisec, renderer);
-            pixDestroy(&pix);
-            if (!r) return false;
-            if (tessedit_page_number >= 0) break;
-            ++page;
-        }
+  // Begin producing output
+  if (renderer && !renderer->BeginDocument(unknown_title_)) {
+    return false;
+  }
 
-        // Finish producing output
-        if (renderer && !renderer->EndDocument()) {
-            return false;
-        }
-        return true;
-    }
+  // Loop over all pages - or just the requested one
+  while (true) {
+    if (flist) {
+      if (fgets(pagename, sizeof(pagename), flist) == nullptr) break;
+    } else {
+      if (page >= lines.size()) break;
+      snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
+    }
+    chomp_string(pagename);
+    Pix *pix = pixRead(pagename);
+    if (pix == nullptr) {
+      tprintf("Image file %s cannot be read!\n", pagename);
+      return false;
+    }
+    tprintf("Page %d : %s\n", page, pagename);
+    bool r = ProcessPage(pix, page, pagename, retry_config,
+                         timeout_millisec, renderer);
+    pixDestroy(&pix);
+    if (!r) return false;
+    if (tessedit_page_number >= 0) break;
+    ++page;
+  }
 
-    bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
-                                                size_t size,
-                                                const char* filename,
-                                                const char* retry_config,
-                                                int timeout_millisec,
-                                                TessResultRenderer* renderer,
-                                                int tessedit_page_number) {
+  // Finish producing output
+  if (renderer && !renderer->EndDocument()) {
+    return false;
+  }
+  return true;
+}
+
+bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
+                                            size_t size,
+                                            const char* filename,
+                                            const char* retry_config,
+                                            int timeout_millisec,
+                                            TessResultRenderer* renderer,
+                                            int tessedit_page_number) {
 #ifndef ANDROID_BUILD
-        Pix *pix = nullptr;
-        int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
-        size_t offset = 0;
-        for (; ; ++page) {
-            if (tessedit_page_number >= 0)
-                page = tessedit_page_number;
-            pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                         : pixReadFromMultipageTiff(filename, &offset);
-            if (pix == nullptr) break;
-            tprintf("Page %d\n", page + 1);
-            char page_str[kMaxIntSize];
-            snprintf(page_str, kMaxIntSize - 1, "%d", page);
-            SetVariable("applybox_page", page_str);
-            bool r = ProcessPage(pix, page, filename, retry_config,
-                                 timeout_millisec, renderer);
-            pixDestroy(&pix);
-            if (!r) return false;
-            if (tessedit_page_number >= 0) break;
-            if (!offset) break;
-        }
-        return true;
+  Pix *pix = nullptr;
+  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
+  size_t offset = 0;
+  for (; ; ++page) {
+    if (tessedit_page_number >= 0)
+      page = tessedit_page_number;
+    pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                 : pixReadFromMultipageTiff(filename, &offset);
+    if (pix == nullptr) break;
+    tprintf("Page %d\n", page + 1);
+    char page_str[kMaxIntSize];
+    snprintf(page_str, kMaxIntSize - 1, "%d", page);
+    SetVariable("applybox_page", page_str);
+    bool r = ProcessPage(pix, page, filename, retry_config,
+                           timeout_millisec, renderer);
+    pixDestroy(&pix);
+    if (!r) return false;
+    if (tessedit_page_number >= 0) break;
+    if (!offset) break;
+  }
+  return true;
 #else
-        return false;
+  return false;
 #endif
-    }
+}
 
 // Master ProcessPages calls ProcessPagesInternal and then does any post-
 // processing required due to being in a training mode.
-    bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
-                                   int timeout_millisec,
-                                   TessResultRenderer* renderer) {
-        bool result =
-                ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
-#ifndef DISABLED_LEGACY_ENGINE
-        if (result) {
-            if (tesseract_->tessedit_train_from_boxes &&
-                !tesseract_->WriteTRFile(*output_file_)) {
-                tprintf("Write of TR file failed: %s\n", output_file_->string());
-                return false;
-            }
-        }
-#endif  // ndef DISABLED_LEGACY_ENGINE
-        return result;
+bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
+                               int timeout_millisec,
+                               TessResultRenderer* renderer) {
+  bool result =
+      ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
+  #ifndef DISABLED_LEGACY_ENGINE
+  if (result) {
+    if (tesseract_->tessedit_train_from_boxes &&
+        !tesseract_->WriteTRFile(*output_file_)) {
+      tprintf("Write of TR file failed: %s\n", output_file_->string());
+      return false;
     }
+  }
+  #endif  // ndef DISABLED_LEGACY_ENGINE
+  return result;
+}
 
 // In the ideal scenario, Tesseract will start working on data as soon
 // as it can. For example, if you stream a filelist through stdin, we
@@ -1109,184 +1109,184 @@ namespace tesseract {
 // impractical.  So we support a command line flag to explicitly
 // identify the scenario that really matters: filelists on
 // stdin. We'll still do our best if the user likes pipes.
-    bool TessBaseAPI::ProcessPagesInternal(const char* filename,
-                                           const char* retry_config,
-                                           int timeout_millisec,
-                                           TessResultRenderer* renderer) {
-        PERF_COUNT_START("ProcessPages")
-        bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
-        if (stdInput) {
+bool TessBaseAPI::ProcessPagesInternal(const char* filename,
+                                       const char* retry_config,
+                                       int timeout_millisec,
+                                       TessResultRenderer* renderer) {
+  PERF_COUNT_START("ProcessPages")
+  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
+  if (stdInput) {
 #ifdef WIN32
-            if (_setmode(_fileno(stdin), _O_BINARY) == -1)
+    if (_setmode(_fileno(stdin), _O_BINARY) == -1)
       tprintf("ERROR: cin to binary: %s", strerror(errno));
 #endif  // WIN32
-        }
-
-        if (stream_filelist) {
-            return ProcessPagesFileList(stdin, nullptr, retry_config,
-                                        timeout_millisec, renderer,
-                                        tesseract_->tessedit_page_number);
-        }
-
-        // At this point we are officially in autodection territory.
-        // That means any data in stdin must be buffered, to make it
-        // seekable.
-        std::string buf;
-        const l_uint8 *data = nullptr;
-        if (stdInput) {
-            buf.assign((std::istreambuf_iterator<char>(std::cin)),
-                       (std::istreambuf_iterator<char>()));
-            data = reinterpret_cast<const l_uint8 *>(buf.data());
-        } else {
-            // Check whether the input file can be read.
-            if (FILE* file = fopen(filename, "rb")) {
-                fclose(file);
-            } else {
-                fprintf(stderr, "Error, cannot read input file %s: %s\n",
-                        filename, strerror(errno));
-                return false;
-            }
-        }
-
-        // Here is our autodetection
-        int format;
-        int r = (stdInput) ?
-                findFileFormatBuffer(data, &format) :
-                findFileFormat(filename, &format);
-
-        // Maybe we have a filelist
-        if (r != 0 || format == IFF_UNKNOWN) {
-            STRING s;
-            if (stdInput) {
-                s = buf.c_str();
-            } else {
-                std::ifstream t(filename);
-                std::string u((std::istreambuf_iterator<char>(t)),
-                              std::istreambuf_iterator<char>());
-                s = u.c_str();
-            }
-            return ProcessPagesFileList(nullptr, &s, retry_config,
-                                        timeout_millisec, renderer,
-                                        tesseract_->tessedit_page_number);
-        }
+  }
 
-        // Maybe we have a TIFF which is potentially multipage
-        bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
-                     format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
-                     format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
-                     format == IFF_TIFF_ZIP);
-
-        // Fail early if we can, before producing any output
-        Pix *pix = nullptr;
-        if (!tiff) {
-            pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
-            if (pix == nullptr) {
-                return false;
-            }
-        }
+  if (stream_filelist) {
+    return ProcessPagesFileList(stdin, nullptr, retry_config,
+                                timeout_millisec, renderer,
+                                tesseract_->tessedit_page_number);
+  }
 
-        // Begin the output
-        if (renderer && !renderer->BeginDocument(unknown_title_)) {
-            pixDestroy(&pix);
-            return false;
-        }
+  // At this point we are officially in autodetection territory.
+  // That means any data in stdin must be buffered, to make it
+  // seekable.
+  std::string buf;
+  const l_uint8 *data = nullptr;
+  if (stdInput) {
+    buf.assign((std::istreambuf_iterator<char>(std::cin)),
+               (std::istreambuf_iterator<char>()));
+    data = reinterpret_cast<const l_uint8 *>(buf.data());
+  } else {
+    // Check whether the input file can be read.
+    if (FILE* file = fopen(filename, "rb")) {
+      fclose(file);
+    } else {
+      fprintf(stderr, "Error, cannot read input file %s: %s\n",
+              filename, strerror(errno));
+      return false;
+    }
+  }
 
-        // Produce output
-        r = (tiff) ?
-            ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
-                                      timeout_millisec, renderer,
-                                      tesseract_->tessedit_page_number) :
-            ProcessPage(pix, 0, filename, retry_config,
-                        timeout_millisec, renderer);
+  // Here is our autodetection
+  int format;
+  int r = (stdInput) ?
+      findFileFormatBuffer(data, &format) :
+      findFileFormat(filename, &format);
+
+  // Maybe we have a filelist
+  if (r != 0 || format == IFF_UNKNOWN) {
+    STRING s;
+    if (stdInput) {
+      s = buf.c_str();
+    } else {
+      std::ifstream t(filename);
+      std::string u((std::istreambuf_iterator<char>(t)),
+                    std::istreambuf_iterator<char>());
+      s = u.c_str();
+    }
+    return ProcessPagesFileList(nullptr, &s, retry_config,
+                                timeout_millisec, renderer,
+                                tesseract_->tessedit_page_number);
+  }
 
-        // Clean up memory as needed
-        pixDestroy(&pix);
+  // Maybe we have a TIFF which is potentially multipage
+  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
+               format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
+               format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
+               format == IFF_TIFF_ZIP);
 
-        // End the output
-        if (!r || (renderer && !renderer->EndDocument())) {
-            return false;
-        }
-        PERF_COUNT_END
-        return true;
+  // Fail early if we can, before producing any output
+  Pix *pix = nullptr;
+  if (!tiff) {
+    pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
+    if (pix == nullptr) {
+      return false;
     }
+  }
 
-    bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
-                                  const char* retry_config, int timeout_millisec,
-                                  TessResultRenderer* renderer) {
-        PERF_COUNT_START("ProcessPage")
-        SetInputName(filename);
-        SetImage(pix);
-        bool failed = false;
+  // Begin the output
+  if (renderer && !renderer->BeginDocument(unknown_title_)) {
+    pixDestroy(&pix);
+    return false;
+  }
 
-        if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
-            // Disabled character recognition
-            PageIterator* it = AnalyseLayout();
+  // Produce output
+  r = (tiff) ?
+      ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
+                                timeout_millisec, renderer,
+                                tesseract_->tessedit_page_number) :
+      ProcessPage(pix, 0, filename, retry_config,
+                  timeout_millisec, renderer);
 
-            if (it == nullptr) {
-                failed = true;
-            } else {
-                delete it;
-            }
-        } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
-            failed = FindLines() != 0;
-        } else if (timeout_millisec > 0) {
-            // Running with a timeout.
-            ETEXT_DESC monitor;
-            monitor.cancel = nullptr;
-            monitor.cancel_this = nullptr;
-            monitor.set_deadline_msecs(timeout_millisec);
-
-            // Now run the main recognition.
-            failed = Recognize(&monitor) < 0;
-        } else {
-            // Normal layout and character recognition with no timeout.
-            failed = Recognize(nullptr) < 0;
-        }
+  // Clean up memory as needed
+  pixDestroy(&pix);
 
-        if (tesseract_->tessedit_write_images) {
+  // End the output
+  if (!r || (renderer && !renderer->EndDocument())) {
+    return false;
+  }
+  PERF_COUNT_END
+  return true;
+}
+
+bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
+                              const char* retry_config, int timeout_millisec,
+                              TessResultRenderer* renderer) {
+  PERF_COUNT_START("ProcessPage")
+  SetInputName(filename);
+  SetImage(pix);
+  bool failed = false;
+
+  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
+    // Character recognition is disabled; only analyse the layout.
+    PageIterator* it = AnalyseLayout();
+
+    if (it == nullptr) {
+      failed = true;
+    } else {
+      delete it;
+    }
+  } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
+    failed = FindLines() != 0;
+  } else if (timeout_millisec > 0) {
+    // Running with a timeout.
+    ETEXT_DESC monitor;
+    monitor.cancel = nullptr;
+    monitor.cancel_this = nullptr;
+    monitor.set_deadline_msecs(timeout_millisec);
+
+    // Now run the main recognition.
+    failed = Recognize(&monitor) < 0;
+  } else {
+    // Normal layout and character recognition with no timeout.
+    failed = Recognize(nullptr) < 0;
+  }
+
+  if (tesseract_->tessedit_write_images) {
 #ifndef ANDROID_BUILD
-            Pix* page_pix = GetThresholdedImage();
-            pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
+    Pix* page_pix = GetThresholdedImage();
+    pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
 #endif  // ANDROID_BUILD
-        }
+  }
 
-        if (failed && retry_config != nullptr && retry_config[0] != '\0') {
-            // Save current config variables before switching modes.
-            FILE* fp = fopen(kOldVarsFile, "wb");
-            if (fp == nullptr) {
-                tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
-            } else {
-                PrintVariables(fp);
-                fclose(fp);
-            }
-            // Switch to alternate mode for retry.
-            ReadConfigFile(retry_config);
-            SetImage(pix);
-            Recognize(nullptr);
-            // Restore saved config variables.
-            ReadConfigFile(kOldVarsFile);
-        }
+  if (failed && retry_config != nullptr && retry_config[0] != '\0') {
+    // Save current config variables before switching modes.
+    FILE* fp = fopen(kOldVarsFile, "wb");
+    if (fp == nullptr) {
+      tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile);
+    } else {
+      PrintVariables(fp);
+      fclose(fp);
+    }
+    // Switch to alternate mode for retry.
+    ReadConfigFile(retry_config);
+    SetImage(pix);
+    Recognize(nullptr);
+    // Restore saved config variables.
+    ReadConfigFile(kOldVarsFile);
+  }
 
-        if (renderer && !failed) {
-            failed = !renderer->AddImage(this);
-        }
+  if (renderer && !failed) {
+    failed = !renderer->AddImage(this);
+  }
 
-        PERF_COUNT_END
-        return !failed;
-    }
+  PERF_COUNT_END
+  return !failed;
+}
 
 /**
  * Get a left-to-right iterator to the results of LayoutAnalysis and/or
  * Recognize. The returned iterator must be deleted after use.
  */
-    LTRResultIterator* TessBaseAPI::GetLTRIterator() {
-        if (tesseract_ == nullptr || page_res_ == nullptr)
-            return nullptr;
-        return new LTRResultIterator(
-                page_res_, tesseract_,
-                thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-                rect_left_, rect_top_, rect_width_, rect_height_);
-    }
+LTRResultIterator* TessBaseAPI::GetLTRIterator() {
+  if (tesseract_ == nullptr || page_res_ == nullptr)
+    return nullptr;
+  return new LTRResultIterator(
+      page_res_, tesseract_,
+      thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
+      rect_left_, rect_top_, rect_width_, rect_height_);
+}
 
 /**
  * Get a reading-order iterator to the results of LayoutAnalysis and/or
@@ -1296,14 +1296,14 @@ namespace tesseract {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-    ResultIterator* TessBaseAPI::GetIterator() {
-        if (tesseract_ == nullptr || page_res_ == nullptr)
-            return nullptr;
-        return ResultIterator::StartOfParagraph(LTRResultIterator(
-                page_res_, tesseract_,
-                thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
-                rect_left_, rect_top_, rect_width_, rect_height_));
-    }
+ResultIterator* TessBaseAPI::GetIterator() {
+  if (tesseract_ == nullptr || page_res_ == nullptr)
+    return nullptr;
+  return ResultIterator::StartOfParagraph(LTRResultIterator(
+      page_res_, tesseract_,
+      thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
+      rect_left_, rect_top_, rect_width_, rect_height_));
+}
 
 /**
  * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
@@ -1313,45 +1313,45 @@ namespace tesseract {
  * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
  * DetectOS, or anything else that changes the internal PAGE_RES.
  */
-    MutableIterator* TessBaseAPI::GetMutableIterator() {
-        if (tesseract_ == nullptr || page_res_ == nullptr)
-            return nullptr;
-        return new MutableIterator(page_res_, tesseract_,
-                                   thresholder_->GetScaleFactor(),
-                                   thresholder_->GetScaledYResolution(),
-                                   rect_left_, rect_top_, rect_width_, rect_height_);
-    }
+MutableIterator* TessBaseAPI::GetMutableIterator() {
+  if (tesseract_ == nullptr || page_res_ == nullptr)
+    return nullptr;
+  return new MutableIterator(page_res_, tesseract_,
+                             thresholder_->GetScaleFactor(),
+                             thresholder_->GetScaledYResolution(),
+                             rect_left_, rect_top_, rect_width_, rect_height_);
+}
 
 /** Make a text string from the internal data structures. */
-    char* TessBaseAPI::GetUTF8Text() {
-        if (tesseract_ == nullptr ||
-            (!recognition_done_ && Recognize(nullptr) < 0))
-            return nullptr;
-        STRING text("");
-        ResultIterator *it = GetIterator();
-        do {
-            if (it->Empty(RIL_PARA)) continue;
-            const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
-            text += para_text.get();
-        } while (it->Next(RIL_PARA));
-        char* result = new char[text.length() + 1];
-        strncpy(result, text.string(), text.length() + 1);
-        delete it;
-        return result;
-    }
+char* TessBaseAPI::GetUTF8Text() {
+  if (tesseract_ == nullptr ||
+      (!recognition_done_ && Recognize(nullptr) < 0))
+    return nullptr;
+  STRING text("");
+  ResultIterator *it = GetIterator();
+  do {
+    if (it->Empty(RIL_PARA)) continue;
+    const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
+    text += para_text.get();
+  } while (it->Next(RIL_PARA));
+  char* result = new char[text.length() + 1];
+  strncpy(result, text.string(), text.length() + 1);
+  delete it;
+  return result;
+}
 
 /**
  * Gets the block orientation at the current iterator position.
  */
-    static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
-        tesseract::Orientation orientation;
-        tesseract::WritingDirection writing_direction;
-        tesseract::TextlineOrder textline_order;
-        float deskew_angle;
-        it->Orientation(&orientation, &writing_direction, &textline_order,
-                        &deskew_angle);
-        return orientation;
-    }
+static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
+  tesseract::Orientation orientation;
+  tesseract::WritingDirection writing_direction;
+  tesseract::TextlineOrder textline_order;
+  float deskew_angle;
+  it->Orientation(&orientation, &writing_direction, &textline_order,
+                  &deskew_angle);
+  return orientation;
+}
 
 /**
  * Fits a line to the baseline at the given level, and appends its coefficients
@@ -1361,104 +1361,104 @@ namespace tesseract {
  * method currently only inserts a 'textangle' property to indicate the rotation
  * direction and does not add any baseline information to the hocr string.
  */
-    static void AddBaselineCoordsTohOCR(const PageIterator *it,
-                                        PageIteratorLevel level,
-                                        STRING* hocr_str) {
-        tesseract::Orientation orientation = GetBlockTextOrientation(it);
-        if (orientation != ORIENTATION_PAGE_UP) {
-            hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
-            return;
-        }
-
-        int left, top, right, bottom;
-        it->BoundingBox(level, &left, &top, &right, &bottom);
-
-        // Try to get the baseline coordinates at this level.
-        int x1, y1, x2, y2;
-        if (!it->Baseline(level, &x1, &y1, &x2, &y2))
-            return;
-        // Following the description of this field of the hOCR spec, we convert the
-        // baseline coordinates so that "the bottom left of the bounding box is the
-        // origin".
-        x1 -= left;
-        x2 -= left;
-        y1 -= bottom;
-        y2 -= bottom;
-
-        // Now fit a line through the points so we can extract coefficients for the
-        // equation:  y = p1 x + p0
-        double p1 = 0;
-        double p0 = 0;
-        if (x1 == x2) {
-            // Problem computing the polynomial coefficients.
-            return;
-        }
-        p1 = (y2 - y1) / static_cast<double>(x2 - x1);
-        p0 = y1 - static_cast<double>(p1 * x1);
-
-        hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
-        hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
-    }
-
-    static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
-                            int num2) {
-        const size_t BUFSIZE = 64;
-        char id_buffer[BUFSIZE];
-        if (num2 >= 0) {
-            snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
-        } else {
-            snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
-        }
-        id_buffer[BUFSIZE - 1] = '\0';
-        *hocr_str += " id='";
-        *hocr_str += id_buffer;
-        *hocr_str += "'";
-    }
-
-    static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
-                            int num2, int num3) {
-        const size_t BUFSIZE = 64;
-        char id_buffer[BUFSIZE];
-        snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
-        id_buffer[BUFSIZE - 1] = '\0';
-        *hocr_str += " id='";
-        *hocr_str += id_buffer;
-        *hocr_str += "'";
-    }
-
-    static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
-                             STRING* hocr_str) {
-        int left, top, right, bottom;
-        it->BoundingBox(level, &left, &top, &right, &bottom);
-        // This is the only place we use double quotes instead of single quotes,
-        // but it may too late to change for consistency
-        hocr_str->add_str_int(" title=\"bbox ", left);
-        hocr_str->add_str_int(" ", top);
-        hocr_str->add_str_int(" ", right);
-        hocr_str->add_str_int(" ", bottom);
-        // Add baseline coordinates & heights for textlines only.
-        if (level == RIL_TEXTLINE) {
-            AddBaselineCoordsTohOCR(it, level, hocr_str);
-            // add custom height measures
-            float row_height, descenders, ascenders;  // row attributes
-            it->RowAttributes(&row_height, &descenders, &ascenders);
-            // TODO(rays): Do we want to limit these to a single decimal place?
-            hocr_str->add_str_double("; x_size ", row_height);
-            hocr_str->add_str_double("; x_descenders ", descenders * -1);
-            hocr_str->add_str_double("; x_ascenders ", ascenders);
-        }
-        *hocr_str += "\">";
-    }
+static void AddBaselineCoordsTohOCR(const PageIterator *it,
+                                    PageIteratorLevel level,
+                                    STRING* hocr_str) {
+  tesseract::Orientation orientation = GetBlockTextOrientation(it);
+  if (orientation != ORIENTATION_PAGE_UP) {
+    hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
+    return;
+  }
 
-    static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
-                            STRING* hocr_str) {
-        int left, top, right, bottom;
-        it->BoundingBox(level, &left, &top, &right, &bottom);
-        hocr_str->add_str_int("\t", left);
-        hocr_str->add_str_int("\t", top);
-        hocr_str->add_str_int("\t", right - left);
-        hocr_str->add_str_int("\t", bottom - top);
-    }
+  int left, top, right, bottom;
+  it->BoundingBox(level, &left, &top, &right, &bottom);
+
+  // Try to get the baseline coordinates at this level.
+  int x1, y1, x2, y2;
+  if (!it->Baseline(level, &x1, &y1, &x2, &y2))
+    return;
+  // Following the description of this field of the hOCR spec, we convert the
+  // baseline coordinates so that "the bottom left of the bounding box is the
+  // origin".
+  x1 -= left;
+  x2 -= left;
+  y1 -= bottom;
+  y2 -= bottom;
+
+  // Now fit a line through the points so we can extract coefficients for the
+  // equation:  y = p1 x + p0
+  double p1 = 0;
+  double p0 = 0;
+  if (x1 == x2) {
+    // Vertical baseline: the slope is undefined, so emit no coefficients.
+    return;
+  }
+  p1 = (y2 - y1) / static_cast<double>(x2 - x1);
+  p0 = y1 - static_cast<double>(p1 * x1);
+
+  hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
+  hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
+}
+
+static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
+                        int num2) {
+  const size_t BUFSIZE = 64;
+  char id_buffer[BUFSIZE];
+  if (num2 >= 0) {
+    snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
+  } else {
+    snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
+  }
+  id_buffer[BUFSIZE - 1] = '\0';
+  *hocr_str += " id='";
+  *hocr_str += id_buffer;
+  *hocr_str += "'";
+}
+
+static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
+  int num2, int num3) {
+  const size_t BUFSIZE = 64;
+  char id_buffer[BUFSIZE];
+  snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d_%d", base.c_str(), num1, num2,num3);
+  id_buffer[BUFSIZE - 1] = '\0';
+  *hocr_str += " id='";
+  *hocr_str += id_buffer;
+  *hocr_str += "'";
+}
+
+static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
+                         STRING* hocr_str) {
+  int left, top, right, bottom;
+  it->BoundingBox(level, &left, &top, &right, &bottom);
+  // This is the only place we use double quotes instead of single quotes,
+  // but it may be too late to change for consistency.
+  hocr_str->add_str_int(" title=\"bbox ", left);
+  hocr_str->add_str_int(" ", top);
+  hocr_str->add_str_int(" ", right);
+  hocr_str->add_str_int(" ", bottom);
+  // Add baseline coordinates & heights for textlines only.
+  if (level == RIL_TEXTLINE) {
+    AddBaselineCoordsTohOCR(it, level, hocr_str);
+    // add custom height measures
+    float row_height, descenders, ascenders;  // row attributes
+    it->RowAttributes(&row_height, &descenders, &ascenders);
+    // TODO(rays): Do we want to limit these to a single decimal place?
+    hocr_str->add_str_double("; x_size ", row_height);
+    hocr_str->add_str_double("; x_descenders ", descenders * -1);
+    hocr_str->add_str_double("; x_ascenders ", ascenders);
+  }
+  *hocr_str += "\">";
+}
+
+static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
+                        STRING* hocr_str) {
+  int left, top, right, bottom;
+  it->BoundingBox(level, &left, &top, &right, &bottom);
+  hocr_str->add_str_int("\t", left);
+  hocr_str->add_str_int("\t", top);
+  hocr_str->add_str_int("\t", right - left);
+  hocr_str->add_str_int("\t", bottom - top);
+}
 
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
@@ -1469,9 +1469,9 @@ namespace tesseract {
  * STL removed from original patch submission and refactored by rays.
  * Returned string must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::GetHOCRText(int page_number) {
-        return GetHOCRText(nullptr, page_number);
-    }
+char* TessBaseAPI::GetHOCRText(int page_number) {
+  return GetHOCRText(nullptr, page_number);
+}
 
 /**
  * Make a HTML-formatted string with hOCR markup from the internal
@@ -1482,24 +1482,24 @@ namespace tesseract {
  * STL removed from original patch submission and refactored by rays.
  * Returned string must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
-        if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
-            return nullptr;
+char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
+  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+    return nullptr;
 
-        int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
-        int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
-        bool para_is_ltr = true;        // Default direction is LTR
-        const char* paragraph_lang = nullptr;
-        bool font_info = false;
-        GetBoolVariable("hocr_font_info", &font_info);
+  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
+  int page_id = page_number + 1;  // hOCR uses 1-based page numbers.
+  bool para_is_ltr = true;        // Default direction is LTR
+  const char* paragraph_lang = nullptr;
+  bool font_info = false;
+  GetBoolVariable("hocr_font_info", &font_info);
 
-        STRING hocr_str("");
+  STRING hocr_str("");
 
-        if (input_file_ == nullptr)
-            SetInputName(nullptr);
+  if (input_file_ == nullptr)
+      SetInputName(nullptr);
 
 #ifdef _WIN32
-        // convert input name from ANSI encoding to utf-8
+  // convert input name from ANSI encoding to utf-8
   int str16_len =
       MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
   wchar_t *uni16_str = new WCHAR[str16_len];
@@ -1515,325 +1515,325 @@ namespace tesseract {
   delete[] utf8_str;
 #endif
 
-        hocr_str += "  <div class='ocr_page'";
-        AddIdTohOCR(&hocr_str, "page", page_id, -1);
-        hocr_str += " title='image \"";
-        if (input_file_) {
-            hocr_str += HOcrEscape(input_file_->string());
-        } else {
-            hocr_str += "unknown";
-        }
-        hocr_str.add_str_int("\"; bbox ", rect_left_);
-        hocr_str.add_str_int(" ", rect_top_);
-        hocr_str.add_str_int(" ", rect_width_);
-        hocr_str.add_str_int(" ", rect_height_);
-        hocr_str.add_str_int("; ppageno ", page_number);
-        hocr_str += "'>\n";
-
-        ResultIterator *res_it = GetIterator();
-        while (!res_it->Empty(RIL_BLOCK)) {
-            if (res_it->Empty(RIL_WORD)) {
-                res_it->Next(RIL_WORD);
-                continue;
-            }
-
-            // Open any new block/paragraph/textline.
-            if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-                para_is_ltr = true;  // reset to default direction
-                hocr_str += "   <div class='ocr_carea'";
-                AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
-                AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
-            }
-            if (res_it->IsAtBeginningOf(RIL_PARA)) {
-                hocr_str += "\n    <p class='ocr_par'";
-                para_is_ltr = res_it->ParagraphIsLtr();
-                if (!para_is_ltr) {
-                    hocr_str += " dir='rtl'";
-                }
-                AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
-                paragraph_lang = res_it->WordRecognitionLanguage();
-                if (paragraph_lang) {
-                    hocr_str += " lang='";
-                    hocr_str += paragraph_lang;
-                    hocr_str += "'";
-                }
-                AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
-            }
-            if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-                hocr_str += "\n     <span class='ocr_line'";
-                AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
-                AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
-            }
-
-            // Now, process the word...
-            std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
-            if (tesseract_->lstm_choice_mode) {
-                confidencemap = res_it->GetBestLSTMSymbolChoices();
-            }
-            hocr_str += "\n      <span class='ocrx_word'";
-            AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
-            int left, top, right, bottom;
-            bool bold, italic, underlined, monospace, serif, smallcaps;
-            int pointsize, font_id;
-            const char *font_name;
-            res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-            font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
-                                                   &monospace, &serif, &smallcaps,
-                                                   &pointsize, &font_id);
-            hocr_str.add_str_int(" title='bbox ", left);
-            hocr_str.add_str_int(" ", top);
-            hocr_str.add_str_int(" ", right);
-            hocr_str.add_str_int(" ", bottom);
-            hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
-            if (font_info) {
-                if (font_name) {
-                    hocr_str += "; x_font ";
-                    hocr_str += HOcrEscape(font_name);
-                }
-                hocr_str.add_str_int("; x_fsize ", pointsize);
-            }
+  hocr_str += "  <div class='ocr_page'";
+  AddIdTohOCR(&hocr_str, "page", page_id, -1);
+  hocr_str += " title='image \"";
+  if (input_file_) {
+    hocr_str += HOcrEscape(input_file_->string());
+  } else {
+    hocr_str += "unknown";
+  }
+  hocr_str.add_str_int("\"; bbox ", rect_left_);
+  hocr_str.add_str_int(" ", rect_top_);
+  hocr_str.add_str_int(" ", rect_width_);
+  hocr_str.add_str_int(" ", rect_height_);
+  hocr_str.add_str_int("; ppageno ", page_number);
+  hocr_str += "'>\n";
+
+  ResultIterator *res_it = GetIterator();
+  while (!res_it->Empty(RIL_BLOCK)) {
+    if (res_it->Empty(RIL_WORD)) {
+      res_it->Next(RIL_WORD);
+      continue;
+    }
+
+    // Open any new block/paragraph/textline.
+    if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+      para_is_ltr = true;  // reset to default direction
+      hocr_str += "   <div class='ocr_carea'";
+      AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
+      AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
+    }
+    if (res_it->IsAtBeginningOf(RIL_PARA)) {
+      hocr_str += "\n    <p class='ocr_par'";
+      para_is_ltr = res_it->ParagraphIsLtr();
+      if (!para_is_ltr) {
+        hocr_str += " dir='rtl'";
+      }
+      AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
+      paragraph_lang = res_it->WordRecognitionLanguage();
+      if (paragraph_lang) {
+        hocr_str += " lang='";
+        hocr_str += paragraph_lang;
+        hocr_str += "'";
+      }
+      AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
+    }
+    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+      hocr_str += "\n     <span class='ocr_line'";
+      AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
+      AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
+    }
+
+    // Now, process the word...
+    std::vector<std::vector<std::pair<const char*, float>>>* confidencemap = nullptr;
+    if (tesseract_->lstm_choice_mode) {
+      confidencemap = res_it->GetBestLSTMSymbolChoices();
+    }
+    hocr_str += "\n      <span class='ocrx_word'";
+    AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
+    int left, top, right, bottom;
+    bool bold, italic, underlined, monospace, serif, smallcaps;
+    int pointsize, font_id;
+    const char *font_name;
+    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+    font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
+                                           &monospace, &serif, &smallcaps,
+                                           &pointsize, &font_id);
+    hocr_str.add_str_int(" title='bbox ", left);
+    hocr_str.add_str_int(" ", top);
+    hocr_str.add_str_int(" ", right);
+    hocr_str.add_str_int(" ", bottom);
+    hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
+    if (font_info) {
+      if (font_name) {
+        hocr_str += "; x_font ";
+        hocr_str += HOcrEscape(font_name);
+      }
+      hocr_str.add_str_int("; x_fsize ", pointsize);
+    }
+    hocr_str += "'";
+    const char* lang = res_it->WordRecognitionLanguage();
+    if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
+      hocr_str += " lang='";
+      hocr_str += lang;
+      hocr_str += "'";
+    }
+    switch (res_it->WordDirection()) {
+      // Only emit direction if different from current paragraph direction
+      case DIR_LEFT_TO_RIGHT:
+        if (!para_is_ltr) hocr_str += " dir='ltr'";
+        break;
+      case DIR_RIGHT_TO_LEFT:
+        if (para_is_ltr) hocr_str += " dir='rtl'";
+        break;
+      case DIR_MIX:
+      case DIR_NEUTRAL:
+      default:  // Do nothing.
+        break;
+    }
+    hocr_str += ">";
+    bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
+    bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
+    bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
+    if (bold) hocr_str += "<strong>";
+    if (italic) hocr_str += "<em>";
+    do {
+      const std::unique_ptr<const char[]> grapheme(
+          res_it->GetUTF8Text(RIL_SYMBOL));
+      if (grapheme && grapheme[0] != 0) {
+        hocr_str += HOcrEscape(grapheme.get());
+      }
+      res_it->Next(RIL_SYMBOL);
+    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+    if (italic) hocr_str += "</em>";
+    if (bold) hocr_str += "</strong>";
+    // lstm_choice_mode 1: emit one ocrx_cinfo span per confidencemap entry, listing every choice.
+    if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
+      for (size_t i = 0; i < confidencemap->size(); i++) {
+        hocr_str += "\n       <span class='ocrx_cinfo'";
+        AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
+        hocr_str += ">";
+        const auto& timestep = (*confidencemap)[i];  // bind by reference: no vector copy per entry
+        for (const auto& conf : timestep) {
+          hocr_str += "<span class='ocr_glyph'";
+          AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+          hocr_str.add_str_int(" title='x_confs ", static_cast<int>(conf.second * 100));
+          hocr_str += "'";
+          hocr_str += ">";
+          hocr_str += HOcrEscape(conf.first);  // escape like the word text above, not raw
+          hocr_str += "</span>";
+          gcnt++;
+        }
+        hocr_str += "</span>";
+        tcnt++;
+      }
+    } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
+      for (size_t i = 0; i < confidencemap->size(); i++) {
+        const auto& timestep = (*confidencemap)[i];  // bind by reference: no vector copy per entry
+        if (timestep.size() > 0) {
+          hocr_str += "\n       <span class='ocrx_cinfo'";
+          AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
+          hocr_str += " chosen='";
+          hocr_str += HOcrEscape(timestep[0].first);  // value sits inside a quoted attribute: escape it
+          hocr_str += "'>";
+          for (size_t j = 1; j < timestep.size(); j++) {  // entries after the first become ocr_glyph spans
+            hocr_str += "<span class='ocr_glyph'";
+            AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
+            hocr_str.add_str_int(" title='x_confs ", static_cast<int>(timestep[j].second * 100));
             hocr_str += "'";
-            const char* lang = res_it->WordRecognitionLanguage();
-            if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
-                hocr_str += " lang='";
-                hocr_str += lang;
-                hocr_str += "'";
-            }
-            switch (res_it->WordDirection()) {
-                // Only emit direction if different from current paragraph direction
-                case DIR_LEFT_TO_RIGHT:
-                    if (!para_is_ltr) hocr_str += " dir='ltr'";
-                    break;
-                case DIR_RIGHT_TO_LEFT:
-                    if (para_is_ltr) hocr_str += " dir='rtl'";
-                    break;
-                case DIR_MIX:
-                case DIR_NEUTRAL:
-                default:  // Do nothing.
-                    break;
-            }
             hocr_str += ">";
-            bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
-            bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
-            bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
-            if (bold) hocr_str += "<strong>";
-            if (italic) hocr_str += "<em>";
-            do {
-                const std::unique_ptr<const char[]> grapheme(
-                        res_it->GetUTF8Text(RIL_SYMBOL));
-                if (grapheme && grapheme[0] != 0) {
-                    hocr_str += HOcrEscape(grapheme.get());
-                }
-                res_it->Next(RIL_SYMBOL);
-            } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-            if (italic) hocr_str += "</em>";
-            if (bold) hocr_str += "</strong>";
-            // If the lstm choice mode is required it is added here
-            if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
-                for (size_t i = 0; i < confidencemap->size(); i++) {
-                    hocr_str += "\n       <span class='ocrx_cinfo'";
-                    AddIdTohOCR(&hocr_str, "timestep", page_id, wcnt, tcnt);
-                    hocr_str += ">";
-                    std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-                    for (std::pair<const char*, float> conf : timestep) {
-                        hocr_str += "<span class='ocr_glyph'";
-                        AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-                        hocr_str.add_str_int(" title='x_confs ", int(conf.second * 100));
-                        hocr_str += "'";
-                        hocr_str += ">";
-                        hocr_str += conf.first;
-                        hocr_str += "</span>";
-                        gcnt++;
-                    }
-                    hocr_str += "</span>";
-                    tcnt++;
-                }
-            } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
-                for (size_t i = 0; i < confidencemap->size(); i++) {
-                    std::vector<std::pair<const char*, float>> timestep = (*confidencemap)[i];
-                    if (timestep.size() > 0) {
-                        hocr_str += "\n       <span class='ocrx_cinfo'";
-                        AddIdTohOCR(&hocr_str, "lstm_choices", page_id, wcnt, tcnt);
-                        hocr_str += " chosen='";
-                        hocr_str += timestep[0].first;
-                        hocr_str += "'>";
-                        for (size_t j = 1; j < timestep.size(); j++) {
-                            hocr_str += "<span class='ocr_glyph'";
-                            AddIdTohOCR(&hocr_str, "choice", page_id, wcnt, gcnt);
-                            hocr_str.add_str_int(" title='x_confs ", int(timestep[j].second * 100));
-                            hocr_str += "'";
-                            hocr_str += ">";
-                            hocr_str += timestep[j].first;
-                            hocr_str += "</span>";
-                            gcnt++;
-                        }
-                        hocr_str += "</span>";
-                        tcnt++;
-                    }
-                }
-            }
+            hocr_str += HOcrEscape(timestep[j].first);  // escape glyph text like the word text, not raw
             hocr_str += "</span>";
-            tcnt = 1;
-            gcnt = 1;
-            wcnt++;
-            // Close any ending block/paragraph/textline.
-            if (last_word_in_line) {
-                hocr_str += "\n     </span>";
-                lcnt++;
-            }
-            if (last_word_in_para) {
-                hocr_str += "\n    </p>\n";
-                pcnt++;
-                para_is_ltr = true;  // back to default direction
-            }
-            if (last_word_in_block) {
-                hocr_str += "   </div>\n";
-                bcnt++;
-            }
+            gcnt++;
+          }
+          hocr_str += "</span>";
+          tcnt++;
         }
-        hocr_str += "  </div>\n";
-
-        char *ret = new char[hocr_str.length() + 1];
-        strcpy(ret, hocr_str.string());
-        delete res_it;
-        return ret;
+      }
     }
+    hocr_str += "</span>";
+    tcnt = 1;
+    gcnt = 1;
+    wcnt++;
+    // Close any ending block/paragraph/textline.
+    if (last_word_in_line) {
+      hocr_str += "\n     </span>";
+      lcnt++;
+    }
+    if (last_word_in_para) {
+      hocr_str += "\n    </p>\n";
+      pcnt++;
+      para_is_ltr = true;  // back to default direction
+    }
+    if (last_word_in_block) {
+      hocr_str += "   </div>\n";
+      bcnt++;
+    }
+  }
+  hocr_str += "  </div>\n";
+
+  char *ret = new char[hocr_str.length() + 1];
+  strcpy(ret, hocr_str.string());
+  delete res_it;
+  return ret;
+}
 
 /**
  * Make a TSV-formatted string from the internal data structures.
  * page_number is 0-based but will appear in the output as 1-based.
  * Returned string must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::GetTSVText(int page_number) {
-        if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
-            return nullptr;
-
-        int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
-        int page_id = page_number + 1;  // we use 1-based page numbers.
-
-        STRING tsv_str("");
-
-        int page_num = page_id;
-        int block_num = 0;
-        int par_num = 0;
-        int line_num = 0;
-        int word_num = 0;
-
-        tsv_str.add_str_int("1\t", page_num);  // level 1 - page
-        tsv_str.add_str_int("\t", block_num);
-        tsv_str.add_str_int("\t", par_num);
-        tsv_str.add_str_int("\t", line_num);
-        tsv_str.add_str_int("\t", word_num);
-        tsv_str.add_str_int("\t", rect_left_);
-        tsv_str.add_str_int("\t", rect_top_);
-        tsv_str.add_str_int("\t", rect_width_);
-        tsv_str.add_str_int("\t", rect_height_);
-        tsv_str += "\t-1\t\n";
-
-        ResultIterator* res_it = GetIterator();
-        while (!res_it->Empty(RIL_BLOCK)) {
-            if (res_it->Empty(RIL_WORD)) {
-                res_it->Next(RIL_WORD);
-                continue;
-            }
-
-            // Add rows for any new block/paragraph/textline.
-            if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-                block_num++;
-                par_num = 0;
-                line_num = 0;
-                word_num = 0;
-                tsv_str.add_str_int("2\t", page_num);  // level 2 - block
-                tsv_str.add_str_int("\t", block_num);
-                tsv_str.add_str_int("\t", par_num);
-                tsv_str.add_str_int("\t", line_num);
-                tsv_str.add_str_int("\t", word_num);
-                AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
-                tsv_str += "\t-1\t\n";  // end of row for block
-            }
-            if (res_it->IsAtBeginningOf(RIL_PARA)) {
-                par_num++;
-                line_num = 0;
-                word_num = 0;
-                tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
-                tsv_str.add_str_int("\t", block_num);
-                tsv_str.add_str_int("\t", par_num);
-                tsv_str.add_str_int("\t", line_num);
-                tsv_str.add_str_int("\t", word_num);
-                AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
-                tsv_str += "\t-1\t\n";  // end of row for para
-            }
-            if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-                line_num++;
-                word_num = 0;
-                tsv_str.add_str_int("4\t", page_num);  // level 4 - line
-                tsv_str.add_str_int("\t", block_num);
-                tsv_str.add_str_int("\t", par_num);
-                tsv_str.add_str_int("\t", line_num);
-                tsv_str.add_str_int("\t", word_num);
-                AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
-                tsv_str += "\t-1\t\n";  // end of row for line
-            }
-
-            // Now, process the word...
-            int left, top, right, bottom;
-            res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-            word_num++;
-            tsv_str.add_str_int("5\t", page_num);  // level 5 - word
-            tsv_str.add_str_int("\t", block_num);
-            tsv_str.add_str_int("\t", par_num);
-            tsv_str.add_str_int("\t", line_num);
-            tsv_str.add_str_int("\t", word_num);
-            tsv_str.add_str_int("\t", left);
-            tsv_str.add_str_int("\t", top);
-            tsv_str.add_str_int("\t", right - left);
-            tsv_str.add_str_int("\t", bottom - top);
-            tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
-            tsv_str += "\t";
-
-            // Increment counts if at end of block/paragraph/textline.
-            if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
-            if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
-            if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
-
-            do {
-                tsv_str +=
-                        std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
-                res_it->Next(RIL_SYMBOL);
-            } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
-            tsv_str += "\n";  // end of row
-            wcnt++;
-        }
+char* TessBaseAPI::GetTSVText(int page_number) {
+  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
+    return nullptr;  // no tesseract_ instance, or the on-demand Recognize() call failed
+
+  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;  // NOTE(review): only incremented below, never read
+  int page_id = page_number + 1;  // we use 1-based page numbers.
+
+  STRING tsv_str("");  // accumulates every row of the output
+
+  int page_num = page_id;
+  int block_num = 0;  // stays 0 on the page row; incremented at each new block
+  int par_num = 0;    // reset at each new block, incremented at each new paragraph
+  int line_num = 0;   // reset at each new block/paragraph, incremented at each new line
+  int word_num = 0;   // reset at each new block/paragraph/line, incremented per word
+
+  tsv_str.add_str_int("1\t", page_num);  // level 1 - page
+  tsv_str.add_str_int("\t", block_num);
+  tsv_str.add_str_int("\t", par_num);
+  tsv_str.add_str_int("\t", line_num);
+  tsv_str.add_str_int("\t", word_num);
+  tsv_str.add_str_int("\t", rect_left_);
+  tsv_str.add_str_int("\t", rect_top_);
+  tsv_str.add_str_int("\t", rect_width_);
+  tsv_str.add_str_int("\t", rect_height_);
+  tsv_str += "\t-1\t\n";  // end of row for page: -1 where word rows put confidence, empty text
+
+  ResultIterator* res_it = GetIterator();  // deleted at the end of this function
+  while (!res_it->Empty(RIL_BLOCK)) {
+    if (res_it->Empty(RIL_WORD)) {  // nothing at word level here: step on and retry
+      res_it->Next(RIL_WORD);
+      continue;
+    }
+
+    // Add rows for any new block/paragraph/textline (each resets the finer counters).
+    if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
+      block_num++;
+      par_num = 0;
+      line_num = 0;
+      word_num = 0;
+      tsv_str.add_str_int("2\t", page_num);  // level 2 - block
+      tsv_str.add_str_int("\t", block_num);
+      tsv_str.add_str_int("\t", par_num);
+      tsv_str.add_str_int("\t", line_num);
+      tsv_str.add_str_int("\t", word_num);
+      AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
+      tsv_str += "\t-1\t\n";  // end of row for block
+    }
+    if (res_it->IsAtBeginningOf(RIL_PARA)) {
+      par_num++;
+      line_num = 0;
+      word_num = 0;
+      tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
+      tsv_str.add_str_int("\t", block_num);
+      tsv_str.add_str_int("\t", par_num);
+      tsv_str.add_str_int("\t", line_num);
+      tsv_str.add_str_int("\t", word_num);
+      AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
+      tsv_str += "\t-1\t\n";  // end of row for para
+    }
+    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
+      line_num++;
+      word_num = 0;
+      tsv_str.add_str_int("4\t", page_num);  // level 4 - line
+      tsv_str.add_str_int("\t", block_num);
+      tsv_str.add_str_int("\t", par_num);
+      tsv_str.add_str_int("\t", line_num);
+      tsv_str.add_str_int("\t", word_num);
+      AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
+      tsv_str += "\t-1\t\n";  // end of row for line
+    }
+
+    // Now, process the word: ids, bounding box, confidence, then the text itself.
+    int left, top, right, bottom;
+    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
+    word_num++;
+    tsv_str.add_str_int("5\t", page_num);  // level 5 - word
+    tsv_str.add_str_int("\t", block_num);
+    tsv_str.add_str_int("\t", par_num);
+    tsv_str.add_str_int("\t", line_num);
+    tsv_str.add_str_int("\t", word_num);
+    tsv_str.add_str_int("\t", left);
+    tsv_str.add_str_int("\t", top);
+    tsv_str.add_str_int("\t", right - left);  // width
+    tsv_str.add_str_int("\t", bottom - top);  // height
+    tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
+    tsv_str += "\t";
+
+    // Increment counts if at end of block/paragraph/textline (values are not used afterwards).
+    if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
+    if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
+    if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
+
+    do {  // append symbol text until the iterator reaches the next word or runs out of blocks
+      tsv_str +=
+          std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
+      res_it->Next(RIL_SYMBOL);
+    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
+    tsv_str += "\n";  // end of row
+    wcnt++;
+  }
 
-        char* ret = new char[tsv_str.length() + 1];
-        strcpy(ret, tsv_str.string());
-        delete res_it;
-        return ret;
-    }
+  char* ret = new char[tsv_str.length() + 1];
+  strcpy(ret, tsv_str.string());
+  delete res_it;
+  return ret;
+}
 
 /** The 5 numbers output for each box (the usual 4 and a page number.) */
-    const int kNumbersPerBlob = 5;
+const int kNumbersPerBlob = 5;
 /**
  * The number of bytes taken by each number. Since we use int16_t for ICOORD,
  * assume only 5 digits max.
  */
-    const int kBytesPerNumber = 5;
+const int kBytesPerNumber = 5;
 /**
  * Multiplier for max expected textlength assumes (kBytesPerNumber + space)
  * * kNumbersPerBlob plus the newline. Add to this the
  * original UTF8 characters, and one kMaxBytesPerLine for safety.
  */
-    const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1;
+const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1;
 /** Max bytes in the decimal representation of int64_t. */
-    const int kBytesPer64BitNumber = 20;
+const int kBytesPer64BitNumber = 20;
 /**
  * A maximal single box could occupy kNumbersPerBlob numbers at
  * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a
  * space plus the newline and the maximum length of a UNICHAR.
  * Test against this on each iteration for safety.
  */
-    const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
-                                 UNICHAR_LEN;
+const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
+    UNICHAR_LEN;
 
 /**
  * The recognized text is returned as a char* which is coded
@@ -1841,160 +1841,160 @@ namespace tesseract {
  * page_number is a 0-base page index that will appear in the box file.
  * Returned string must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::GetBoxText(int page_number) {
-        if (tesseract_ == nullptr ||
-            (!recognition_done_ && Recognize(nullptr) < 0))
-            return nullptr;
-        int blob_count;
-        int utf8_length = TextLength(&blob_count);
-        int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
-                           kMaxBytesPerLine;
-        char* result = new char[total_length];
-        result[0] = '\0';
-        int output_length = 0;
-        LTRResultIterator* it = GetLTRIterator();
-        do {
-            int left, top, right, bottom;
-            if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
-                const std::unique_ptr</*non-const*/ char[]> text(
-                        it->GetUTF8Text(RIL_SYMBOL));
-                // Tesseract uses space for recognition failure. Fix to a reject
-                // character, kTesseractReject so we don't create illegal box files.
-                for (int i = 0; text[i] != '\0'; ++i) {
-                    if (text[i] == ' ')
-                        text[i] = kTesseractReject;
-                }
-                snprintf(result + output_length, total_length - output_length,
-                         "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
-                         right, image_height_ - top, page_number);
-                output_length += strlen(result + output_length);
-                // Just in case...
-                if (output_length + kMaxBytesPerLine > total_length)
-                    break;
-            }
-        } while (it->Next(RIL_SYMBOL));
-        delete it;
-        return result;
-    }
+char* TessBaseAPI::GetBoxText(int page_number) {
+  if (tesseract_ == nullptr ||
+      (!recognition_done_ && Recognize(nullptr) < 0))
+    return nullptr;  // no tesseract_ instance, or the on-demand Recognize() call failed
+  int blob_count;
+  int utf8_length = TextLength(&blob_count);
+  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
+      kMaxBytesPerLine;  // per-blob numeric fields + the text + one spare maximal line
+  char* result = new char[total_length];  // returned to the caller, who frees it with delete[]
+  result[0] = '\0';
+  int output_length = 0;  // bytes written into result so far
+  LTRResultIterator* it = GetLTRIterator();  // deleted before returning
+  do {
+    int left, top, right, bottom;
+    if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
+      const std::unique_ptr</*non-const*/ char[]> text(
+          it->GetUTF8Text(RIL_SYMBOL));
+      // Tesseract uses space for recognition failure. Fix to a reject
+      // character, kTesseractReject so we don't create illegal box files.
+      for (int i = 0; text[i] != '\0'; ++i) {
+        if (text[i] == ' ')
+          text[i] = kTesseractReject;
+      }
+      snprintf(result + output_length, total_length - output_length,
+               "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom,
+               right, image_height_ - top, page_number);  // y values written as image_height_ - y
+      output_length += strlen(result + output_length);  // advance past what was just written
+      // Just in case: stop while a maximal line would still fit in the buffer.
+      if (output_length + kMaxBytesPerLine > total_length)
+        break;
+    }
+  } while (it->Next(RIL_SYMBOL));
+  delete it;
+  return result;
+}
 
 /**
  * Conversion table for non-latin characters.
  * Maps characters out of the latin set into the latin set.
  * TODO(rays) incorporate this translation into unicharset.
  */
-    const int kUniChs[] = {
-            0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
-    };
+const int kUniChs[] = {
+  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
+};
 /** Latin chars corresponding to the unicode chars above. */
-    const int kLatinChs[] = {
-            0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
-    };
+const int kLatinChs[] = {
+  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
+};
 
 /**
  * The recognized text is returned as a char* which is coded
  * as UNLV format Latin-1 with specific reject and suspect codes.
  * Returned string must be freed with the delete [] operator.
  */
-    char* TessBaseAPI::GetUNLVText() {
-        if (tesseract_ == nullptr ||
-            (!recognition_done_ && Recognize(nullptr) < 0))
-            return nullptr;
-        bool tilde_crunch_written = false;
-        bool last_char_was_newline = true;
-        bool last_char_was_tilde = false;
-
-        int total_length = TextLength(nullptr);
-        PAGE_RES_IT   page_res_it(page_res_);
-        char* result = new char[total_length];
-        char* ptr = result;
-        for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-             page_res_it.forward()) {
-            WERD_RES *word = page_res_it.word();
-            // Process the current word.
-            if (word->unlv_crunch_mode != CR_NONE) {
-                if (word->unlv_crunch_mode != CR_DELETE &&
-                    (!tilde_crunch_written ||
-                     (word->unlv_crunch_mode == CR_KEEP_SPACE &&
-                      word->word->space() > 0 &&
-                      !word->word->flag(W_FUZZY_NON) &&
-                      !word->word->flag(W_FUZZY_SP)))) {
-                    if (!word->word->flag(W_BOL) &&
-                        word->word->space() > 0 &&
-                        !word->word->flag(W_FUZZY_NON) &&
-                        !word->word->flag(W_FUZZY_SP)) {
-                        /* Write a space to separate from preceding good text */
-                        *ptr++ = ' ';
-                        last_char_was_tilde = false;
-                    }
-                    if (!last_char_was_tilde) {
-                        // Write a reject char.
-                        last_char_was_tilde = true;
-                        *ptr++ = kUNLVReject;
-                        tilde_crunch_written = true;
-                        last_char_was_newline = false;
-                    }
-                }
-            } else {
-                // NORMAL PROCESSING of non tilde crunched words.
-                tilde_crunch_written = false;
-                tesseract_->set_unlv_suspects(word);
-                const char* wordstr = word->best_choice->unichar_string().string();
-                const STRING& lengths = word->best_choice->unichar_lengths();
-                int length = lengths.length();
-                int i = 0;
-                int offset = 0;
-
-                if (last_char_was_tilde &&
-                    word->word->space() == 0 && wordstr[offset] == ' ') {
-                    // Prevent adjacent tilde across words - we know that adjacent tildes
-                    // within words have been removed.
-                    // Skip the first character.
-                    offset = lengths[i++];
-                }
-                if (i < length && wordstr[offset] != 0) {
-                    if (!last_char_was_newline)
-                        *ptr++ = ' ';
-                    else
-                        last_char_was_newline = false;
-                    for (; i < length; offset += lengths[i++]) {
-                        if (wordstr[offset] == ' ' ||
-                            wordstr[offset] == kTesseractReject) {
-                            *ptr++ = kUNLVReject;
-                            last_char_was_tilde = true;
-                        } else {
-                            if (word->reject_map[i].rejected())
-                                *ptr++ = kUNLVSuspect;
-                            UNICHAR ch(wordstr + offset, lengths[i]);
-                            int uni_ch = ch.first_uni();
-                            for (int j = 0; kUniChs[j] != 0; ++j) {
-                                if (kUniChs[j] == uni_ch) {
-                                    uni_ch = kLatinChs[j];
-                                    break;
-                                }
-                            }
-                            if (uni_ch <= 0xff) {
-                                *ptr++ = static_cast<char>(uni_ch);
-                                last_char_was_tilde = false;
-                            } else {
-                                *ptr++ = kUNLVReject;
-                                last_char_was_tilde = true;
-                            }
-                        }
-                    }
-                }
+char* TessBaseAPI::GetUNLVText() {
+  if (tesseract_ == nullptr ||
+      (!recognition_done_ && Recognize(nullptr) < 0))
+    return nullptr;
+  bool tilde_crunch_written = false;
+  bool last_char_was_newline = true;
+  bool last_char_was_tilde = false;
+
+  int total_length = TextLength(nullptr);
+  PAGE_RES_IT   page_res_it(page_res_);
+  char* result = new char[total_length];
+  char* ptr = result;
+  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+       page_res_it.forward()) {
+    WERD_RES *word = page_res_it.word();
+    // Process the current word.
+    if (word->unlv_crunch_mode != CR_NONE) {
+      if (word->unlv_crunch_mode != CR_DELETE &&
+          (!tilde_crunch_written ||
+           (word->unlv_crunch_mode == CR_KEEP_SPACE &&
+            word->word->space() > 0 &&
+            !word->word->flag(W_FUZZY_NON) &&
+            !word->word->flag(W_FUZZY_SP)))) {
+        if (!word->word->flag(W_BOL) &&
+            word->word->space() > 0 &&
+            !word->word->flag(W_FUZZY_NON) &&
+            !word->word->flag(W_FUZZY_SP)) {
+          /* Write a space to separate from preceding good text */
+          *ptr++ = ' ';
+          last_char_was_tilde = false;
+        }
+        if (!last_char_was_tilde) {
+          // Write a reject char.
+          last_char_was_tilde = true;
+          *ptr++ = kUNLVReject;
+          tilde_crunch_written = true;
+          last_char_was_newline = false;
+        }
+      }
+    } else {
+      // NORMAL PROCESSING of non tilde crunched words.
+      tilde_crunch_written = false;
+      tesseract_->set_unlv_suspects(word);
+      const char* wordstr = word->best_choice->unichar_string().string();
+      const STRING& lengths = word->best_choice->unichar_lengths();
+      int length = lengths.length();
+      int i = 0;
+      int offset = 0;
+
+      if (last_char_was_tilde &&
+          word->word->space() == 0 && wordstr[offset] == ' ') {
+        // Prevent adjacent tilde across words - we know that adjacent tildes
+        // within words have been removed.
+        // Skip the first character.
+        offset = lengths[i++];
+      }
+      if (i < length && wordstr[offset] != 0) {
+        if (!last_char_was_newline)
+          *ptr++ = ' ';
+        else
+          last_char_was_newline = false;
+        for (; i < length; offset += lengths[i++]) {
+          if (wordstr[offset] == ' ' ||
+              wordstr[offset] == kTesseractReject) {
+            *ptr++ = kUNLVReject;
+            last_char_was_tilde = true;
+          } else {
+            if (word->reject_map[i].rejected())
+              *ptr++ = kUNLVSuspect;
+            UNICHAR ch(wordstr + offset, lengths[i]);
+            int uni_ch = ch.first_uni();
+            for (int j = 0; kUniChs[j] != 0; ++j) {
+              if (kUniChs[j] == uni_ch) {
+                uni_ch = kLatinChs[j];
+                break;
+              }
             }
-            if (word->word->flag(W_EOL) && !last_char_was_newline) {
-                /* Add a new line output */
-                *ptr++ = '\n';
-                tilde_crunch_written = false;
-                last_char_was_newline = true;
-                last_char_was_tilde = false;
+            if (uni_ch <= 0xff) {
+              *ptr++ = static_cast<char>(uni_ch);
+              last_char_was_tilde = false;
+            } else {
+              *ptr++ = kUNLVReject;
+              last_char_was_tilde = true;
             }
+          }
         }
-        *ptr++ = '\n';
-        *ptr = '\0';
-        return result;
+      }
+    }
+    if (word->word->flag(W_EOL) && !last_char_was_newline) {
+      /* Add a new line output */
+      *ptr++ = '\n';
+      tilde_crunch_written = false;
+      last_char_was_newline = true;
+      last_char_was_tilde = false;
     }
+  }
+  *ptr++ = '\n';
+  *ptr = '\0';
+  return result;
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 
@@ -2007,103 +2007,103 @@ namespace tesseract {
  * script_conf is confidence level in the script
  * Returns true on success and writes values to each parameter as an output
  */
-    bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
-                                              const char** script_name,
-                                              float* script_conf) {
-        OSResults osr;
-
-        bool osd = DetectOS(&osr);
-        if (!osd) {
-            return false;
-        }
+bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
+                                          const char** script_name,
+                                          float* script_conf) {
+  OSResults osr;
+
+  bool osd = DetectOS(&osr);
+  if (!osd) {
+    return false;
+  }
 
-        int orient_id = osr.best_result.orientation_id;
-        int script_id = osr.get_best_script(orient_id);
-        if (orient_conf) *orient_conf = osr.best_result.oconfidence;
-        if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
+  int orient_id = osr.best_result.orientation_id;
+  int script_id = osr.get_best_script(orient_id);
+  if (orient_conf) *orient_conf = osr.best_result.oconfidence;
+  if (orient_deg) *orient_deg = orient_id * 90;  // convert quadrant to degrees
 
-        if (script_name) {
-            const char* script = osr.unicharset->get_script_from_script_id(script_id);
+  if (script_name) {
+    const char* script = osr.unicharset->get_script_from_script_id(script_id);
 
-            *script_name = script;
-        }
+    *script_name = script;
+  }
 
-        if (script_conf) *script_conf = osr.best_result.sconfidence;
+  if (script_conf) *script_conf = osr.best_result.sconfidence;
 
-        return true;
-    }
+  return true;
+}
 
 /**
  * The recognized text is returned as a char* which is coded
  * as UTF8 and must be freed with the delete [] operator.
  * page_number is a 0-based page index that will appear in the osd file.
  */
-    char* TessBaseAPI::GetOsdText(int page_number) {
-        int orient_deg;
-        float orient_conf;
-        const char* script_name;
-        float script_conf;
-
-        if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
-                                     &script_conf))
-            return nullptr;
-
-        // clockwise rotation needed to make the page upright
-        int rotate = OrientationIdToValue(orient_deg / 90);
-
-        const int kOsdBufsize = 255;
-        char* osd_buf = new char[kOsdBufsize];
-        snprintf(osd_buf, kOsdBufsize,
-                 "Page number: %d\n"
-                 "Orientation in degrees: %d\n"
-                 "Rotate: %d\n"
-                 "Orientation confidence: %.2f\n"
-                 "Script: %s\n"
-                 "Script confidence: %.2f\n",
-                 page_number, orient_deg, rotate, orient_conf, script_name,
-                 script_conf);
-
-        return osd_buf;
-    }
+char* TessBaseAPI::GetOsdText(int page_number) {
+  // Filled in by DetectOrientationScript() when it succeeds.
+  int degrees;
+  float degrees_conf;
+  const char* script;
+  float script_confidence;
+  const bool detected = DetectOrientationScript(&degrees, &degrees_conf,
+                                                &script, &script_confidence);
+  if (!detected) return nullptr;
+
+  // Quadrant index -> clockwise rotation that makes the page upright.
+  const int rotation = OrientationIdToValue(degrees / 90);
+
+  const int kBufferSize = 255;
+  char* text = new char[kBufferSize];  // returned to the caller unfreed
+  snprintf(text, kBufferSize,
+           "Page number: %d\n"
+           "Orientation in degrees: %d\n"
+           "Rotate: %d\n"
+           "Orientation confidence: %.2f\n"
+           "Script: %s\n"
+           "Script confidence: %.2f\n",
+           page_number, degrees, rotation, degrees_conf, script,
+           script_confidence);
+
+  return text;
+}
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
 /** Returns the average word confidence for Tesseract page result. */
-    int TessBaseAPI::MeanTextConf() {
-        int* conf = AllWordConfidences();
-        if (!conf) return 0;
-        int sum = 0;
-        int *pt = conf;
-        while (*pt >= 0) sum += *pt++;
-        if (pt != conf) sum /= pt - conf;
-        delete [] conf;
-        return sum;
-    }
+int TessBaseAPI::MeanTextConf() {
+  int* confidences = AllWordConfidences();
+  if (confidences == nullptr) return 0;
+  // A negative sentinel ends the array; total up what precedes it.
+  int total = 0;
+  int count = 0;
+  for (; confidences[count] >= 0; ++count) total += confidences[count];
+  delete [] confidences;
+  return count > 0 ? total / count : total;
+}
 
 /** Returns an array of all word confidences, terminated by -1. */
-    int* TessBaseAPI::AllWordConfidences() {
-        if (tesseract_ == nullptr ||
-            (!recognition_done_ && Recognize(nullptr) < 0))
-            return nullptr;
-        int n_word = 0;
-        PAGE_RES_IT res_it(page_res_);
-        for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
-            n_word++;
-
-        int* conf = new int[n_word+1];
-        n_word = 0;
-        for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
-            WERD_RES *word = res_it.word();
-            WERD_CHOICE* choice = word->best_choice;
-            int w_conf = static_cast<int>(100 + 5 * choice->certainty());
-            // This is the eq for converting Tesseract confidence to 1..100
-            if (w_conf < 0) w_conf = 0;
-            if (w_conf > 100) w_conf = 100;
-            conf[n_word++] = w_conf;
-        }
-        conf[n_word] = -1;
-        return conf;
-    }
+int* TessBaseAPI::AllWordConfidences() {
+  if (tesseract_ == nullptr) return nullptr;
+  if (!recognition_done_ && Recognize(nullptr) < 0) return nullptr;
+  // First pass: count the words so the result can be sized in one allocation.
+  PAGE_RES_IT word_it(page_res_);
+  int word_count = 0;
+  for (word_it.restart_page(); word_it.word() != nullptr; word_it.forward()) {
+    ++word_count;
+  }
+
+  // Second pass: fill in one confidence per word, plus the -1 terminator.
+  int* confidences = new int[word_count + 1];
+  int index = 0;
+  for (word_it.restart_page(); word_it.word() != nullptr; word_it.forward()) {
+    WERD_CHOICE* best = word_it.word()->best_choice;
+    // Scale the certainty, then clamp the result into the range 0..100.
+    int scaled = static_cast<int>(100 + 5 * best->certainty());
+    scaled = scaled < 0 ? 0 : (scaled > 100 ? 100 : scaled);
+    confidences[index++] = scaled;
+  }
+  confidences[index] = -1;
+  return confidences;
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
@@ -2116,61 +2116,61 @@ namespace tesseract {
  * The currently set PageSegMode is preserved.
  * Returns false if adaption was not possible for some reason.
  */
-    bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
-        int debug = 0;
-        GetIntVariable("applybox_debug", &debug);
-        bool success = true;
-        PageSegMode current_psm = GetPageSegMode();
-        SetPageSegMode(mode);
-        SetVariable("classify_enable_learning", "0");
-        const std::unique_ptr<const char[]> text(GetUTF8Text());
-        if (debug) {
-            tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
-        }
-        if (text != nullptr) {
-            PAGE_RES_IT it(page_res_);
-            WERD_RES* word_res = it.word();
-            if (word_res != nullptr) {
-                word_res->word->set_text(wordstr);
-                // Check to see if text matches wordstr.
-                int w = 0;
-                int t;
-                for (t = 0; text[t] != '\0'; ++t) {
-                    if (text[t] == '\n' || text[t] == ' ')
-                        continue;
-                    while (wordstr[w] == ' ') ++w;
-                    if (text[t] != wordstr[w])
-                        break;
-                    ++w;
-                }
-                if (text[t] != '\0' || wordstr[w] != '\0') {
-                    // No match.
-                    delete page_res_;
-                    GenericVector<TBOX> boxes;
-                    page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
-                    tesseract_->ReSegmentByClassification(page_res_);
-                    tesseract_->TidyUp(page_res_);
-                    PAGE_RES_IT pr_it(page_res_);
-                    if (pr_it.word() == nullptr)
-                        success = false;
-                    else
-                        word_res = pr_it.word();
-                } else {
-                    word_res->BestChoiceToCorrectText();
-                }
-                if (success) {
-                    tesseract_->EnableLearning = true;
-                    tesseract_->LearnWord(nullptr, word_res);
-                }
-            } else {
-                success = false;
-            }
-        } else {
-            success = false;
-        }
-        SetPageSegMode(current_psm);
-        return success;
+bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
+  int debug = 0;
+  GetIntVariable("applybox_debug", &debug);
+  bool success = true;
+  PageSegMode current_psm = GetPageSegMode();  // restored before returning
+  SetPageSegMode(mode);
+  SetVariable("classify_enable_learning", "0");
+  const std::unique_ptr<const char[]> text(GetUTF8Text());
+  if (debug)  // text may be null (checked below); never hand null to %s.
+    tprintf("Trying to adapt \"%s\" to \"%s\"\n",
+            text != nullptr ? text.get() : "(null)", wordstr);
+  if (text != nullptr) {
+    PAGE_RES_IT it(page_res_);
+    WERD_RES* word_res = it.word();
+    if (word_res != nullptr) {
+      word_res->word->set_text(wordstr);
+      // Compare text with wordstr, ignoring spaces (and newlines in text).
+      int w = 0;
+      int t;
+      for (t = 0; text[t] != '\0'; ++t) {
+        if (text[t] == '\n' || text[t] == ' ')
+          continue;
+        while (wordstr[w] == ' ') ++w;
+        if (text[t] != wordstr[w])
+          break;
+        ++w;
+      }
+      if (text[t] != '\0' || wordstr[w] != '\0') {
+        // No match: rebuild page_res_ and resegment by classification.
+        delete page_res_;
+        GenericVector<TBOX> boxes;
+        page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
+        tesseract_->ReSegmentByClassification(page_res_);
+        tesseract_->TidyUp(page_res_);
+        PAGE_RES_IT pr_it(page_res_);
+        if (pr_it.word() == nullptr)
+          success = false;
+        else
+          word_res = pr_it.word();  // old word_res went with the old page_res_
+      } else {
+        word_res->BestChoiceToCorrectText();
+      }
+      if (success) {
+        tesseract_->EnableLearning = true;
+        tesseract_->LearnWord(nullptr, word_res);
+      }
+    } else {
+      success = false;
     }
+  } else {
+    success = false;
+  }
+  SetPageSegMode(current_psm);
+  return success;
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /**
@@ -2179,12 +2179,12 @@ namespace tesseract {
  * Afterwards, you must call SetImage or TesseractRect before doing
  * any Recognize or Get* operation.
  */
-    void TessBaseAPI::Clear() {
-        if (thresholder_ != nullptr)
-            thresholder_->Clear();
-        ClearResults();
-        if (tesseract_ != nullptr) SetInputImage(nullptr);
-    }
+void TessBaseAPI::Clear() {
+  // Reset the thresholder first (when one exists), then the results.
+  if (thresholder_) thresholder_->Clear();
+  ClearResults();
+  if (tesseract_) SetInputImage(nullptr);
+}
 
 /**
  * Close down tesseract and free up all memory. End() is equivalent to
@@ -2192,100 +2192,100 @@ namespace tesseract {
  * Once End() has been used, none of the other API functions may be used
  * other than Init and anything declared above it in the class definition.
  */
-    void TessBaseAPI::End() {
-        Clear();
-        delete thresholder_;
-        thresholder_ = nullptr;
-        delete page_res_;
-        page_res_ = nullptr;
-        delete block_list_;
-        block_list_ = nullptr;
-        if (paragraph_models_ != nullptr) {
-            paragraph_models_->delete_data_pointers();
-            delete paragraph_models_;
-            paragraph_models_ = nullptr;
-        }
-        if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
-        delete tesseract_;
-        tesseract_ = nullptr;
-        delete osd_tesseract_;
-        osd_tesseract_ = nullptr;
-        delete equ_detect_;
-        equ_detect_ = nullptr;
-        delete input_file_;
-        input_file_ = nullptr;
-        delete output_file_;
-        output_file_ = nullptr;
-        delete datapath_;
-        datapath_ = nullptr;
-        delete language_;
-        language_ = nullptr;
-    }
+void TessBaseAPI::End() {
+  Clear();
+  delete thresholder_;
+  thresholder_ = nullptr;  // every member is nulled right after its delete
+  delete page_res_;
+  page_res_ = nullptr;
+  delete block_list_;
+  block_list_ = nullptr;
+  if (paragraph_models_ != nullptr) {
+    paragraph_models_->delete_data_pointers();  // presumably frees the models
+    delete paragraph_models_;
+    paragraph_models_ = nullptr;
+  }
+  if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr;
+  delete tesseract_;  // osd_tesseract_ was cleared above if it aliased this
+  tesseract_ = nullptr;
+  delete osd_tesseract_;  // a no-op when it was nulled by the alias check
+  osd_tesseract_ = nullptr;
+  delete equ_detect_;
+  equ_detect_ = nullptr;
+  delete input_file_;
+  input_file_ = nullptr;
+  delete output_file_;
+  output_file_ = nullptr;
+  delete datapath_;
+  datapath_ = nullptr;
+  delete language_;
+  language_ = nullptr;
+}
 
 // Clear any library-level memory caches.
 // There are a variety of expensive-to-load constant data structures (mostly
 // language dictionaries) that are cached globally -- surviving the Init()
 // and End() of individual TessBaseAPI's.  This function allows the clearing
 // of these caches.
-    void TessBaseAPI::ClearPersistentCache() {
-        Dict::GlobalDawgCache()->DeleteUnusedDawgs();
-    }
+void TessBaseAPI::ClearPersistentCache() {
+  Dict::GlobalDawgCache()->DeleteUnusedDawgs();  // uses no member state
+}
 
 /**
  * Check whether a word is valid according to Tesseract's language model
  * returns 0 if the word is invalid, non-zero if valid
  */
-    int TessBaseAPI::IsValidWord(const char *word) {
-        return tesseract_->getDict().valid_word(word);
-    }
+int TessBaseAPI::IsValidWord(const char *word) {
+  return tesseract_->getDict().valid_word(word);  // tesseract_ not checked
+}
 // Returns true if utf8_character is defined in the UniCharset.
-    bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
-        return tesseract_->unicharset.contains_unichar(utf8_character);
-    }
+bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
+  return tesseract_->unicharset.contains_unichar(utf8_character);
+}
 
 
 // TODO(rays) Obsolete this function and replace with a more aptly named
 // function that returns image coordinates rather than tesseract coordinates.
-    bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
-        PageIterator* it = AnalyseLayout();
-        if (it == nullptr) {
-            return false;
-        }
-        int x1, x2, y1, y2;
-        it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
-        // Calculate offset and slope (NOTE: Kind of ugly)
-        if (x2 <= x1) x2 = x1 + 1;
-        // Convert the point pair to slope/offset of the baseline (in image coords.)
-        *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
-        *out_offset = static_cast<int>(y1 - *out_slope * x1);
-        // Get the y-coord of the baseline at the left and right edges of the
-        // textline's bounding box.
-        int left, top, right, bottom;
-        if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
-            delete it;
-            return false;
-        }
-        int left_y = IntCastRounded(*out_slope * left + *out_offset);
-        int right_y = IntCastRounded(*out_slope * right + *out_offset);
-        // Shift the baseline down so it passes through the nearest bottom-corner
-        // of the textline's bounding box. This is the difference between the y
-        // at the lowest (max) edge of the box and the actual box bottom.
-        *out_offset += bottom - std::max(left_y, right_y);
-        // Switch back to bottom-up tesseract coordinates. Requires negation of
-        // the slope and height - offset for the offset.
-        *out_slope = -*out_slope;
-        *out_offset = rect_height_ - *out_offset;
-        delete it;
-
-        return true;
-    }
+bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
+  PageIterator* it = AnalyseLayout();  // deleted on every later exit path
+  if (it == nullptr) {
+    return false;
+  }
+  int x1, x2, y1, y2;
+  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);  // any result is ignored
+  // Force a positive horizontal run so the slope division cannot hit zero.
+  if (x2 <= x1) x2 = x1 + 1;
+  // Convert the point pair to slope/offset of the baseline (in image coords.)
+  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
+  *out_offset = static_cast<int>(y1 - *out_slope * x1);
+  // Get the y-coord of the baseline at the left and right edges of the
+  // textline's bounding box.
+  int left, top, right, bottom;
+  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
+    delete it;
+    return false;  // note: both out-parameters were already written above
+  }
+  int left_y = IntCastRounded(*out_slope * left + *out_offset);
+  int right_y = IntCastRounded(*out_slope * right + *out_offset);
+  // Shift the baseline down so it passes through the nearest bottom-corner
+  // of the textline's bounding box. This is the difference between the y
+  // at the lowest (max) edge of the box and the actual box bottom.
+  *out_offset += bottom - std::max(left_y, right_y);
+  // Switch back to bottom-up tesseract coordinates. Requires negation of
+  // the slope and height - offset for the offset.
+  *out_slope = -*out_slope;
+  *out_offset = rect_height_ - *out_offset;
+  delete it;
+
+  return true;
+}
 
 /** Sets Dict::letter_is_okay_ function to point to the given function. */
-    void TessBaseAPI::SetDictFunc(DictFunc f) {
-        if (tesseract_ != nullptr) {
-            tesseract_->getDict().letter_is_okay_ = f;
-        }
-    }
+void TessBaseAPI::SetDictFunc(DictFunc f) {
+  // Nothing to install the callback on until an engine exists.
+  if (tesseract_ == nullptr) return;
+  tesseract_->getDict().letter_is_okay_ = f;
+}
 
 /**
  * Sets Dict::probability_in_context_ function to point to the given
@@ -2295,35 +2295,35 @@ namespace tesseract {
  * "character" (in general a utf-8 string), given the context of a previous
  * utf-8 string.
  */
-    void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
-        if (tesseract_ != nullptr) {
-            tesseract_->getDict().probability_in_context_ = f;
-            // Set it for the sublangs too.
-            int num_subs = tesseract_->num_sub_langs();
-            for (int i = 0; i < num_subs; ++i) {
-                tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
-            }
-        }
+void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
+  if (tesseract_ != nullptr) {
+    // Install on the main language first, then on each sub-language.
+    tesseract_->getDict().probability_in_context_ = f;
+    const int sub_count = tesseract_->num_sub_langs();
+    for (int s = 0; s < sub_count; ++s) {
+      tesseract_->get_sub_lang(s)->getDict().probability_in_context_ = f;
     }
+  }
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /** Sets Wordrec::fill_lattice_ function to point to the given function. */
-    void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
-        if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
-    }
+void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
+  if (tesseract_) tesseract_->fill_lattice_ = f;  // no-op without an engine
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 /** Common code for setting the image. */
-    bool TessBaseAPI::InternalSetImage() {
-        if (tesseract_ == nullptr) {
-            tprintf("Please call Init before attempting to set an image.\n");
-            return false;
-        }
-        if (thresholder_ == nullptr)
-            thresholder_ = new ImageThresholder;
-        ClearResults();
-        return true;
-    }
+bool TessBaseAPI::InternalSetImage() {
+  if (!tesseract_) {
+    tprintf("Please call Init before attempting to set an image.\n");
+    return false;
+  }
+  // Create the thresholder on first use; an existing one is kept as is.
+  if (!thresholder_) thresholder_ = new ImageThresholder;
+  ClearResults();
+  return true;
+}
 
 /**
  * Run the thresholder to make the thresholded image, returned in pix,
@@ -2331,155 +2331,155 @@ namespace tesseract {
  * to an existing pixDestroyable Pix.
  * The usual argument to Threshold is Tesseract::mutable_pix_binary().
  */
-    bool TessBaseAPI::Threshold(Pix** pix) {
-        ASSERT_HOST(pix != nullptr);
-        if (*pix != nullptr)
-            pixDestroy(pix);
-        // Zero resolution messes up the algorithms, so make sure it is credible.
-        int user_dpi = 0;
-        bool a = GetIntVariable("user_defined_dpi", &user_dpi);
-        int y_res = thresholder_->GetScaledYResolution();
-        if (user_dpi && (user_dpi < kMinCredibleResolution ||
-                         user_dpi > kMaxCredibleResolution)) {
-            tprintf("Warning: User defined image dpi is outside of expected range "
-                    "(%d - %d)!\n",
-                    kMinCredibleResolution, kMaxCredibleResolution);
-        }
-        // Always use user defined dpi
-        if (user_dpi) {
-            thresholder_->SetSourceYResolution(user_dpi);
-        } else if (y_res < kMinCredibleResolution ||
-                   y_res > kMaxCredibleResolution) {
-            tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
-                    y_res, kMinCredibleResolution);
-            thresholder_->SetSourceYResolution(kMinCredibleResolution);
-        }
-        PageSegMode pageseg_mode =
-                static_cast<PageSegMode>(
-                        static_cast<int>(tesseract_->tessedit_pageseg_mode));
-        if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
-        thresholder_->GetImageSizes(&rect_left_, &rect_top_,
-                                    &rect_width_, &rect_height_,
-                                    &image_width_, &image_height_);
-        if (!thresholder_->IsBinary()) {
-            tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
-            tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
-        } else {
-            tesseract_->set_pix_thresholds(nullptr);
-            tesseract_->set_pix_grey(nullptr);
-        }
-        // Set the internal resolution that is used for layout parameters from the
-        // estimated resolution, rather than the image resolution, which may be
-        // fabricated, but we will use the image resolution, if there is one, to
-        // report output point sizes.
-        int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
-                                        kMinCredibleResolution,
-                                        kMaxCredibleResolution);
-        if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
-            tprintf("Estimated internal resolution %d out of range! "
-                    "Corrected to %d.\n",
-                    thresholder_->GetScaledEstimatedResolution(), estimated_res);
-        }
-        tesseract_->set_source_resolution(estimated_res);
-        SavePixForCrash(estimated_res, *pix);
-        return true;
-    }
+bool TessBaseAPI::Threshold(Pix** pix) {
+  ASSERT_HOST(pix != nullptr);
+  if (*pix != nullptr)
+    pixDestroy(pix);
+  // Zero resolution messes up the algorithms, so make sure it is credible.
+  int user_dpi = 0;  // 0 is treated as "no user value" by the checks below
+  GetIntVariable("user_defined_dpi", &user_dpi);
+  int y_res = thresholder_->GetScaledYResolution();
+  if (user_dpi && (user_dpi < kMinCredibleResolution ||
+                   user_dpi > kMaxCredibleResolution)) {
+    tprintf("Warning: User defined image dpi is outside of expected range "
+            "(%d - %d)!\n",
+            kMinCredibleResolution, kMaxCredibleResolution);
+  }
+  // A user defined dpi always wins, even when it is outside the range.
+  if (user_dpi) {
+    thresholder_->SetSourceYResolution(user_dpi);
+  } else if (y_res < kMinCredibleResolution ||
+             y_res > kMaxCredibleResolution) {
+    tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
+            y_res, kMinCredibleResolution);
+    thresholder_->SetSourceYResolution(kMinCredibleResolution);
+  }
+  PageSegMode pageseg_mode =
+      static_cast<PageSegMode>(
+          static_cast<int>(tesseract_->tessedit_pageseg_mode));
+  if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false;
+  thresholder_->GetImageSizes(&rect_left_, &rect_top_,
+                              &rect_width_, &rect_height_,
+                              &image_width_, &image_height_);
+  if (!thresholder_->IsBinary()) {
+    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
+    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
+  } else {
+    tesseract_->set_pix_thresholds(nullptr);
+    tesseract_->set_pix_grey(nullptr);
+  }
+  // Set the internal resolution that is used for layout parameters from the
+  // estimated resolution, rather than the image resolution, which may be
+  // fabricated, but we will use the image resolution, if there is one, to
+  // report output point sizes.
+  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
+                                  kMinCredibleResolution,
+                                  kMaxCredibleResolution);
+  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
+    tprintf("Estimated internal resolution %d out of range! "
+            "Corrected to %d.\n",
+            thresholder_->GetScaledEstimatedResolution(), estimated_res);
+  }
+  tesseract_->set_source_resolution(estimated_res);
+  SavePixForCrash(estimated_res, *pix);
+  return true;
+}
 
 /** Find lines from the image making the BLOCK_LIST. */
-    int TessBaseAPI::FindLines() {
-        if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
-            tprintf("Please call SetImage before attempting recognition.\n");
-            return -1;
-        }
-        if (recognition_done_)
-            ClearResults();
-        if (!block_list_->empty()) {
-            return 0;
-        }
-        if (tesseract_ == nullptr) {
-            tesseract_ = new Tesseract;
-#ifndef DISABLED_LEGACY_ENGINE
-            tesseract_->InitAdaptiveClassifier(nullptr);
-#endif
-        }
-        if (tesseract_->pix_binary() == nullptr &&
-            !Threshold(tesseract_->mutable_pix_binary())) {
-            return -1;
-        }
+int TessBaseAPI::FindLines() {
+  if (thresholder_ == nullptr || thresholder_->IsEmpty()) {
+    tprintf("Please call SetImage before attempting recognition.\n");
+    return -1;
+  }
+  if (recognition_done_)
+    ClearResults();
+  if (!block_list_->empty()) {
+    return 0;
+  }
+  if (tesseract_ == nullptr) {
+    tesseract_ = new Tesseract;
+  #ifndef DISABLED_LEGACY_ENGINE
+    tesseract_->InitAdaptiveClassifier(nullptr);
+  #endif
+  }
+  if (tesseract_->pix_binary() == nullptr &&
+      !Threshold(tesseract_->mutable_pix_binary())) {
+    return -1;
+  }
 
-        tesseract_->PrepareForPageseg();
+  tesseract_->PrepareForPageseg();
 
 #ifndef DISABLED_LEGACY_ENGINE
-        if (tesseract_->textord_equation_detect) {
-            if (equ_detect_ == nullptr && datapath_ != nullptr) {
-                equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
-            }
-            if (equ_detect_ == nullptr) {
-                tprintf("Warning: Could not set equation detector\n");
-            } else {
-                tesseract_->SetEquationDetect(equ_detect_);
-            }
-        }
+  if (tesseract_->textord_equation_detect) {
+    if (equ_detect_ == nullptr && datapath_ != nullptr) {
+      equ_detect_ = new EquationDetect(datapath_->string(), nullptr);
+    }
+    if (equ_detect_ == nullptr) {
+      tprintf("Warning: Could not set equation detector\n");
+    } else {
+      tesseract_->SetEquationDetect(equ_detect_);
+    }
+  }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-        Tesseract* osd_tess = osd_tesseract_;
-        OSResults osr;
-        if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
-            osd_tess == nullptr) {
-            if (strcmp(language_->string(), "osd") == 0) {
-                osd_tess = tesseract_;
-            } else {
-                osd_tesseract_ = new Tesseract;
-                TessdataManager mgr(reader_);
-                if (datapath_ == nullptr) {
-                    tprintf("Warning: Auto orientation and script detection requested,"
-                            " but data path is undefined\n");
-                    delete osd_tesseract_;
-                    osd_tesseract_ = nullptr;
-                } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
-                                                          "osd", OEM_TESSERACT_ONLY,
-                                                          nullptr, 0, nullptr, nullptr,
-                                                          false, &mgr) == 0) {
-                    osd_tess = osd_tesseract_;
-                    osd_tesseract_->set_source_resolution(
-                            thresholder_->GetSourceYResolution());
-                } else {
-                    tprintf("Warning: Auto orientation and script detection requested,"
-                            " but osd language failed to load\n");
-                    delete osd_tesseract_;
-                    osd_tesseract_ = nullptr;
-                }
-            }
-        }
+  Tesseract* osd_tess = osd_tesseract_;
+  OSResults osr;
+  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
+      osd_tess == nullptr) {
+    if (strcmp(language_->string(), "osd") == 0) {
+      osd_tess = tesseract_;
+    } else {
+      osd_tesseract_ = new Tesseract;
+      TessdataManager mgr(reader_);
+      if (datapath_ == nullptr) {
+        tprintf("Warning: Auto orientation and script detection requested,"
+                " but data path is undefined\n");
+        delete osd_tesseract_;
+        osd_tesseract_ = nullptr;
+      } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr,
+                                                "osd", OEM_TESSERACT_ONLY,
+                                                nullptr, 0, nullptr, nullptr,
+                                                false, &mgr) == 0) {
+        osd_tess = osd_tesseract_;
+        osd_tesseract_->set_source_resolution(
+            thresholder_->GetSourceYResolution());
+      } else {
+        tprintf("Warning: Auto orientation and script detection requested,"
+                " but osd language failed to load\n");
+        delete osd_tesseract_;
+        osd_tesseract_ = nullptr;
+      }
+    }
+  }
 
-        if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
-            return -1;
+  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
+    return -1;
 
-        // If Devanagari is being recognized, we use different images for page seg
-        // and for OCR.
-        tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
-        return 0;
-    }
+  // If Devanagari is being recognized, we use different images for page seg
+  // and for OCR.
+  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
+  return 0;
+}
 
 /** Delete the pageres and clear the block list ready for a new page. */
-    void TessBaseAPI::ClearResults() {
-        if (tesseract_ != nullptr) {
-            tesseract_->Clear();
-        }
-        delete page_res_;
-        page_res_ = nullptr;
-        recognition_done_ = false;
-        if (block_list_ == nullptr)
-            block_list_ = new BLOCK_LIST;
-        else
-            block_list_->clear();
-        if (paragraph_models_ != nullptr) {
-            paragraph_models_->delete_data_pointers();
-            delete paragraph_models_;
-            paragraph_models_ = nullptr;
-        }
-        SavePixForCrash(0, nullptr);
-    }
+void TessBaseAPI::ClearResults() {
+  if (tesseract_ != nullptr) {
+    tesseract_->Clear();
+  }
+  delete page_res_;  // Deleting a null pointer is a no-op.
+  page_res_ = nullptr;
+  recognition_done_ = false;
+  if (block_list_ == nullptr)
+    block_list_ = new BLOCK_LIST;  // Leave a valid, empty list behind.
+  else
+    block_list_->clear();
+  if (paragraph_models_ != nullptr) {
+    paragraph_models_->delete_data_pointers();
+    delete paragraph_models_;
+    paragraph_models_ = nullptr;  // Re-created on demand by DetectParagraphs.
+  }
+  SavePixForCrash(0, nullptr);
+}
 
 /**
  * Return the length of the output text string, as UTF8, assuming
@@ -2488,55 +2488,55 @@ namespace tesseract {
  * character.
  * Also return the number of recognized blobs in blob_count.
  */
-    int TessBaseAPI::TextLength(int* blob_count) {
-        if (tesseract_ == nullptr || page_res_ == nullptr)
-            return 0;
-
-        PAGE_RES_IT   page_res_it(page_res_);
-        int total_length = 2;
-        int total_blobs = 0;
-        // Iterate over the data structures to extract the recognition result.
-        for (page_res_it.restart_page(); page_res_it.word () != nullptr;
-             page_res_it.forward()) {
-            WERD_RES *word = page_res_it.word();
-            WERD_CHOICE* choice = word->best_choice;
-            if (choice != nullptr) {
-                total_blobs += choice->length() + 2;
-                total_length += choice->unichar_string().length() + 2;
-                for (int i = 0; i < word->reject_map.length(); ++i) {
-                    if (word->reject_map[i].rejected())
-                        ++total_length;
-                }
-            }
-        }
-        if (blob_count != nullptr)
-            *blob_count = total_blobs;
-        return total_length;
+int TessBaseAPI::TextLength(int* blob_count) {
+  if (tesseract_ == nullptr || page_res_ == nullptr)
+    return 0;
+
+  PAGE_RES_IT   page_res_it(page_res_);
+  int total_length = 2;
+  int total_blobs = 0;
+  // Iterate over the data structures to extract the recognition result.
+  for (page_res_it.restart_page(); page_res_it.word () != nullptr;
+       page_res_it.forward()) {
+    WERD_RES *word = page_res_it.word();
+    WERD_CHOICE* choice = word->best_choice;
+    if (choice != nullptr) {
+      total_blobs += choice->length() + 2;
+      total_length += choice->unichar_string().length() + 2;
+      for (int i = 0; i < word->reject_map.length(); ++i) {
+        if (word->reject_map[i].rejected())
+          ++total_length;
+      }
     }
+  }
+  if (blob_count != nullptr)
+    *blob_count = total_blobs;
+  return total_length;
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 /**
  * Estimates the Orientation And Script of the image.
  * Returns true if the image was processed successfully.
  */
-    bool TessBaseAPI::DetectOS(OSResults* osr) {
-        if (tesseract_ == nullptr)
-            return false;
-        ClearResults();
-        if (tesseract_->pix_binary() == nullptr &&
-            !Threshold(tesseract_->mutable_pix_binary())) {
-            return false;
-        }
+bool TessBaseAPI::DetectOS(OSResults* osr) {
+  if (tesseract_ == nullptr)
+    return false;
+  ClearResults();
+  if (tesseract_->pix_binary() == nullptr &&
+      !Threshold(tesseract_->mutable_pix_binary())) {
+    return false;
+  }
 
-        if (input_file_ == nullptr)
-            input_file_ = new STRING(kInputFile);
-        return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
-    }
+  if (input_file_ == nullptr)
+    input_file_ = new STRING(kInputFile);
+  return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-    void TessBaseAPI::set_min_orientation_margin(double margin) {
-        tesseract_->min_orientation_margin.set_value(margin);
-    }
+void TessBaseAPI::set_min_orientation_margin(double margin) {
+  tesseract_->min_orientation_margin.set_value(margin);
+}
 
 /**
  * Return text orientation of each block as determined in an earlier page layout
@@ -2552,98 +2552,98 @@ namespace tesseract {
  * be less than the total number of blocks. The ordering is intended to be
  * consistent with GetTextLines().
  */
-    void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
-                                               bool** vertical_writing) {
-        delete[] *block_orientation;
-        *block_orientation = nullptr;
-        delete[] *vertical_writing;
-        *vertical_writing = nullptr;
-        BLOCK_IT block_it(block_list_);
-
-        block_it.move_to_first();
-        int num_blocks = 0;
-        for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
-            if (!block_it.data()->pdblk.poly_block()->IsText()) {
-                continue;
-            }
-            ++num_blocks;
-        }
-        if (!num_blocks) {
-            tprintf("WARNING: Found no blocks\n");
-            return;
-        }
-        *block_orientation = new int[num_blocks];
-        *vertical_writing = new bool[num_blocks];
-        block_it.move_to_first();
-        int i = 0;
-        for (block_it.mark_cycle_pt(); !block_it.cycled_list();
-             block_it.forward()) {
-            if (!block_it.data()->pdblk.poly_block()->IsText()) {
-                continue;
-            }
-            FCOORD re_rotation = block_it.data()->re_rotation();
-            float re_theta = re_rotation.angle();
-            FCOORD classify_rotation = block_it.data()->classify_rotation();
-            float classify_theta = classify_rotation.angle();
-            double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
-            if (rot_theta < 0) rot_theta += 4;
-            int num_rotations = static_cast<int>(rot_theta + 0.5);
-            (*block_orientation)[i] = num_rotations;
-            // The classify_rotation is non-zero only if the text has vertical
-            // writing direction.
-            (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
-            ++i;
-        }
-    }
-
-
-    void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
-        int debug_level = 0;
-        GetIntVariable("paragraph_debug_level", &debug_level);
-        if (paragraph_models_ == nullptr)
-            paragraph_models_ = new GenericVector<ParagraphModel*>;
-        MutableIterator *result_it = GetMutableIterator();
-        do {  // Detect paragraphs for this block
-            GenericVector<ParagraphModel *> models;
-            ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
-                                          result_it, &models);
-            *paragraph_models_ += models;
-        } while (result_it->Next(RIL_BLOCK));
-        delete result_it;
-    }
+void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
+                                           bool** vertical_writing) {
+  delete[] *block_orientation;  // Any array passed in is freed first.
+  *block_orientation = nullptr;
+  delete[] *vertical_writing;
+  *vertical_writing = nullptr;
+  BLOCK_IT block_it(block_list_);
+
+  block_it.move_to_first();
+  int num_blocks = 0;  // Pass 1: count text blocks to size the outputs.
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    if (!block_it.data()->pdblk.poly_block()->IsText()) {
+      continue;
+    }
+    ++num_blocks;
+  }
+  if (!num_blocks) {
+    tprintf("WARNING: Found no blocks\n");
+    return;  // Both outputs are left as nullptr.
+  }
+  *block_orientation = new int[num_blocks];
+  *vertical_writing = new bool[num_blocks];
+  block_it.move_to_first();
+  int i = 0;  // Pass 2: fill one entry per text block, in list order.
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    if (!block_it.data()->pdblk.poly_block()->IsText()) {
+      continue;
+    }
+    FCOORD re_rotation = block_it.data()->re_rotation();
+    float re_theta = re_rotation.angle();
+    FCOORD classify_rotation = block_it.data()->classify_rotation();
+    float classify_theta = classify_rotation.angle();
+    double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
+    if (rot_theta < 0) rot_theta += 4;  // Scaled by 2/pi: 4 == one full turn.
+    int num_rotations = static_cast<int>(rot_theta + 0.5);  // Round to nearest.
+    (*block_orientation)[i] = num_rotations;
+    // The classify_rotation is non-zero only if the text has vertical
+    // writing direction.
+    (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
+    ++i;
+  }
+}
+
+
+void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
+  int debug_level = 0;
+  GetIntVariable("paragraph_debug_level", &debug_level);
+  if (paragraph_models_ == nullptr)
+    paragraph_models_ = new GenericVector<ParagraphModel*>;
+  MutableIterator *result_it = GetMutableIterator();
+  do {  // Detect paragraphs for this block
+    GenericVector<ParagraphModel *> models;
+    ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
+                                  result_it, &models);
+    *paragraph_models_ += models;
+  } while (result_it->Next(RIL_BLOCK));
+  delete result_it;
+}
 
 /** This method returns the string form of the specified unichar. */
-    const char* TessBaseAPI::GetUnichar(int unichar_id) {
-        return tesseract_->unicharset.id_to_unichar(unichar_id);
-    }
+const char* TessBaseAPI::GetUnichar(int unichar_id) {
+  return tesseract_->unicharset.id_to_unichar(unichar_id);
+}
 
 /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-    const Dawg *TessBaseAPI::GetDawg(int i) const {
-        if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
-        return tesseract_->getDict().GetDawg(i);
-    }
+const Dawg *TessBaseAPI::GetDawg(int i) const {
+  if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr;
+  return tesseract_->getDict().GetDawg(i);
+}
 
 /** Return the number of dawgs loaded into tesseract_ object. */
-    int TessBaseAPI::NumDawgs() const {
-        return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
-    }
+int TessBaseAPI::NumDawgs() const {
+  return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
+}
 
 /** Escape a char string - remove <>&"' with HTML codes. */
-    STRING HOcrEscape(const char* text) {
-        STRING ret;
-        const char *ptr;
-        for (ptr = text; *ptr; ptr++) {
-            switch (*ptr) {
-                case '<': ret += "&lt;"; break;
-                case '>': ret += "&gt;"; break;
-                case '&': ret += "&amp;"; break;
-                case '"': ret += "&quot;"; break;
-                case '\'': ret += "&#39;"; break;
-                default: ret += *ptr;
-            }
-        }
-        return ret;
+STRING HOcrEscape(const char* text) {
+  STRING ret;
+  const char *ptr;
+  for (ptr = text; *ptr; ptr++) {
+    switch (*ptr) {
+      case '<': ret += "&lt;"; break;
+      case '>': ret += "&gt;"; break;
+      case '&': ret += "&amp;"; break;
+      case '"': ret += "&quot;"; break;
+      case '\'': ret += "&#39;"; break;
+      default: ret += *ptr;
     }
+  }
+  return ret;
+}
 
 
 #ifndef DISABLED_LEGACY_ENGINE
@@ -2653,271 +2653,271 @@ namespace tesseract {
 // Ocropus add-ons.
 
 /** Find lines from the image making the BLOCK_LIST. */
-    BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
-        ASSERT_HOST(FindLines() == 0);
-        BLOCK_LIST* result = block_list_;
-        block_list_ = nullptr;
-        return result;
-    }
+BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
+  ASSERT_HOST(FindLines() == 0);
+  BLOCK_LIST* result = block_list_;
+  block_list_ = nullptr;
+  return result;
+}
 
 /**
  * Delete a block list.
  * This is to keep BLOCK_LIST pointer opaque
  * and let go of including the other headers.
  */
-    void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
-        delete block_list;
-    }
-
-
-    ROW *TessBaseAPI::MakeTessOCRRow(float baseline,
-                                     float xheight,
-                                     float descender,
-                                     float ascender) {
-        int32_t xstarts[] = {-32000};
-        double quad_coeffs[] = {0, 0, baseline};
-        return new ROW(1,
-                       xstarts,
-                       quad_coeffs,
-                       xheight,
-                       ascender - (baseline + xheight),
-                       descender - baseline,
-                       0,
-                       0);
-    }
+void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
+  delete block_list;
+}
+
+
+ROW *TessBaseAPI::MakeTessOCRRow(float baseline,
+                                 float xheight,
+                                 float descender,
+                                 float ascender) {
+  int32_t xstarts[] = {-32000};
+  double quad_coeffs[] = {0, 0, baseline};
+  return new ROW(1,
+                 xstarts,
+                 quad_coeffs,
+                 xheight,
+                 ascender - (baseline + xheight),
+                 descender - baseline,
+                 0,
+                 0);
+}
 
 /** Creates a TBLOB* from the whole pix. */
-    TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
-        int width = pixGetWidth(pix);
-        int height = pixGetHeight(pix);
-        BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
-
-        // Create C_BLOBs from the page
-        extract_edges(pix, &block);
-
-        // Merge all C_BLOBs
-        C_BLOB_LIST *list = block.blob_list();
-        C_BLOB_IT c_blob_it(list);
-        if (c_blob_it.empty())
-            return nullptr;
-        // Move all the outlines to the first blob.
-        C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
-        for (c_blob_it.forward();
-             !c_blob_it.at_first();
-             c_blob_it.forward()) {
-            C_BLOB *c_blob = c_blob_it.data();
-            ol_it.add_list_after(c_blob->out_list());
-        }
-        // Convert the first blob to the output TBLOB.
-        return TBLOB::PolygonalCopy(false, c_blob_it.data());
-    }
+TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
+  int width = pixGetWidth(pix);
+  int height = pixGetHeight(pix);
+  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
+
+  // Create C_BLOBs from the page
+  extract_edges(pix, &block);
+
+  // Merge all C_BLOBs
+  C_BLOB_LIST *list = block.blob_list();
+  C_BLOB_IT c_blob_it(list);
+  if (c_blob_it.empty())
+    return nullptr;  // No blobs were found in pix; callers must check.
+  // Move all the outlines to the first blob.
+  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
+  for (c_blob_it.forward();
+       !c_blob_it.at_first();
+       c_blob_it.forward()) {
+    C_BLOB *c_blob = c_blob_it.data();
+    ol_it.add_list_after(c_blob->out_list());
+  }
+  // Convert the first blob to the output TBLOB.
+  return TBLOB::PolygonalCopy(false, c_blob_it.data());
+}
 
 /**
  * This method baseline normalizes a TBLOB in-place. The input row is used
  * for normalization. The denorm is an optional parameter in which the
  * normalization-antidote is returned.
  */
-    void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
-        TBOX box = tblob->bounding_box();
-        float x_center = (box.left() + box.right()) / 2.0f;
-        float baseline = row->base_line(x_center);
-        float scale = kBlnXHeight / row->x_height();
-        tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
-                         0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
-    }
+void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
+  TBOX box = tblob->bounding_box();
+  float x_center = (box.left() + box.right()) / 2.0f;
+  float baseline = row->base_line(x_center);
+  float scale = kBlnXHeight / row->x_height();
+  tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
+                   0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
+}
 
 /**
  * Return a TBLOB * from the whole pix.
  * To be freed later with delete.
  */
-    static TBLOB *make_tesseract_blob(float baseline, float xheight,
-                                      float descender, float ascender,
-                                      bool numeric_mode, Pix* pix) {
-        TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
-
-        // Normalize TBLOB
-        ROW *row =
-                TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
-        TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
-        delete row;
-        return tblob;
-    }
+static TBLOB *make_tesseract_blob(float baseline, float xheight,
+                                  float descender, float ascender,
+                                  bool numeric_mode, Pix* pix) {
+  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
+  if (tblob == nullptr) return nullptr;  // No blobs: nothing to normalize.
+  // Normalize TBLOB
+  ROW *row =
+      TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
+  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
+  delete row;
+  return tblob;
+}
 
 /**
  * Adapt to recognize the current image as the given character.
  * The image must be preloaded into pix_binary_ and be just an image
  * of a single character.
  */
-    void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
-                                       int length,
-                                       float baseline,
-                                       float xheight,
-                                       float descender,
-                                       float ascender) {
-        UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
-        TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
-                                          tesseract_->classify_bln_numeric_mode,
-                                          tesseract_->pix_binary());
-        float threshold;
-        float best_rating = -100;
-
-
-        // Classify to get a raw choice.
-        BLOB_CHOICE_LIST choices;
-        tesseract_->AdaptiveClassifier(blob, &choices);
-        BLOB_CHOICE_IT choice_it;
-        choice_it.set_to_list(&choices);
-        for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
-             choice_it.forward()) {
-            if (choice_it.data()->rating() > best_rating) {
-                best_rating = choice_it.data()->rating();
-            }
-        }
-
-        threshold = tesseract_->matcher_good_threshold;
-
-        if (blob->outlines)
-            tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
-                                    tesseract_->AdaptedTemplates);
-        delete blob;
-    }
-
-
-    PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
-        PAGE_RES *page_res = new PAGE_RES(false, block_list,
-                                          &(tesseract_->prev_word_best_choice_));
-        tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
-        return page_res;
+void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
+                                   int length,
+                                   float baseline,
+                                   float xheight,
+                                   float descender,
+                                   float ascender) {
+  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
+  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
+                                    tesseract_->classify_bln_numeric_mode,
+                                    tesseract_->pix_binary());
+  float threshold;
+  float best_rating = -100;  // Tracked below but never used afterwards.
+  if (blob == nullptr) return;  // No blob in the image: nothing to adapt to.
+
+  // Classify to get a raw choice.
+  BLOB_CHOICE_LIST choices;
+  tesseract_->AdaptiveClassifier(blob, &choices);
+  BLOB_CHOICE_IT choice_it;
+  choice_it.set_to_list(&choices);
+  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
+       choice_it.forward()) {
+    if (choice_it.data()->rating() > best_rating) {
+      best_rating = choice_it.data()->rating();
     }
+  }
 
-    PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
-                                            PAGE_RES* pass1_result) {
-        if (!pass1_result)
-            pass1_result = new PAGE_RES(false, block_list,
-                                        &(tesseract_->prev_word_best_choice_));
-        tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
-        return pass1_result;
-    }
-
-    struct TESS_CHAR : ELIST_LINK {
-        char *unicode_repr;
-        int length;  // of unicode_repr
-        float cost;
-        TBOX box;
-
-        TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
-            length = (len == -1 ? strlen(repr) : len);
-            unicode_repr = new char[length + 1];
-            strncpy(unicode_repr, repr, length);
-        }
+  threshold = tesseract_->matcher_good_threshold;
+
+  if (blob->outlines)
+    tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
+                            tesseract_->AdaptedTemplates);
+  delete blob;
+}
+
+
+PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
+  PAGE_RES *page_res = new PAGE_RES(false, block_list,
+                                    &(tesseract_->prev_word_best_choice_));
+  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1);
+  return page_res;
+}
+
+PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
+                                        PAGE_RES* pass1_result) {
+  if (!pass1_result)
+    pass1_result = new PAGE_RES(false, block_list,
+                                &(tesseract_->prev_word_best_choice_));
+  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2);
+  return pass1_result;
+}
+
+struct TESS_CHAR : ELIST_LINK {
+  char *unicode_repr;  // Owned buffer, released with delete[] in the dtor.
+  int length;  // of unicode_repr
+  float cost;
+  TBOX box;
+
+  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
+    length = (len == -1 ? strlen(repr) : len);  // -1: repr is NUL-terminated.
+    unicode_repr = new char[length + 1]();  // Zero-filled => NUL-terminated.
+    strncpy(unicode_repr, repr, length);
+  }
 
-        TESS_CHAR()
-                : unicode_repr(nullptr),
-                  length(0),
-                  cost(0.0f)
-        {  // Satisfies ELISTIZE.
-        }
-        ~TESS_CHAR() {
-            delete [] unicode_repr;
-        }
-    };
+  TESS_CHAR()
+    : unicode_repr(nullptr),
+      length(0),
+      cost(0.0f)
+  {  // Satisfies ELISTIZE.
+  }
+  ~TESS_CHAR() {
+    delete [] unicode_repr;
+  }
+};
 
-    ELISTIZEH(TESS_CHAR)
-    ELISTIZE(TESS_CHAR)
+ELISTIZEH(TESS_CHAR)
+ELISTIZE(TESS_CHAR)
 
-    static void add_space(TESS_CHAR_IT* it) {
-        TESS_CHAR *t = new TESS_CHAR(0, " ");
-        it->add_after_then_move(t);
-    }
+static void add_space(TESS_CHAR_IT* it) {
+  TESS_CHAR *t = new TESS_CHAR(0, " ");
+  it->add_after_then_move(t);
+}
 
 
-    static float rating_to_cost(float rating) {
-        rating = 100 + rating;
-        // cuddled that to save from coverage profiler
-        // (I have never seen ratings worse than -100,
-        //  but the check won't hurt)
-        if (rating < 0) rating = 0;
-        return rating;
-    }
+static float rating_to_cost(float rating) {
+  rating = 100 + rating;
+  // cuddled that to save from coverage profiler
+  // (I have never seen ratings worse than -100,
+  //  but the check won't hurt)
+  if (rating < 0) rating = 0;
+  return rating;
+}
 
 /**
  * Extract the OCR results, costs (penalty points for uncertainty),
  * and the bounding boxes of the characters.
  */
-    static void extract_result(TESS_CHAR_IT* out,
-                               PAGE_RES* page_res) {
-        PAGE_RES_IT page_res_it(page_res);
-        int word_count = 0;
-        while (page_res_it.word() != nullptr) {
-            WERD_RES *word = page_res_it.word();
-            const char *str = word->best_choice->unichar_string().string();
-            const char *len = word->best_choice->unichar_lengths().string();
-            TBOX real_rect = word->word->bounding_box();
-
-            if (word_count)
-                add_space(out);
-            int n = strlen(len);
-            for (int i = 0; i < n; i++) {
-                TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
-                                              str, *len);
-                tc->box = real_rect.intersection(word->box_word->BlobBox(i));
-                out->add_after_then_move(tc);
-                str += *len;
-                len++;
-            }
-            page_res_it.forward();
-            word_count++;
-        }
-    }
+static void extract_result(TESS_CHAR_IT* out,
+                           PAGE_RES* page_res) {
+  PAGE_RES_IT page_res_it(page_res);
+  int word_count = 0;
+  while (page_res_it.word() != nullptr) {
+    WERD_RES *word = page_res_it.word();
+    const char *str = word->best_choice->unichar_string().string();
+    const char *len = word->best_choice->unichar_lengths().string();
+    TBOX real_rect = word->word->bounding_box();
+
+    if (word_count)
+      add_space(out);  // Separate consecutive words with a " " entry.
+    int n = strlen(len);  // *len is the byte count of the next symbol in str.
+    for (int i = 0; i < n; i++) {
+      TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
+                                    str, *len);
+      tc->box = real_rect.intersection(word->box_word->BlobBox(i));
+      out->add_after_then_move(tc);
+      str += *len;
+      len++;
+    }
+    page_res_it.forward();
+    word_count++;
+  }
+}
 
 /**
  * Extract the OCR results, costs (penalty points for uncertainty),
  * and the bounding boxes of the characters.
  */
-    int TessBaseAPI::TesseractExtractResult(char** text,
-                                            int** lengths,
-                                            float** costs,
-                                            int** x0,
-                                            int** y0,
-                                            int** x1,
-                                            int** y1,
-                                            PAGE_RES* page_res) {
-        TESS_CHAR_LIST tess_chars;
-        TESS_CHAR_IT tess_chars_it(&tess_chars);
-        extract_result(&tess_chars_it, page_res);
-        tess_chars_it.move_to_first();
-        int n = tess_chars.length();
-        int text_len = 0;
-        *lengths = new int[n];
-        *costs = new float[n];
-        *x0 = new int[n];
-        *y0 = new int[n];
-        *x1 = new int[n];
-        *y1 = new int[n];
-        int i = 0;
-        for (tess_chars_it.mark_cycle_pt();
-             !tess_chars_it.cycled_list();
-             tess_chars_it.forward(), i++) {
-            TESS_CHAR *tc = tess_chars_it.data();
-            text_len += (*lengths)[i] = tc->length;
-            (*costs)[i] = tc->cost;
-            (*x0)[i] = tc->box.left();
-            (*y0)[i] = tc->box.bottom();
-            (*x1)[i] = tc->box.right();
-            (*y1)[i] = tc->box.top();
-        }
-        char *p = *text = new char[text_len];
-
-        tess_chars_it.move_to_first();
-        for (tess_chars_it.mark_cycle_pt();
-             !tess_chars_it.cycled_list();
-             tess_chars_it.forward()) {
-            TESS_CHAR *tc = tess_chars_it.data();
-            strncpy(p, tc->unicode_repr, tc->length);
-            p += tc->length;
-        }
-        return n;
-    }
+int TessBaseAPI::TesseractExtractResult(char** text,
+                                        int** lengths,
+                                        float** costs,
+                                        int** x0,
+                                        int** y0,
+                                        int** x1,
+                                        int** y1,
+                                        PAGE_RES* page_res) {
+  TESS_CHAR_LIST tess_chars;
+  TESS_CHAR_IT tess_chars_it(&tess_chars);
+  extract_result(&tess_chars_it, page_res);
+  tess_chars_it.move_to_first();
+  int n = tess_chars.length();
+  int text_len = 0;
+  *lengths = new int[n];  // Output arrays are new[]-allocated, not freed here.
+  *costs = new float[n];
+  *x0 = new int[n];
+  *y0 = new int[n];
+  *x1 = new int[n];
+  *y1 = new int[n];
+  int i = 0;
+  for (tess_chars_it.mark_cycle_pt();
+       !tess_chars_it.cycled_list();
+       tess_chars_it.forward(), i++) {
+    TESS_CHAR *tc = tess_chars_it.data();
+    text_len += (*lengths)[i] = tc->length;
+    (*costs)[i] = tc->cost;
+    (*x0)[i] = tc->box.left();
+    (*y0)[i] = tc->box.bottom();
+    (*x1)[i] = tc->box.right();
+    (*y1)[i] = tc->box.top();
+  }
+  char *p = *text = new char[text_len];  // Exact fit: no NUL terminator.
+
+  tess_chars_it.move_to_first();
+  for (tess_chars_it.mark_cycle_pt();
+       !tess_chars_it.cycled_list();
+       tess_chars_it.forward()) {
+    TESS_CHAR *tc = tess_chars_it.data();
+    strncpy(p, tc->unicode_repr, tc->length);
+    p += tc->length;
+  }
+  return n;
+}
 
 /** This method returns the features associated with the input blob. */
 // The resulting features are returned in int_features, which must be
@@ -2926,80 +2926,80 @@ namespace tesseract {
 // On return feature_outline_index is filled with an index of the outline
 // corresponding to each feature in int_features.
 // TODO(rays) Fix the caller to out outline_counts instead.
-    void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
-                                         INT_FEATURE_STRUCT* int_features,
-                                         int* num_features,
-                                         int* feature_outline_index) {
-        GenericVector<int> outline_counts;
-        GenericVector<INT_FEATURE_STRUCT> bl_features;
-        GenericVector<INT_FEATURE_STRUCT> cn_features;
-        INT_FX_RESULT_STRUCT fx_info;
-        tesseract_->ExtractFeatures(*blob, false, &bl_features,
-                                    &cn_features, &fx_info, &outline_counts);
-        if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
-            *num_features = 0;
-            return;  // Feature extraction failed.
-        }
-        *num_features = cn_features.size();
-        memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
-        // TODO(rays) Pass outline_counts back and simplify the calling code.
-        if (feature_outline_index != nullptr) {
-            int f = 0;
-            for (int i = 0; i < outline_counts.size(); ++i) {
-                while (f < outline_counts[i])
-                    feature_outline_index[f++] = i;
-            }
-        }
+void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
+                                     INT_FEATURE_STRUCT* int_features,
+                                     int* num_features,  // out: 0 on failure
+                                     int* feature_outline_index) {  // optional, may be nullptr
+  GenericVector<int> outline_counts;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;  // passed to ExtractFeatures, unused after
+  GenericVector<INT_FEATURE_STRUCT> cn_features;  // the only features copied to the caller
+  INT_FX_RESULT_STRUCT fx_info;  // passed to ExtractFeatures, unused after
+  tesseract_->ExtractFeatures(*blob, false, &bl_features,
+                              &cn_features, &fx_info, &outline_counts);
+  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
+    *num_features = 0;
+    return;  // Feature extraction failed (no features, or more than the maximum).
+  }
+  *num_features = cn_features.size();  // number of entries memcpy'd just below
+  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
+  // TODO(rays) Pass outline_counts back and simplify the calling code.
+  if (feature_outline_index != nullptr) {
+    int f = 0;  // running feature index; deliberately not reset per outline
+    for (int i = 0; i < outline_counts.size(); ++i) {
+      while (f < outline_counts[i])  // outline_counts[i] acts as an exclusive end index
+        feature_outline_index[f++] = i;
     }
+  }
+}
 
 // This method returns the row to which a box of specified dimensions would
 // belong. If no good match is found, it returns nullptr.
-    ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
-                                    int left, int top, int right, int bottom) {
-        TBOX box(left, bottom, right, top);
-        BLOCK_IT b_it(blocks);
-        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
-            BLOCK* block = b_it.data();
-            if (!box.major_overlap(block->pdblk.bounding_box()))
-                continue;
-            ROW_IT r_it(block->row_list());
-            for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
-                ROW* row = r_it.data();
-                if (!box.major_overlap(row->bounding_box()))
-                    continue;
-                WERD_IT w_it(row->word_list());
-                for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
-                    WERD* word = w_it.data();
-                    if (box.major_overlap(word->bounding_box()))
-                        return row;
-                }
-            }
-        }
-        return nullptr;
+ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
+                                int left, int top, int right, int bottom) {
+  TBOX box(left, bottom, right, top);  // note: TBOX order differs from the parameter order
+  BLOCK_IT b_it(blocks);
+  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+    BLOCK* block = b_it.data();
+    if (!box.major_overlap(block->pdblk.bounding_box()))  // prune at block level
+      continue;
+    ROW_IT r_it(block->row_list());
+    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
+      ROW* row = r_it.data();
+      if (!box.major_overlap(row->bounding_box()))  // prune at row level
+        continue;
+      WERD_IT w_it(row->word_list());
+      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
+        WERD* word = w_it.data();
+        if (box.major_overlap(word->bounding_box()))
+          return row;  // first row (in iteration order) holding an overlapping word
+      }
     }
+  }
+  return nullptr;  // no block/row/word combination had a major overlap with the box
+}
 
 /** Method to run adaptive classifier on a blob. */
-    void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
-                                            int num_max_matches,
-                                            int* unichar_ids,
-                                            float* ratings,
-                                            int* num_matches_returned) {
-        BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
-        tesseract_->AdaptiveClassifier(blob, choices);
-        BLOB_CHOICE_IT choices_it(choices);
-        int& index = *num_matches_returned;
-        index = 0;
-        for (choices_it.mark_cycle_pt();
-             !choices_it.cycled_list() && index < num_max_matches;
-             choices_it.forward()) {
-            BLOB_CHOICE* choice = choices_it.data();
-            unichar_ids[index] = choice->unichar_id();
-            ratings[index] = choice->rating();
-            ++index;
-        }
-        *num_matches_returned = index;
-        delete choices;
-    }
+void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
+                                        int num_max_matches,  // upper bound on entries written
+                                        int* unichar_ids,  // out: choice->unichar_id()
+                                        float* ratings,  // out: choice->rating()
+                                        int* num_matches_returned) {  // out: entries written
+  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;  // deleted at the end of this function
+  tesseract_->AdaptiveClassifier(blob, choices);
+  BLOB_CHOICE_IT choices_it(choices);
+  int& index = *num_matches_returned;  // alias: writes go straight to the caller's int
+  index = 0;
+  for (choices_it.mark_cycle_pt();
+       !choices_it.cycled_list() && index < num_max_matches;  // stop at list end or cap
+       choices_it.forward()) {
+    BLOB_CHOICE* choice = choices_it.data();
+    unichar_ids[index] = choice->unichar_id();
+    ratings[index] = choice->rating();
+    ++index;
+  }
+  *num_matches_returned = index;  // self-assignment, since index aliases this int
+  delete choices;
+}
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 }  // namespace tesseract.
diff --git a/src/api/baseapi.h b/src/api/baseapi.h
index f82dfa2d56..efa97ecd8f 100644
--- a/src/api/baseapi.h
+++ b/src/api/baseapi.h
@@ -61,34 +61,34 @@ struct TBLOB;
 
 namespace tesseract {
 
-    class Dawg;
-    class Dict;
-    class EquationDetect;
-    class PageIterator;
-    class LTRResultIterator;
-    class ResultIterator;
-    class MutableIterator;
-    class TessResultRenderer;
-    class Tesseract;
-    class Trie;
-    class Wordrec;
-
-    typedef int (Dict::*DictFunc)(void* void_dawg_args,
-                                  const UNICHARSET& unicharset,
-                                  UNICHAR_ID unichar_id, bool word_end) const;
-    typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
-                                                     const char* context,
-                                                     int context_bytes,
-                                                     const char* character,
-                                                     int character_bytes);
-    typedef float (Dict::*ParamsModelClassifyFunc)(
-            const char *lang, void *path);
-    typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
-                                             const WERD_CHOICE_LIST &best_choices,
-                                             const UNICHARSET &unicharset,
-                                             BlamerBundle *blamer_bundle);
-    typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
-            TruthCallback;
+class Dawg;  // this and the following class lines are forward declarations only
+class Dict;
+class EquationDetect;
+class PageIterator;
+class LTRResultIterator;
+class ResultIterator;
+class MutableIterator;
+class TessResultRenderer;
+class Tesseract;
+class Trie;
+class Wordrec;
+
+typedef int (Dict::*DictFunc)(void* void_dawg_args,  // ptr to a const Dict member function
+                              const UNICHARSET& unicharset,
+                              UNICHAR_ID unichar_id, bool word_end) const;
+typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,  // Dict member-fn ptr
+                                                 const char* context,
+                                                 int context_bytes,
+                                                 const char* character,
+                                                 int character_bytes);
+typedef float (Dict::*ParamsModelClassifyFunc)(  // Dict member-function pointer
+    const char *lang, void *path);
+typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,  // Wordrec member-fn ptr
+                                         const WERD_CHOICE_LIST &best_choices,
+                                         const UNICHARSET &unicharset,
+                                         BlamerBundle *blamer_bundle);
+typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
+    TruthCallback;  // TessCallback4 instantiated with the four argument types above
 
 /**
  * Base class for all tesseract APIs.
@@ -98,842 +98,842 @@ namespace tesseract {
  * class to hide the data types so that users of this class don't have to
  * include any other Tesseract headers.
  */
-    class TESS_API TessBaseAPI {
-    public:
-        TessBaseAPI();
-        virtual ~TessBaseAPI();
-
-        /**
-         * Returns the version identifier as a static string. Do not delete.
-         */
-        static const char* Version();
-
-        /**
-         * If compiled with OpenCL AND an available OpenCL
-         * device is deemed faster than serial code, then
-         * "device" is populated with the cl_device_id
-         * and returns sizeof(cl_device_id)
-         * otherwise *device=nullptr and returns 0.
-         */
-        static size_t getOpenCLDevice(void **device);
-
-        /**
-         * Writes the thresholded image to stderr as a PBM file on receipt of a
-         * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
-         */
-        static void CatchSignals();
-
-        /**
-         * Set the name of the input file. Needed for training and
-         * reading a UNLV zone file, and for searchable PDF output.
-         */
-        void SetInputName(const char* name);
-        /**
-         * These functions are required for searchable PDF output.
-         * We need our hands on the input file so that we can include
-         * it in the PDF without transcoding. If that is not possible,
-         * we need the original image. Finally, resolution metadata
-         * is stored in the PDF so we need that as well.
-         */
-        const char* GetInputName();
-        // Takes ownership of the input pix.
-        void SetInputImage(Pix *pix);
-        Pix* GetInputImage();
-        int GetSourceYResolution();
-        const char* GetDatapath();
-
-        /** Set the name of the bonus output files. Needed only for debugging. */
-        void SetOutputName(const char* name);
-
-        /**
-         * Set the value of an internal "parameter."
-         * Supply the name of the parameter and the value as a string, just as
-         * you would in a config file.
-         * Returns false if the name lookup failed.
-         * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-         * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-         * SetVariable may be used before Init, but settings will revert to
-         * defaults on End().
-         *
-         * Note: Must be called after Init(). Only works for non-init variables
-         * (init variables should be passed to Init()).
-         */
-        bool SetVariable(const char* name, const char* value);
-        bool SetDebugVariable(const char* name, const char* value);
-
-        /**
-         * Returns true if the parameter was found among Tesseract parameters.
-         * Fills in value with the value of the parameter.
-         */
-        bool GetIntVariable(const char *name, int *value) const;
-        bool GetBoolVariable(const char *name, bool *value) const;
-        bool GetDoubleVariable(const char *name, double *value) const;
-
-        /**
-         * Returns the pointer to the string that represents the value of the
-         * parameter if it was found among Tesseract parameters.
-         */
-        const char *GetStringVariable(const char *name) const;
-
-        /**
-         * Print Tesseract parameters to the given file.
-         */
-        void PrintVariables(FILE *fp) const;
-
-        /**
-         * Get value of named variable as a string, if it exists.
-         */
-        bool GetVariableAsString(const char *name, STRING *val);
-
-        /**
-         * Instances are now mostly thread-safe and totally independent,
-         * but some global parameters remain. Basically it is safe to use multiple
-         * TessBaseAPIs in different threads in parallel, UNLESS:
-         * you use SetVariable on some of the Params in classify and textord.
-         * If you do, then the effect will be to change it for all your instances.
-         *
-         * Start tesseract. Returns zero on success and -1 on failure.
-         * NOTE that the only members that may be called before Init are those
-         * listed above here in the class definition.
-         *
-         * The datapath must be the name of the parent directory of tessdata and
-         * must end in / . Any name after the last / will be stripped.
-         * The language is (usually) an ISO 639-3 string or nullptr will default to eng.
-         * It is entirely safe (and eventually will be efficient too) to call
-         * Init multiple times on the same instance to change language, or just
-         * to reset the classifier.
-         * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-         * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-         * English. Languages may specify internally that they want to be loaded
-         * with one or more other languages, so the ~ sign is available to override
-         * that. Eg if hin were set to load eng by default, then hin+~eng would force
-         * loading only hin. The number of loaded languages is limited only by
-         * memory, with the caveat that loading additional languages will impact
-         * both speed and accuracy, as there is more work to do to decide on the
-         * applicable language, and there is more chance of hallucinating incorrect
-         * words.
-         * WARNING: On changing languages, all Tesseract parameters are reset
-         * back to their default values. (Which may vary between languages.)
-         * If you have a rare need to set a Variable that controls
-         * initialization for a second call to Init you should explicitly
-         * call End() and then use SetVariable before Init. This is only a very
-         * rare use case, since there are very few uses that require any parameters
-         * to be set before Init.
-         *
-         * If set_only_non_debug_params is true, only params that do not contain
-         * "debug" in the name will be set.
-         */
-        int Init(const char* datapath, const char* language, OcrEngineMode mode,
-                 char **configs, int configs_size,
-                 const GenericVector<STRING> *vars_vec,
-                 const GenericVector<STRING> *vars_values,
-                 bool set_only_non_debug_params);
-        int Init(const char* datapath, const char* language, OcrEngineMode oem) {
-            return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-        }
-        int Init(const char* datapath, const char* language) {
-            return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
-        }
-        // In-memory version reads the traineddata file directly from the given
-        // data[data_size] array, and/or reads data via a FileReader.
-        int Init(const char* data, int data_size, const char* language,
-                 OcrEngineMode mode, char** configs, int configs_size,
-                 const GenericVector<STRING>* vars_vec,
-                 const GenericVector<STRING>* vars_values,
-                 bool set_only_non_debug_params, FileReader reader);
-
-        /**
-         * Returns the languages string used in the last valid initialization.
-         * If the last initialization specified "deu+hin" then that will be
-         * returned. If hin loaded eng automatically as well, then that will
-         * not be included in this list. To find the languages actually
-         * loaded use GetLoadedLanguagesAsVector.
-         * The returned string should NOT be deleted.
-         */
-        const char* GetInitLanguagesAsString() const;
-
-        /**
-         * Returns the loaded languages in the vector of STRINGs.
-         * Includes all languages loaded by the last Init, including those loaded
-         * as dependencies of other loaded languages.
-         */
-        void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
-
-        /**
-         * Returns the available languages in the sorted vector of STRINGs.
-         */
-        void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
-
-        /**
-         * Init only the lang model component of Tesseract. The only functions
-         * that work after this init are SetVariable and IsValidWord.
-         * WARNING: temporary! This function will be removed from here and placed
-         * in a separate API at some future time.
-         */
-        int InitLangMod(const char* datapath, const char* language);
-
-        /**
-         * Init only for page layout analysis. Use only for calls to SetImage and
-         * AnalysePage. Calls that attempt recognition will generate an error.
-         */
-        void InitForAnalysePage();
-
-        /**
-         * Read a "config" file containing a set of param, value pairs.
-         * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-         * and also accepts a relative or absolute path name.
-         * Note: only non-init params will be set (init params are set by Init()).
-         */
-        void ReadConfigFile(const char* filename);
-        /** Same as above, but only set debug params from the given config file. */
-        void ReadDebugConfigFile(const char* filename);
-
-        /**
-         * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-         * The mode is stored as an IntParam so it can also be modified by
-         * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-         */
-        void SetPageSegMode(PageSegMode mode);
-
-        /** Return the current page segmentation mode. */
-        PageSegMode GetPageSegMode() const;
-
-        /**
-         * Recognize a rectangle from an image and return the result as a string.
-         * May be called many times for a single Init.
-         * Currently has no error checking.
-         * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-         * Palette color images will not work properly and must be converted to
-         * 24 bit.
-         * Binary images of 1 bit per pixel may also be given but they must be
-         * byte packed with the MSB of the first byte being the first pixel, and a
-         * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-         * The recognized text is returned as a char* which is coded
-         * as UTF8 and must be freed with the delete [] operator.
-         *
-         * Note that TesseractRect is the simplified convenience interface.
-         * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-         * and one or more of the Get*Text functions below.
-         */
-        char* TesseractRect(const unsigned char* imagedata,
-                            int bytes_per_pixel, int bytes_per_line,
-                            int left, int top, int width, int height);
-
-        /**
-         * Call between pages or documents etc to free up memory and forget
-         * adaptive data.
-         */
-        void ClearAdaptiveClassifier();
-
-        /**
-         * @defgroup AdvancedAPI Advanced API
-         * The following methods break TesseractRect into pieces, so you can
-         * get hold of the thresholded image, get the text in different formats,
-         * get bounding boxes, confidences etc.
-         */
-        /* @{ */
-
-        /**
-         * Provide an image for Tesseract to recognize. Format is as
-         * TesseractRect above. Copies the image buffer and converts to Pix.
-         * SetImage clears all recognition results, and sets the rectangle to the
-         * full image, so it may be followed immediately by a GetUTF8Text, and it
-         * will automatically perform recognition.
-         */
-        void SetImage(const unsigned char* imagedata, int width, int height,
-                      int bytes_per_pixel, int bytes_per_line);
-
-        /**
-         * Provide an image for Tesseract to recognize. As with SetImage above,
-         * Tesseract takes its own copy of the image, so it need not persist until
-         * after Recognize.
-         * Pix vs raw, which to use?
-         * Use Pix where possible. Tesseract uses Pix as its internal representation
-         * and it is therefore more efficient to provide a Pix directly.
-         */
-        void SetImage(Pix* pix);
-
-        /**
-         * Set the resolution of the source image in pixels per inch so font size
-         * information can be calculated in results.  Call this after SetImage().
-         */
-        void SetSourceResolution(int ppi);
-
-        /**
-         * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-         * Each SetRectangle clears the recogntion results so multiple rectangles
-         * can be recognized with the same image.
-         */
-        void SetRectangle(int left, int top, int width, int height);
-
-        /**
-         * In extreme cases only, usually with a subclass of Thresholder, it
-         * is possible to provide a different Thresholder. The Thresholder may
-         * be preloaded with an image, settings etc, or they may be set after.
-         * Note that Tesseract takes ownership of the Thresholder and will
-         * delete it when it it is replaced or the API is destructed.
-         */
-        void SetThresholder(ImageThresholder* thresholder) {
-            delete thresholder_;
-            thresholder_ = thresholder;
-            ClearResults();
-        }
-
-        /**
-         * Get a copy of the internal thresholded image from Tesseract.
-         * Caller takes ownership of the Pix and must pixDestroy it.
-         * May be called any time after SetImage, or after TesseractRect.
-         */
-        Pix* GetThresholdedImage();
-
-        /**
-         * Get the result of page layout analysis as a leptonica-style
-         * Boxa, Pixa pair, in reading order.
-         * Can be called before or after Recognize.
-         */
-        Boxa* GetRegions(Pixa** pixa);
-
-        /**
-         * Get the textlines as a leptonica-style
-         * Boxa, Pixa pair, in reading order.
-         * Can be called before or after Recognize.
-         * If raw_image is true, then extract from the original image instead of the
-         * thresholded image and pad by raw_padding pixels.
-         * If blockids is not nullptr, the block-id of each line is also returned as an
-         * array of one element per line. delete [] after use.
-         * If paraids is not nullptr, the paragraph-id of each line within its block is
-         * also returned as an array of one element per line. delete [] after use.
-         */
-        Boxa* GetTextlines(const bool raw_image, const int raw_padding,
+class TESS_API TessBaseAPI {
+ public:
+  TessBaseAPI();
+  virtual ~TessBaseAPI();
+
+  /**
+   * Returns the version identifier as a static string. Do not delete.
+   */
+  static const char* Version();
+
+  /**
+   * If compiled with OpenCL AND an available OpenCL
+   * device is deemed faster than serial code, then
+   * "device" is populated with the cl_device_id
+   * and returns sizeof(cl_device_id)
+   * otherwise *device=nullptr and returns 0.
+   */
+  static size_t getOpenCLDevice(void **device);
+
+  /**
+   * Writes the thresholded image to stderr as a PBM file on receipt of a
+   * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
+   */
+  static void CatchSignals();
+
+  /**
+   * Set the name of the input file. Needed for training and
+   * reading a UNLV zone file, and for searchable PDF output.
+   */
+  void SetInputName(const char* name);
+  /**
+   * These functions are required for searchable PDF output.
+   * We need our hands on the input file so that we can include
+   * it in the PDF without transcoding. If that is not possible,
+   * we need the original image. Finally, resolution metadata
+   * is stored in the PDF so we need that as well.
+   */
+  const char* GetInputName();
+  // Takes ownership of the input pix.
+  void SetInputImage(Pix *pix);
+  Pix* GetInputImage();
+  int GetSourceYResolution();
+  const char* GetDatapath();
+
+  /** Set the name of the bonus output files. Needed only for debugging. */
+  void SetOutputName(const char* name);
+
+  /**
+   * Set the value of an internal "parameter."
+   * Supply the name of the parameter and the value as a string, just as
+   * you would in a config file.
+   * Returns false if the name lookup failed.
+   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
+   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
+   * SetVariable may be used before Init, but settings will revert to
+   * defaults on End().
+   *
+   * Note: Must be called after Init(). Only works for non-init variables
+   * (init variables should be passed to Init()).
+   */
+  bool SetVariable(const char* name, const char* value);
+  bool SetDebugVariable(const char* name, const char* value);
+
+  /**
+   * Returns true if the parameter was found among Tesseract parameters.
+   * Fills in value with the value of the parameter.
+   */
+  bool GetIntVariable(const char *name, int *value) const;
+  bool GetBoolVariable(const char *name, bool *value) const;
+  bool GetDoubleVariable(const char *name, double *value) const;
+
+  /**
+   * Returns the pointer to the string that represents the value of the
+   * parameter if it was found among Tesseract parameters.
+   */
+  const char *GetStringVariable(const char *name) const;
+
+  /**
+   * Print Tesseract parameters to the given file.
+   */
+  void PrintVariables(FILE *fp) const;
+
+  /**
+   * Get value of named variable as a string, if it exists.
+   */
+  bool GetVariableAsString(const char *name, STRING *val);
+
+  /**
+   * Instances are now mostly thread-safe and totally independent,
+   * but some global parameters remain. Basically it is safe to use multiple
+   * TessBaseAPIs in different threads in parallel, UNLESS:
+   * you use SetVariable on some of the Params in classify and textord.
+   * If you do, then the effect will be to change it for all your instances.
+   *
+   * Start tesseract. Returns zero on success and -1 on failure.
+   * NOTE that the only members that may be called before Init are those
+   * listed above here in the class definition.
+   *
+   * The datapath must be the name of the parent directory of tessdata and
+   * must end in / . Any name after the last / will be stripped.
+   * The language is (usually) an ISO 639-3 string or nullptr will default to eng.
+   * It is entirely safe (and eventually will be efficient too) to call
+   * Init multiple times on the same instance to change language, or just
+   * to reset the classifier.
+   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
+   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
+   * English. Languages may specify internally that they want to be loaded
+   * with one or more other languages, so the ~ sign is available to override
+   * that. Eg if hin were set to load eng by default, then hin+~eng would force
+   * loading only hin. The number of loaded languages is limited only by
+   * memory, with the caveat that loading additional languages will impact
+   * both speed and accuracy, as there is more work to do to decide on the
+   * applicable language, and there is more chance of hallucinating incorrect
+   * words.
+   * WARNING: On changing languages, all Tesseract parameters are reset
+   * back to their default values. (Which may vary between languages.)
+   * If you have a rare need to set a Variable that controls
+   * initialization for a second call to Init you should explicitly
+   * call End() and then use SetVariable before Init. This is only a very
+   * rare use case, since there are very few uses that require any parameters
+   * to be set before Init.
+   *
+   * If set_only_non_debug_params is true, only params that do not contain
+   * "debug" in the name will be set.
+   */
+  int Init(const char* datapath, const char* language, OcrEngineMode mode,
+           char **configs, int configs_size,
+           const GenericVector<STRING> *vars_vec,
+           const GenericVector<STRING> *vars_values,
+           bool set_only_non_debug_params);
+  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
+    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
+  }
+  int Init(const char* datapath, const char* language) {
+    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
+  }
+  // In-memory version reads the traineddata file directly from the given
+  // data[data_size] array, and/or reads data via a FileReader.
+  int Init(const char* data, int data_size, const char* language,
+           OcrEngineMode mode, char** configs, int configs_size,
+           const GenericVector<STRING>* vars_vec,
+           const GenericVector<STRING>* vars_values,
+           bool set_only_non_debug_params, FileReader reader);
+
+  /**
+   * Returns the languages string used in the last valid initialization.
+   * If the last initialization specified "deu+hin" then that will be
+   * returned. If hin loaded eng automatically as well, then that will
+   * not be included in this list. To find the languages actually
+   * loaded use GetLoadedLanguagesAsVector.
+   * The returned string should NOT be deleted.
+   */
+  const char* GetInitLanguagesAsString() const;
+
+  /**
+   * Returns the loaded languages in the vector of STRINGs.
+   * Includes all languages loaded by the last Init, including those loaded
+   * as dependencies of other loaded languages.
+   */
+  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
+
+  /**
+   * Returns the available languages in the sorted vector of STRINGs.
+   */
+  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
+
+  /**
+   * Init only the lang model component of Tesseract. The only functions
+   * that work after this init are SetVariable and IsValidWord.
+   * WARNING: temporary! This function will be removed from here and placed
+   * in a separate API at some future time.
+   */
+  int InitLangMod(const char* datapath, const char* language);
+
+  /**
+   * Init only for page layout analysis. Use only for calls to SetImage and
+   * AnalysePage. Calls that attempt recognition will generate an error.
+   */
+  void InitForAnalysePage();
+
+  /**
+   * Read a "config" file containing a set of param, value pairs.
+   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
+   * and also accepts a relative or absolute path name.
+   * Note: only non-init params will be set (init params are set by Init()).
+   */
+  void ReadConfigFile(const char* filename);
+  /** Same as above, but only set debug params from the given config file. */
+  void ReadDebugConfigFile(const char* filename);
+
+  /**
+   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
+   * The mode is stored as an IntParam so it can also be modified by
+   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
+   */
+  void SetPageSegMode(PageSegMode mode);
+
+  /** Return the current page segmentation mode. */
+  PageSegMode GetPageSegMode() const;
+
+  /**
+   * Recognize a rectangle from an image and return the result as a string.
+   * May be called many times for a single Init.
+   * Currently has no error checking.
+   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
+   * Palette color images will not work properly and must be converted to
+   * 24 bit.
+   * Binary images of 1 bit per pixel may also be given but they must be
+   * byte packed with the MSB of the first byte being the first pixel, and a
+   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
+   * The recognized text is returned as a char* which is coded
+   * as UTF8 and must be freed with the delete [] operator.
+   *
+   * Note that TesseractRect is the simplified convenience interface.
+   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
+   * and one or more of the Get*Text functions below.
+   */
+  char* TesseractRect(const unsigned char* imagedata,
+                      int bytes_per_pixel, int bytes_per_line,
+                      int left, int top, int width, int height);
+
+  /**
+   * Call between pages or documents etc to free up memory and forget
+   * adaptive data.
+   */
+  void ClearAdaptiveClassifier();
+
+  /**
+   * @defgroup AdvancedAPI Advanced API
+   * The following methods break TesseractRect into pieces, so you can
+   * get hold of the thresholded image, get the text in different formats,
+   * get bounding boxes, confidences etc.
+   */
+   /* @{ */
+
+  /**
+   * Provide an image for Tesseract to recognize. Format is as
+   * TesseractRect above. Copies the image buffer and converts to Pix.
+   * SetImage clears all recognition results, and sets the rectangle to the
+   * full image, so it may be followed immediately by a GetUTF8Text, and it
+   * will automatically perform recognition.
+   */
+  void SetImage(const unsigned char* imagedata, int width, int height,
+                int bytes_per_pixel, int bytes_per_line);
+
+  /**
+   * Provide an image for Tesseract to recognize. As with SetImage above,
+   * Tesseract takes its own copy of the image, so it need not persist until
+   * after Recognize.
+   * Pix vs raw, which to use?
+   * Use Pix where possible. Tesseract uses Pix as its internal representation
+   * and it is therefore more efficient to provide a Pix directly.
+   */
+  void SetImage(Pix* pix);
+
+  /**
+   * Set the resolution of the source image in pixels per inch so font size
+   * information can be calculated in results.  Call this after SetImage().
+   */
+  void SetSourceResolution(int ppi);
+
+  /**
+   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
+   * Each SetRectangle clears the recognition results so multiple rectangles
+   * can be recognized with the same image.
+   */
+  void SetRectangle(int left, int top, int width, int height);
+
+  /**
+   * In extreme cases only, usually with a subclass of Thresholder, it
+   * is possible to provide a different Thresholder. The Thresholder may
+   * be preloaded with an image, settings etc, or they may be set after.
+   * Note that Tesseract takes ownership of the Thresholder and will
+   * delete it when it is replaced or the API is destructed.
+   */
+  void SetThresholder(ImageThresholder* thresholder) {
+    delete thresholder_;  // Free the previously held thresholder.
+    thresholder_ = thresholder;
+    ClearResults();
+  }
+
+  /**
+   * Get a copy of the internal thresholded image from Tesseract.
+   * Caller takes ownership of the Pix and must pixDestroy it.
+   * May be called any time after SetImage, or after TesseractRect.
+   */
+  Pix* GetThresholdedImage();
+
+  /**
+   * Get the result of page layout analysis as a leptonica-style
+   * Boxa, Pixa pair, in reading order.
+   * Can be called before or after Recognize.
+   */
+  Boxa* GetRegions(Pixa** pixa);
+
+  /**
+   * Get the textlines as a leptonica-style
+   * Boxa, Pixa pair, in reading order.
+   * Can be called before or after Recognize.
+   * If raw_image is true, then extract from the original image instead of the
+   * thresholded image and pad by raw_padding pixels.
+   * If blockids is not nullptr, the block-id of each line is also returned as an
+   * array of one element per line. delete [] after use.
+   * If paraids is not nullptr, the paragraph-id of each line within its block is
+   * also returned as an array of one element per line. delete [] after use.
+   */
+  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
+                     Pixa** pixa, int** blockids, int** paraids);
+  /* Helper for the most common usage: extracts from the thresholded image
+     (raw_image = false, raw_padding = 0) and does not return paragraph ids.
+  */
+  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
+    return GetTextlines(false, 0, pixa, blockids, nullptr);
+  }
+
+  /**
+   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
+   * pair, in reading order. Enables downstream handling of non-rectangular
+   * regions.
+   * Can be called before or after Recognize.
+   * If blockids is not nullptr, the block-id of each line is also returned as an
+   * array of one element per line. delete [] after use.
+   */
+  Boxa* GetStrips(Pixa** pixa, int** blockids);
+
+  /**
+   * Get the words as a leptonica-style
+   * Boxa, Pixa pair, in reading order.
+   * Can be called before or after Recognize.
+   */
+  Boxa* GetWords(Pixa** pixa);
+
+  /**
+   * Gets the individual connected (text) components (created
+   * after the page segmentation step, but before recognition)
+   * as a leptonica-style Boxa, Pixa pair, in reading order.
+   * Can be called before or after Recognize.
+   * Note: the caller is responsible for calling boxaDestroy()
+   * on the returned Boxa array and pixaDestroy() on cc array.
+   */
+  Boxa* GetConnectedComponents(Pixa** cc);
+
+  /**
+   * Get the given level kind of components (block, textline, word etc.) as a
+   * leptonica-style Boxa, Pixa pair, in reading order.
+   * Can be called before or after Recognize.
+   * If blockids is not nullptr, the block-id of each component is also returned
+   * as an array of one element per component. delete [] after use.
+   * If paraids is not nullptr, the paragraph-id of each component within its block
+   * is also returned as an array of one element per component. delete [] after
+   * use.
+   * If raw_image is true, then portions of the original image are extracted
+   * instead of the thresholded image and padded with raw_padding.
+   * If text_only is true, then only text components are returned.
+   */
+  Boxa* GetComponentImages(const PageIteratorLevel level,
+                           const bool text_only, const bool raw_image,
+                           const int raw_padding,
                            Pixa** pixa, int** blockids, int** paraids);
-        /*
-           Helper method to extract from the thresholded image. (most common usage)
-        */
-        Boxa* GetTextlines(Pixa** pixa, int** blockids) {
-            return GetTextlines(false, 0, pixa, blockids, nullptr);
-        }
-
-        /**
-         * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-         * pair, in reading order. Enables downstream handling of non-rectangular
-         * regions.
-         * Can be called before or after Recognize.
-         * If blockids is not nullptr, the block-id of each line is also returned as an
-         * array of one element per line. delete [] after use.
-         */
-        Boxa* GetStrips(Pixa** pixa, int** blockids);
-
-        /**
-         * Get the words as a leptonica-style
-         * Boxa, Pixa pair, in reading order.
-         * Can be called before or after Recognize.
-         */
-        Boxa* GetWords(Pixa** pixa);
-
-        /**
-         * Gets the individual connected (text) components (created
-         * after pages segmentation step, but before recognition)
-         * as a leptonica-style Boxa, Pixa pair, in reading order.
-         * Can be called before or after Recognize.
-         * Note: the caller is responsible for calling boxaDestroy()
-         * on the returned Boxa array and pixaDestroy() on cc array.
-         */
-        Boxa* GetConnectedComponents(Pixa** cc);
-
-        /**
-         * Get the given level kind of components (block, textline, word etc.) as a
-         * leptonica-style Boxa, Pixa pair, in reading order.
-         * Can be called before or after Recognize.
-         * If blockids is not nullptr, the block-id of each component is also returned
-         * as an array of one element per component. delete [] after use.
-         * If blockids is not nullptr, the paragraph-id of each component with its block
-         * is also returned as an array of one element per component. delete [] after
-         * use.
-         * If raw_image is true, then portions of the original image are extracted
-         * instead of the thresholded image and padded with raw_padding.
-         * If text_only is true, then only text components are returned.
-         */
-        Boxa* GetComponentImages(const PageIteratorLevel level,
-                                 const bool text_only, const bool raw_image,
-                                 const int raw_padding,
-                                 Pixa** pixa, int** blockids, int** paraids);
-        // Helper function to get binary images with no padding (most common usage).
-        Boxa* GetComponentImages(const PageIteratorLevel level,
-                                 const bool text_only,
-                                 Pixa** pixa, int** blockids) {
-            return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
-        }
-
-        /**
-         * Returns the scale factor of the thresholded image that would be returned by
-         * GetThresholdedImage() and the various GetX() methods that call
-         * GetComponentImages().
-         * Returns 0 if no thresholder has been set.
-         */
-        int GetThresholdedImageScaleFactor() const;
-
-        /**
-         * Runs page layout analysis in the mode set by SetPageSegMode.
-         * May optionally be called prior to Recognize to get access to just
-         * the page layout results. Returns an iterator to the results.
-         * If merge_similar_words is true, words are combined where suitable for use
-         * with a line recognizer. Use if you want to use AnalyseLayout to find the
-         * textlines, and then want to process textline fragments with an external
-         * line recognizer.
-         * Returns nullptr on error or an empty page.
-         * The returned iterator must be deleted after use.
-         * WARNING! This class points to data held within the TessBaseAPI class, and
-         * therefore can only be used while the TessBaseAPI class still exists and
-         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-         * DetectOS, or anything else that changes the internal PAGE_RES.
-         */
-        PageIterator* AnalyseLayout();
-        PageIterator* AnalyseLayout(bool merge_similar_words);
-
-        /**
-         * Recognize the image from SetAndThresholdImage, generating Tesseract
-         * internal structures. Returns 0 on success.
-         * Optional. The Get*Text functions below will call Recognize if needed.
-         * After Recognize, the output is kept internally until the next SetImage.
-         */
-        int Recognize(ETEXT_DESC* monitor);
-
-        /**
-         * Methods to retrieve information after SetAndThresholdImage(),
-         * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-         */
+  // Helper for the common case: thresholded images, no padding, no paraids.
+  Boxa* GetComponentImages(const PageIteratorLevel level,
+                           const bool text_only,
+                           Pixa** pixa, int** blockids) {
+    return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
+  }
+
+  /**
+   * Returns the scale factor of the thresholded image that would be returned by
+   * GetThresholdedImage() and the various GetX() methods that call
+   * GetComponentImages().
+   * Returns 0 if no thresholder has been set.
+   */
+  int GetThresholdedImageScaleFactor() const;
+
+  /**
+   * Runs page layout analysis in the mode set by SetPageSegMode.
+   * May optionally be called prior to Recognize to get access to just
+   * the page layout results. Returns an iterator to the results.
+   * If merge_similar_words is true, words are combined where suitable for use
+   * with a line recognizer. Use if you want to use AnalyseLayout to find the
+   * textlines, and then want to process textline fragments with an external
+   * line recognizer.
+   * Returns nullptr on error or an empty page.
+   * The returned iterator must be deleted after use.
+   * WARNING! This class points to data held within the TessBaseAPI class, and
+   * therefore can only be used while the TessBaseAPI class still exists and
+   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+   * DetectOS, or anything else that changes the internal PAGE_RES.
+   */
+  PageIterator* AnalyseLayout();
+  PageIterator* AnalyseLayout(bool merge_similar_words);
+
+  /**
+   * Recognize the image from SetAndThresholdImage, generating Tesseract
+   * internal structures. Returns 0 on success.
+   * Optional. The Get*Text functions below will call Recognize if needed.
+   * After Recognize, the output is kept internally until the next SetImage.
+   */
+  int Recognize(ETEXT_DESC* monitor);
+
+  /**
+   * Methods to retrieve information after SetAndThresholdImage(),
+   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
+   */
+
+  #ifndef DISABLED_LEGACY_ENGINE
+  /** Variant on Recognize used for testing chopper. */
+  int RecognizeForChopTest(ETEXT_DESC* monitor);
+  #endif
+
+  /**
+   * Turns images into symbolic text.
+   *
+   * filename can point to a single image, a multi-page TIFF,
+   * or a plain text list of image filenames.
+   *
+   * retry_config is useful for debugging. If not nullptr, you can fall
+   * back to an alternate configuration if a page fails for some
+   * reason.
+   *
+   * timeout_millisec terminates processing if any single page
+   * takes too long. Set to 0 for unlimited time.
+   *
+   * renderer is responsible for creating the output. For example,
+   * use the TessTextRenderer if you want plaintext output, or
+   * the TessPDFRenderer to produce searchable PDF.
+   *
+   * If tessedit_page_number is non-negative, will only process that
+   * single page. Works for multi-page tiff file, or filelist.
+   *
+   * Returns true if successful, false on error.
+   */
+  bool ProcessPages(const char* filename, const char* retry_config,
+                    int timeout_millisec, TessResultRenderer* renderer);
+  // Does the real work of ProcessPages.
+  bool ProcessPagesInternal(const char* filename, const char* retry_config,
+                            int timeout_millisec, TessResultRenderer* renderer);
+
+  /**
+   * Turn a single image into symbolic text.
+   *
+   * The pix is the image processed. filename and page_index are
+   * metadata used by side-effect processes, such as reading a box
+   * file or formatting as hOCR.
+   *
+   * See ProcessPages for descriptions of other parameters.
+   */
+  bool ProcessPage(Pix* pix, int page_index, const char* filename,
+                   const char* retry_config, int timeout_millisec,
+                   TessResultRenderer* renderer);
+
+  /**
+   * Get a reading-order iterator to the results of LayoutAnalysis and/or
+   * Recognize. The returned iterator must be deleted after use.
+   * WARNING! This class points to data held within the TessBaseAPI class, and
+   * therefore can only be used while the TessBaseAPI class still exists and
+   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+   * DetectOS, or anything else that changes the internal PAGE_RES.
+   */
+  ResultIterator* GetIterator();
+
+  /**
+   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
+   * The returned iterator must be deleted after use.
+   * WARNING! This class points to data held within the TessBaseAPI class, and
+   * therefore can only be used while the TessBaseAPI class still exists and
+   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+   * DetectOS, or anything else that changes the internal PAGE_RES.
+   */
+  MutableIterator* GetMutableIterator();
+
+  /**
+   * The recognized text is returned as a char* which is coded
+   * as UTF8 and must be freed with the delete [] operator.
+   */
+  char* GetUTF8Text();
+
+  /**
+   * Make a HTML-formatted string with hOCR markup from the internal
+   * data structures.
+   * page_number is 0-based but will appear in the output as 1-based.
+   * monitor can be used to
+   *  cancel the recognition
+   *  receive progress callbacks
+   * Returned string must be freed with the delete [] operator.
+   */
+  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
+
+  /**
+   * Make a HTML-formatted string with hOCR markup from the internal
+   * data structures.
+   * page_number is 0-based but will appear in the output as 1-based.
+   * Returned string must be freed with the delete [] operator.
+   */
+  char* GetHOCRText(int page_number);
+
+  /**
+   * Make an XML-formatted string with Alto markup from the internal
+   * data structures.
+   */
+  char* GetAltoText(ETEXT_DESC* monitor, int page_number);
+
+
+  /**
+   * Make an XML-formatted string with Alto markup from the internal
+   * data structures.
+   */
+  char* GetAltoText(int page_number);
+
+  /**
+   * Make a TSV-formatted string from the internal data structures.
+   * page_number is 0-based but will appear in the output as 1-based.
+   * Returned string must be freed with the delete [] operator.
+   */
+  char* GetTSVText(int page_number);
+
+  /**
+   * The recognized text is returned as a char* which is coded in the same
+   * format as a box file used in training.
+   * Constructs coordinates in the original image - not just the rectangle.
+   * page_number is a 0-based page index that will appear in the box file.
+   * Returned string must be freed with the delete [] operator.
+   */
+  char* GetBoxText(int page_number);
+
+  /**
+   * The recognized text is returned as a char* which is coded
+   * as UNLV format Latin-1 with specific reject and suspect codes.
+   * Returned string must be freed with the delete [] operator.
+   */
+  char* GetUNLVText();
+
+  /**
+   * Detect the orientation of the input image and apparent script (alphabet).
+   * orient_deg is the detected clockwise rotation of the input image in degrees
+   * (0, 90, 180, 270)
+   * orient_conf is the confidence (15.0 is reasonably confident)
+   * script_name is an ASCII string, the name of the script, e.g. "Latin"
+   * script_conf is confidence level in the script
+   * Returns true on success and writes values to each parameter as an output
+   */
+  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
+                               const char** script_name, float* script_conf);
+
+  /**
+   * The recognized text is returned as a char* which is coded
+   * as UTF8 and must be freed with the delete [] operator.
+   * page_number is a 0-based page index that will appear in the osd file.
+   */
+  char* GetOsdText(int page_number);
+
+  /** Returns the (average) confidence value between 0 and 100. */
+  int MeanTextConf();
+  /**
+   * Returns all word confidences (between 0 and 100) in an array, terminated
+   * by -1.  The calling function must delete [] after use.
+   * The number of confidences should correspond to the number of space-
+   * delimited words in GetUTF8Text.
+   */
+  int* AllWordConfidences();
 
 #ifndef DISABLED_LEGACY_ENGINE
-        /** Variant on Recognize used for testing chopper. */
-        int RecognizeForChopTest(ETEXT_DESC* monitor);
-#endif
-
-        /**
-         * Turns images into symbolic text.
-         *
-         * filename can point to a single image, a multi-page TIFF,
-         * or a plain text list of image filenames.
-         *
-         * retry_config is useful for debugging. If not nullptr, you can fall
-         * back to an alternate configuration if a page fails for some
-         * reason.
-         *
-         * timeout_millisec terminates processing if any single page
-         * takes too long. Set to 0 for unlimited time.
-         *
-         * renderer is responible for creating the output. For example,
-         * use the TessTextRenderer if you want plaintext output, or
-         * the TessPDFRender to produce searchable PDF.
-         *
-         * If tessedit_page_number is non-negative, will only process that
-         * single page. Works for multi-page tiff file, or filelist.
-         *
-         * Returns true if successful, false on error.
-         */
-        bool ProcessPages(const char* filename, const char* retry_config,
-                          int timeout_millisec, TessResultRenderer* renderer);
-        // Does the real work of ProcessPages.
-        bool ProcessPagesInternal(const char* filename, const char* retry_config,
-                                  int timeout_millisec, TessResultRenderer* renderer);
-
-        /**
-         * Turn a single image into symbolic text.
-         *
-         * The pix is the image processed. filename and page_index are
-         * metadata used by side-effect processes, such as reading a box
-         * file or formatting as hOCR.
-         *
-         * See ProcessPages for desciptions of other parameters.
-         */
-        bool ProcessPage(Pix* pix, int page_index, const char* filename,
-                         const char* retry_config, int timeout_millisec,
-                         TessResultRenderer* renderer);
-
-        /**
-         * Get a reading-order iterator to the results of LayoutAnalysis and/or
-         * Recognize. The returned iterator must be deleted after use.
-         * WARNING! This class points to data held within the TessBaseAPI class, and
-         * therefore can only be used while the TessBaseAPI class still exists and
-         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-         * DetectOS, or anything else that changes the internal PAGE_RES.
-         */
-        ResultIterator* GetIterator();
-
-        /**
-         * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-         * The returned iterator must be deleted after use.
-         * WARNING! This class points to data held within the TessBaseAPI class, and
-         * therefore can only be used while the TessBaseAPI class still exists and
-         * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-         * DetectOS, or anything else that changes the internal PAGE_RES.
-         */
-        MutableIterator* GetMutableIterator();
-
-        /**
-         * The recognized text is returned as a char* which is coded
-         * as UTF8 and must be freed with the delete [] operator.
-         */
-        char* GetUTF8Text();
-
-        /**
-         * Make a HTML-formatted string with hOCR markup from the internal
-         * data structures.
-         * page_number is 0-based but will appear in the output as 1-based.
-         * monitor can be used to
-         *  cancel the recognition
-         *  receive progress callbacks
-         * Returned string must be freed with the delete [] operator.
-         */
-        char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
-
-        /**
-         * Make a HTML-formatted string with hOCR markup from the internal
-         * data structures.
-         * page_number is 0-based but will appear in the output as 1-based.
-         * Returned string must be freed with the delete [] operator.
-         */
-        char* GetHOCRText(int page_number);
-
-        /**
-        * Make an XML-formatted string with Alto markup from the internal
-        * data structures.
-        */
-        char* GetAltoText(ETEXT_DESC* monitor, int page_number);
-
-
-        /**
-       * Make an XML-formatted string with Alto markup from the internal
-       * data structures.
-       */
-        char* GetAltoText(int page_number);
-
-        /**
-         * Make a TSV-formatted string from the internal data structures.
-         * page_number is 0-based but will appear in the output as 1-based.
-         * Returned string must be freed with the delete [] operator.
-         */
-        char* GetTSVText(int page_number);
-
-        /**
-         * The recognized text is returned as a char* which is coded in the same
-         * format as a box file used in training.
-         * Constructs coordinates in the original image - not just the rectangle.
-         * page_number is a 0-based page index that will appear in the box file.
-         * Returned string must be freed with the delete [] operator.
-         */
-        char* GetBoxText(int page_number);
-
-        /**
-         * The recognized text is returned as a char* which is coded
-         * as UNLV format Latin-1 with specific reject and suspect codes.
-         * Returned string must be freed with the delete [] operator.
-         */
-        char* GetUNLVText();
-
-        /**
-         * Detect the orientation of the input image and apparent script (alphabet).
-         * orient_deg is the detected clockwise rotation of the input image in degrees
-         * (0, 90, 180, 270)
-         * orient_conf is the confidence (15.0 is reasonably confident)
-         * script_name is an ASCII string, the name of the script, e.g. "Latin"
-         * script_conf is confidence level in the script
-         * Returns true on success and writes values to each parameter as an output
-         */
-        bool DetectOrientationScript(int* orient_deg, float* orient_conf,
-                                     const char** script_name, float* script_conf);
-
-        /**
-         * The recognized text is returned as a char* which is coded
-         * as UTF8 and must be freed with the delete [] operator.
-         * page_number is a 0-based page index that will appear in the osd file.
-         */
-        char* GetOsdText(int page_number);
-
-        /** Returns the (average) confidence value between 0 and 100. */
-        int MeanTextConf();
-        /**
-         * Returns all word confidences (between 0 and 100) in an array, terminated
-         * by -1.  The calling function must delete [] after use.
-         * The number of confidences should correspond to the number of space-
-         * delimited words in GetUTF8Text.
-         */
-        int* AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-        /**
-         * Applies the given word to the adaptive classifier if possible.
-         * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-         * tell the boundaries of the graphemes.
-         * Assumes that SetImage/SetRectangle have been used to set the image
-         * to the given word. The mode arg should be PSM_SINGLE_WORD or
-         * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-         * The currently set PageSegMode is preserved.
-         * Returns false if adaption was not possible for some reason.
-         */
-        bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
+  /**
+   * Applies the given word to the adaptive classifier if possible.
+   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
+   * tell the boundaries of the graphemes.
+   * Assumes that SetImage/SetRectangle have been used to set the image
+   * to the given word. The mode arg should be PSM_SINGLE_WORD or
+   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
+   * The currently set PageSegMode is preserved.
+   * Returns false if adaption was not possible for some reason.
+   */
+  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
 #endif  //  ndef DISABLED_LEGACY_ENGINE
 
-        /**
-         * Free up recognition results and any stored image data, without actually
-         * freeing any recognition data that would be time-consuming to reload.
-         * Afterwards, you must call SetImage or TesseractRect before doing
-         * any Recognize or Get* operation.
-         */
-        void Clear();
-
-        /**
-         * Close down tesseract and free up all memory. End() is equivalent to
-         * destructing and reconstructing your TessBaseAPI.
-         * Once End() has been used, none of the other API functions may be used
-         * other than Init and anything declared above it in the class definition.
-         */
-        void End();
-
-        /**
-         * Clear any library-level memory caches.
-         * There are a variety of expensive-to-load constant data structures (mostly
-         * language dictionaries) that are cached globally -- surviving the Init()
-         * and End() of individual TessBaseAPI's.  This function allows the clearing
-         * of these caches.
-         **/
-        static void ClearPersistentCache();
-
-        /**
-         * Check whether a word is valid according to Tesseract's language model
-         * @return 0 if the word is invalid, non-zero if valid.
-         * @warning temporary! This function will be removed from here and placed
-         * in a separate API at some future time.
-         */
-        int IsValidWord(const char *word);
-        // Returns true if utf8_character is defined in the UniCharset.
-        bool IsValidCharacter(const char *utf8_character);
-
-
-        bool GetTextDirection(int* out_offset, float* out_slope);
-
-        /** Sets Dict::letter_is_okay_ function to point to the given function. */
-        void SetDictFunc(DictFunc f);
-
-        /** Sets Dict::probability_in_context_ function to point to the given
-         * function.
-         */
-        void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-        /**
-         * Estimates the Orientation And Script of the image.
-         * @return true if the image was processed successfully.
-         */
-        bool DetectOS(OSResults*);
-
-        /**
-         * Return text orientation of each block as determined by an earlier run
-         * of layout analysis.
-         */
-        void GetBlockTextOrientations(int** block_orientation,
-                                      bool** vertical_writing);
-
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-        /** Sets Wordrec::fill_lattice_ function to point to the given function. */
-        void SetFillLatticeFunc(FillLatticeFunc f);
-
-        /** Find lines from the image making the BLOCK_LIST. */
-        BLOCK_LIST* FindLinesCreateBlockList();
-
-        /**
-         * Delete a block list.
-         * This is to keep BLOCK_LIST pointer opaque
-         * and let go of including the other headers.
-         */
-        static void DeleteBlockList(BLOCK_LIST* block_list);
-
-        /** Returns a ROW object created from the input row specification. */
-        static ROW *MakeTessOCRRow(float baseline, float xheight,
-                                   float descender, float ascender);
-
-        /** Returns a TBLOB corresponding to the entire input image. */
-        static TBLOB *MakeTBLOB(Pix *pix);
-
-        /**
-         * This method baseline normalizes a TBLOB in-place. The input row is used
-         * for normalization. The denorm is an optional parameter in which the
-         * normalization-antidote is returned.
-         */
-        static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
-
-        /** This method returns the features associated with the input image. */
-        void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
-                                int* num_features, int* feature_outline_index);
-
-        /**
-         * This method returns the row to which a box of specified dimensions would
-         * belong. If no good match is found, it returns nullptr.
-         */
-        static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
-                                  int right, int bottom);
-
-        /**
-         * Method to run adaptive classifier on a blob.
-         * It returns at max num_max_matches results.
-         */
-        void RunAdaptiveClassifier(TBLOB* blob,
-                                   int num_max_matches,
-                                   int* unichar_ids,
-                                   float* ratings,
-                                   int* num_matches_returned);
+  /**
+   * Free up recognition results and any stored image data, without actually
+   * freeing any recognition data that would be time-consuming to reload.
+   * Afterwards, you must call SetImage or TesseractRect before doing
+   * any Recognize or Get* operation.
+   */
+  void Clear();
+
+  /**
+   * Close down tesseract and free up all memory. End() is equivalent to
+   * destructing and reconstructing your TessBaseAPI.
+   * Once End() has been used, none of the other API functions may be used
+   * other than Init and anything declared above it in the class definition.
+   */
+  void End();
+
+  /**
+   * Clear any library-level memory caches.
+   * There are a variety of expensive-to-load constant data structures (mostly
+   * language dictionaries) that are cached globally -- surviving the Init()
+   * and End() of individual TessBaseAPI's.  This function allows the clearing
+   * of these caches.
+   **/
+  static void ClearPersistentCache();
+
+  /**
+   * Check whether a word is valid according to Tesseract's language model
+   * @return 0 if the word is invalid, non-zero if valid.
+   * @warning temporary! This function will be removed from here and placed
+   * in a separate API at some future time.
+   */
+  int IsValidWord(const char *word);
+  // Returns true if utf8_character is defined in the UniCharset.
+  bool IsValidCharacter(const char *utf8_character);
+
+
+  bool GetTextDirection(int* out_offset, float* out_slope);
+
+  /** Sets Dict::letter_is_okay_ function to point to the given function. */
+  void SetDictFunc(DictFunc f);
+
+  /** Sets Dict::probability_in_context_ function to point to the given
+   * function.
+   */
+  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
+
+  /**
+   * Estimates the Orientation And Script of the image.
+   * @return true if the image was processed successfully.
+   */
+  bool DetectOS(OSResults*);
+
+  /**
+   * Return text orientation of each block as determined by an earlier run
+   * of layout analysis.
+   */
+  void GetBlockTextOrientations(int** block_orientation,
+                                bool** vertical_writing);
+
+
+  #ifndef DISABLED_LEGACY_ENGINE
+
+  /** Sets Wordrec::fill_lattice_ function to point to the given function. */
+  void SetFillLatticeFunc(FillLatticeFunc f);
+
+  /** Find lines from the image making the BLOCK_LIST. */
+  BLOCK_LIST* FindLinesCreateBlockList();
+
+  /**
+   * Delete a block list.
+   * This is to keep BLOCK_LIST pointer opaque
+   * and let go of including the other headers.
+   */
+  static void DeleteBlockList(BLOCK_LIST* block_list);
+
+  /** Returns a ROW object created from the input row specification. */
+  static ROW *MakeTessOCRRow(float baseline, float xheight,
+                             float descender, float ascender);
+
+  /** Returns a TBLOB corresponding to the entire input image. */
+  static TBLOB *MakeTBLOB(Pix *pix);
+
+  /**
+   * This method baseline normalizes a TBLOB in-place. The input row is used
+   * for normalization. The denorm is an optional parameter in which the
+   * normalization-antidote is returned.
+   */
+  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
+
+  /** This method returns the features associated with the input image. */
+  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
+                          int* num_features, int* feature_outline_index);
+
+  /**
+   * This method returns the row to which a box of specified dimensions would
+   * belong. If no good match is found, it returns nullptr.
+   */
+  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
+                            int right, int bottom);
+
+  /**
+   * Method to run adaptive classifier on a blob.
+   * It returns at max num_max_matches results.
+   */
+  void RunAdaptiveClassifier(TBLOB* blob,
+                             int num_max_matches,
+                             int* unichar_ids,
+                             float* ratings,
+                             int* num_matches_returned);
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-        /** This method returns the string form of the specified unichar. */
-        const char* GetUnichar(int unichar_id);
-
-        /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-        const Dawg *GetDawg(int i) const;
-
-        /** Return the number of dawgs loaded into tesseract_ object. */
-        int NumDawgs() const;
-
-        Tesseract* tesseract() const { return tesseract_; }
-
-        OcrEngineMode oem() const { return last_oem_requested_; }
-
-        void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
-
-        void set_min_orientation_margin(double margin);
-        /* @} */
-
-    protected:
-
-        /** Common code for setting the image. Returns true if Init has been called. */
-        TESS_LOCAL bool InternalSetImage();
-
-        /**
-         * Run the thresholder to make the thresholded image. If pix is not nullptr,
-         * the source is thresholded to pix instead of the internal IMAGE.
-         */
-        TESS_LOCAL virtual bool Threshold(Pix** pix);
-
-        /**
-         * Find lines from the image making the BLOCK_LIST.
-         * @return 0 on success.
-         */
-        TESS_LOCAL int FindLines();
-
-        /** Delete the pageres and block list ready for a new page. */
-        void ClearResults();
-
-        /**
-         * Return an LTR Result Iterator -- used only for training, as we really want
-         * to ignore all BiDi smarts at that point.
-         * delete once you're done with it.
-         */
-        TESS_LOCAL LTRResultIterator* GetLTRIterator();
-
-        /**
-         * Return the length of the output text string, as UTF8, assuming
-         * one newline per line and one per block, with a terminator,
-         * and assuming a single character reject marker for each rejected character.
-         * Also return the number of recognized blobs in blob_count.
-         */
-        TESS_LOCAL int TextLength(int* blob_count);
-
-        //// paragraphs.cpp ////////////////////////////////////////////////////
-        TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-        /** @defgroup ocropusAddOns ocropus add-ons */
-        /* @{ */
-
-        /**
-         * Adapt to recognize the current image as the given character.
-         * The image must be preloaded and be just an image of a single character.
-         */
-        TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
-                                         int length,
-                                         float baseline,
-                                         float xheight,
-                                         float descender,
-                                         float ascender);
-
-        /** Recognize text doing one pass only, using settings for a given pass. */
-        TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
-
-        TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
-                                              PAGE_RES* pass1_result);
-
-        /**
-         * Extract the OCR results, costs (penalty points for uncertainty),
-         * and the bounding boxes of the characters.
-         */
-        TESS_LOCAL static int TesseractExtractResult(char** text,
-                                                     int** lengths,
-                                                     float** costs,
-                                                     int** x0,
-                                                     int** y0,
-                                                     int** x1,
-                                                     int** y1,
-                                                     PAGE_RES* page_res);
-
-        TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
-        /* @} */
+  /** This method returns the string form of the specified unichar. */
+  const char* GetUnichar(int unichar_id);
+
+  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
+  const Dawg *GetDawg(int i) const;
+
+  /** Return the number of dawgs loaded into tesseract_ object. */
+  int NumDawgs() const;
+
+  Tesseract* tesseract() const { return tesseract_; }
+
+  OcrEngineMode oem() const { return last_oem_requested_; }
+
+  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
+
+  void set_min_orientation_margin(double margin);
+ /* @} */
+
+ protected:
+
+  /** Common code for setting the image. Returns true if Init has been called. */
+  TESS_LOCAL bool InternalSetImage();
+
+  /**
+   * Run the thresholder to make the thresholded image. If pix is not nullptr,
+   * the source is thresholded to pix instead of the internal IMAGE.
+   */
+  TESS_LOCAL virtual bool Threshold(Pix** pix);
+
+  /**
+   * Find lines from the image making the BLOCK_LIST.
+   * @return 0 on success.
+   */
+  TESS_LOCAL int FindLines();
+
+  /** Delete the pageres and block list ready for a new page. */
+  void ClearResults();
+
+  /**
+   * Return an LTR Result Iterator -- used only for training, as we really want
+   * to ignore all BiDi smarts at that point.
+   * delete once you're done with it.
+   */
+  TESS_LOCAL LTRResultIterator* GetLTRIterator();
+
+  /**
+   * Return the length of the output text string, as UTF8, assuming
+   * one newline per line and one per block, with a terminator,
+   * and assuming a single character reject marker for each rejected character.
+   * Also return the number of recognized blobs in blob_count.
+   */
+  TESS_LOCAL int TextLength(int* blob_count);
+
+  //// paragraphs.cpp ////////////////////////////////////////////////////
+  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
+
+  #ifndef DISABLED_LEGACY_ENGINE
+
+  /** @defgroup ocropusAddOns ocropus add-ons */
+  /* @{ */
+
+  /**
+   * Adapt to recognize the current image as the given character.
+   * The image must be preloaded and be just an image of a single character.
+   */
+  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
+                                   int length,
+                                   float baseline,
+                                   float xheight,
+                                   float descender,
+                                   float ascender);
+
+  /** Recognize text doing one pass only, using settings for a given pass. */
+  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
+
+  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
+                                        PAGE_RES* pass1_result);
+
+  /**
+   * Extract the OCR results, costs (penalty points for uncertainty),
+   * and the bounding boxes of the characters.
+   */
+  TESS_LOCAL static int TesseractExtractResult(char** text,
+                                    int** lengths,
+                                    float** costs,
+                                    int** x0,
+                                    int** y0,
+                                    int** x1,
+                                    int** y1,
+                                    PAGE_RES* page_res);
+
+  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
+  /* @} */
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
-    protected:
-        Tesseract*        tesseract_;       ///< The underlying data object.
-        Tesseract*        osd_tesseract_;   ///< For orientation & script detection.
-        EquationDetect*   equ_detect_;      ///<The equation detector.
-        FileReader reader_;                 ///< Reads files from any filesystem.
-        ImageThresholder* thresholder_;     ///< Image thresholding module.
-        GenericVector<ParagraphModel *>* paragraph_models_;
-        BLOCK_LIST*       block_list_;      ///< The page layout.
-        PAGE_RES*         page_res_;        ///< The page-level data.
-        STRING*           input_file_;      ///< Name used by training code.
-        STRING*           output_file_;     ///< Name used by debug code.
-        STRING*           datapath_;        ///< Current location of tessdata.
-        STRING*           language_;        ///< Last initialized language.
-        OcrEngineMode last_oem_requested_;  ///< Last ocr language mode requested.
-        bool          recognition_done_;   ///< page_res_ contains recognition data.
-        TruthCallback *truth_cb_;           /// fxn for setting truth_* in WERD_RES
-
-        /**
-         * @defgroup ThresholderParams Thresholder Parameters
-         * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-         */
-        /* @{ */
-        int rect_left_;
-        int rect_top_;
-        int rect_width_;
-        int rect_height_;
-        int image_width_;
-        int image_height_;
-        /* @} */
-
-    private:
-        // A list of image filenames gets special consideration
-        bool ProcessPagesFileList(FILE *fp,
-                                  STRING *buf,
-                                  const char* retry_config, int timeout_millisec,
-                                  TessResultRenderer* renderer,
-                                  int tessedit_page_number);
-        // TIFF supports multipage so gets special consideration.
-        bool ProcessPagesMultipageTiff(const unsigned char *data,
-                                       size_t size,
-                                       const char* filename,
-                                       const char* retry_config,
-                                       int timeout_millisec,
-                                       TessResultRenderer* renderer,
-                                       int tessedit_page_number);
-        // There's currently no way to pass a document title from the
-        // Tesseract command line, and we have multiple places that choose
-        // to set the title to an empty string. Using a single named
-        // variable will hopefully reduce confusion if the situation changes
-        // in the future.
-        const char *unknown_title_ = "";
-    };  // class TessBaseAPI.
+ protected:
+  Tesseract*        tesseract_;       ///< The underlying data object.
+  Tesseract*        osd_tesseract_;   ///< For orientation & script detection.
+  EquationDetect*   equ_detect_;      ///<The equation detector.
+  FileReader reader_;                 ///< Reads files from any filesystem.
+  ImageThresholder* thresholder_;     ///< Image thresholding module.
+  GenericVector<ParagraphModel *>* paragraph_models_;
+  BLOCK_LIST*       block_list_;      ///< The page layout.
+  PAGE_RES*         page_res_;        ///< The page-level data.
+  STRING*           input_file_;      ///< Name used by training code.
+  STRING*           output_file_;     ///< Name used by debug code.
+  STRING*           datapath_;        ///< Current location of tessdata.
+  STRING*           language_;        ///< Last initialized language.
+  OcrEngineMode last_oem_requested_;  ///< Last ocr language mode requested.
+  bool          recognition_done_;   ///< page_res_ contains recognition data.
+  TruthCallback *truth_cb_;           /// fxn for setting truth_* in WERD_RES
+
+  /**
+   * @defgroup ThresholderParams Thresholder Parameters
+   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
+   */
+  /* @{ */
+  int rect_left_;
+  int rect_top_;
+  int rect_width_;
+  int rect_height_;
+  int image_width_;
+  int image_height_;
+  /* @} */
+
+ private:
+  // A list of image filenames gets special consideration
+  bool ProcessPagesFileList(FILE *fp,
+                            STRING *buf,
+                            const char* retry_config, int timeout_millisec,
+                            TessResultRenderer* renderer,
+                            int tessedit_page_number);
+  // TIFF supports multipage so gets special consideration.
+  bool ProcessPagesMultipageTiff(const unsigned char *data,
+                                 size_t size,
+                                 const char* filename,
+                                 const char* retry_config,
+                                 int timeout_millisec,
+                                 TessResultRenderer* renderer,
+                                 int tessedit_page_number);
+  // There's currently no way to pass a document title from the
+  // Tesseract command line, and we have multiple places that choose
+  // to set the title to an empty string. Using a single named
+  // variable will hopefully reduce confusion if the situation changes
+  // in the future.
+  const char *unknown_title_ = "";
+};  // class TessBaseAPI.
 
 /** Escape a char string - remove &<>"' with HTML codes. */
-    STRING HOcrEscape(const char* text);
+STRING HOcrEscape(const char* text);
 }  // namespace tesseract.
 
 #endif  // TESSERACT_API_BASEAPI_H_
diff --git a/src/api/capi.cpp b/src/api/capi.cpp
index 1bbf621c25..333bbcd3fe 100644
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
@@ -244,9 +244,9 @@ TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, cons
 }
 
 TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language,
-                                        TessOcrEngineMode mode, char** configs, int configs_size,
-                                        char** vars_vec, char** vars_values, size_t vars_vec_size,
-                                        BOOL set_only_non_debug_params)
+    TessOcrEngineMode mode, char** configs, int configs_size,
+    char** vars_vec, char** vars_values, size_t vars_vec_size,
+    BOOL set_only_non_debug_params)
 {
     GenericVector<STRING> varNames;
     GenericVector<STRING> varValues;
@@ -337,8 +337,8 @@ TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI*
 }
 
 TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
-                                         int bytes_per_pixel, int bytes_per_line,
-                                         int left, int top, int width, int height)
+                                               int bytes_per_pixel, int bytes_per_line,
+                                               int left, int top, int width, int height)
 {
     return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height);
 }
@@ -351,7 +351,7 @@ TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle)
 #endif
 
 TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height,
-                                            int bytes_per_pixel, int bytes_per_line)
+                                                  int bytes_per_pixel, int bytes_per_line)
 {
     handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
 }
@@ -392,7 +392,7 @@ TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, str
 }
 
 TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
-                                                         struct Pixa** pixa, int** blockids, int** paraids)
+                                                                  struct Pixa** pixa, int** blockids, int** paraids)
 {
     return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids);
 }
@@ -550,7 +550,7 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand
 #ifndef DISABLED_LEGACY_ENGINE
 
 TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
-                                                           int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
+                                                            int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
 {
     bool success;
     success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
@@ -558,7 +558,7 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
 }
 
 TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
-                                                      int* num_features, int* FeatureOutlineIndex)
+                                                            int* num_features, int* FeatureOutlineIndex)
 {
     handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex);
 }
@@ -569,7 +569,7 @@ TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
 }
 
 TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
-                                                         int* unichar_ids, float* ratings, int* num_matches_returned)
+                                                               int* unichar_ids, float* ratings, int* num_matches_returned)
 {
     handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned);
 }
@@ -661,13 +661,13 @@ TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator*
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                         TessPageIteratorLevel element)
+                                                               TessPageIteratorLevel element)
 {
     return handle->IsAtFinalElement(level, element) ? TRUE : FALSE;
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                    int* left, int* top, int* right, int* bottom)
+                                                          int* left, int* top, int* right, int* bottom)
 {
     return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE;
 }
@@ -689,14 +689,14 @@ TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator*
 }
 
 TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
-                                                 int* x1, int* y1, int* x2, int* y2)
+                                                       int* x1, int* y1, int* x2, int* y2)
 {
     return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE;
 }
 
 TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
-                                                    TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
-                                                    float* deskew_angle)
+                                                          TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
+                                                          float* deskew_angle)
 {
     handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
 }
@@ -759,8 +759,8 @@ TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const T
 }
 
 TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
-                                                                    BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
-                                                                    BOOL* is_smallcaps, int* pointsize, int* font_id)
+                                                                          BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
+                                                                          BOOL* is_smallcaps, int* pointsize, int* font_id)
 {
     bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps;
     const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif,
diff --git a/src/api/capi.h b/src/api/capi.h
index 8f999e8536..5101fdcf77 100644
--- a/src/api/capi.h
+++ b/src/api/capi.h
@@ -118,7 +118,7 @@ struct Pixa;
 /* General free functions */
 
 TESS_API const char*
-TESS_CALL TessVersion();
+               TESS_CALL TessVersion();
 TESS_API void  TESS_CALL TessDeleteText(char* text);
 TESS_API void  TESS_CALL TessDeleteTextArray(char** arr);
 TESS_API void  TESS_CALL TessDeleteIntArray(int* arr);
@@ -136,7 +136,7 @@ TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* out
 TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer);
 TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next);
 TESS_API TessResultRenderer*
-TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
+              TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
 TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title);
 TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api);
 TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer);
@@ -148,7 +148,7 @@ TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer);
 /* Base API */
 
 TESS_API TessBaseAPI*
-TESS_CALL TessBaseAPICreate();
+               TESS_CALL TessBaseAPICreate();
 TESS_API void  TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle);
 
 TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device);
@@ -171,7 +171,7 @@ TESS_API BOOL  TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, co
 TESS_API BOOL  TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value);
 TESS_API BOOL  TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value);
 TESS_API const char*
-TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
+               TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
 
 TESS_API void  TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp);
 TESS_API BOOL  TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename);
@@ -193,16 +193,16 @@ TESS_API int   TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datap
 TESS_API int   TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language);
 
 TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode,
-                                        char** configs, int configs_size,
-                                        char** vars_vec, char** vars_values, size_t vars_vec_size,
-                                        BOOL set_only_non_debug_params);
+    char** configs, int configs_size,
+    char** vars_vec, char** vars_values, size_t vars_vec_size,
+    BOOL set_only_non_debug_params);
 
 TESS_API const char*
-TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
 TESS_API char**
-TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
 TESS_API char**
-TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
 
 TESS_API int   TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language);
 TESS_API void  TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
@@ -212,7 +212,7 @@ TESS_API void  TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, con
 
 TESS_API void  TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode);
 TESS_API TessPageSegMode
-TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
 
 TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
                                          int bytes_per_pixel, int bytes_per_line,
@@ -233,32 +233,32 @@ TESS_API void  TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImag
 #endif
 
 TESS_API struct Pix*
-TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa);
+               TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
+               TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
-                                   struct Pixa** pixa, int** blockids, int** paraids);
+               TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
+                                                  struct Pixa** pixa, int** blockids, int** paraids);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
+               TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa);
+               TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
+               TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
-                                        struct Pixa** pixa, int** blockids);
+               TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
+                                                       struct Pixa** pixa, int** blockids);
 TESS_API struct Boxa*
-TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
-                                         const BOOL raw_image, const int raw_padding,
-                                         struct Pixa** pixa, int** blockids, int** paraids);
+               TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
+                                                        const BOOL raw_image, const int raw_padding,
+                                                        struct Pixa** pixa, int** blockids, int** paraids);
 
 TESS_API int   TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
 
 TESS_API TessPageIterator*
-TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
 
 TESS_API int   TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
 
@@ -269,12 +269,12 @@ TESS_API int   TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ET
 TESS_API BOOL  TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle,  const char* filename, const char* retry_config,
                                                  int timeout_millisec, TessResultRenderer* renderer);
 TESS_API BOOL  TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename,
-                                                const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
+                                               const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
 
 TESS_API TessResultIterator*
-TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
 TESS_API TessMutableIterator*
-TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
 
 TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
 TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
@@ -314,19 +314,19 @@ TESS_API BOOL  TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
 #endif  // def TESS_CAPI_INCLUDE_BASEAPI
 
 TESS_API const char*
-TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
+               TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
 
 TESS_API void  TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
 
 #ifdef TESS_CAPI_INCLUDE_BASEAPI
 
 TESS_API const TessDawg*
-TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
+               TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
 
 TESS_API int   TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle);
 
 TESS_API TessOcrEngineMode
-TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
 
 TESS_API void  TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
 
@@ -339,7 +339,7 @@ TESS_API void  TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, i
 TESS_API void  TESS_CALL TessPageIteratorDelete(TessPageIterator* handle);
 
 TESS_API TessPageIterator*
-TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
+               TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
 
 TESS_API void  TESS_CALL TessPageIteratorBegin(TessPageIterator* handle);
 
@@ -354,14 +354,14 @@ TESS_API BOOL  TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* han
                                                      int* left, int* top, int* right, int* bottom);
 
 TESS_API TessPolyBlockType
-TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
+               TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
 
 TESS_API struct Pix*
-TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
+               TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
 
 TESS_API struct Pix*
-TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
-                                   struct Pix* original_image, int* left, int* top);
+               TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
+                                                  struct Pix* original_image, int* left, int* top);
 
 TESS_API BOOL  TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
                                                   int* x1, int* y1, int* x2, int* y2);
@@ -377,23 +377,23 @@ TESS_API void  TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle,
 
 TESS_API void  TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
 TESS_API TessResultIterator*
-TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
+               TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
 TESS_API TessPageIterator*
-TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
+               TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
 TESS_API const TessPageIterator*
-TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
+               TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
 TESS_API TessChoiceIterator*
-TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
+               TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
 
 TESS_API BOOL  TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level);
 TESS_API const char*
-TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
+               TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
 TESS_API const char*
-TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
-                                               BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
-                                               BOOL* is_smallcaps, int* pointsize, int* font_id);
+               TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
+                                                              BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
+                                                              BOOL* is_smallcaps, int* pointsize, int* font_id);
 
 TESS_API BOOL  TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
 TESS_API BOOL  TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
@@ -434,12 +434,12 @@ TESS_API void  TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, T
 TESS_API ROW*  TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender);
 
 TESS_API TBLOB*
-TESS_CALL TessMakeTBLOB(Pix* pix);
+               TESS_CALL TessMakeTBLOB(Pix* pix);
 
 TESS_API void  TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
 
 TESS_API BLOCK_LIST*
-TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
+               TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
 
 TESS_API void  TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
 
diff --git a/src/api/renderer.cpp b/src/api/renderer.cpp
index c4c24e032f..af31be8e59 100644
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
@@ -30,250 +30,250 @@ namespace tesseract {
 /**********************************************************************
  * Base Renderer interface implementation
  **********************************************************************/
-    TessResultRenderer::TessResultRenderer(const char *outputbase,
-                                           const char* extension)
-            : file_extension_(extension),
-              title_(""), imagenum_(-1),
-              fout_(stdout),
-              next_(nullptr),
-              happy_(true) {
-        if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
-            STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
-            fout_ = fopen(outfile.string(), "wb");
-            if (fout_ == nullptr) {
-                happy_ = false;
-            }
-        }
+TessResultRenderer::TessResultRenderer(const char *outputbase,
+                                       const char* extension)
+    : file_extension_(extension),
+      title_(""), imagenum_(-1),
+      fout_(stdout),
+      next_(nullptr),
+      happy_(true) {
+  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
+    STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
+    fout_ = fopen(outfile.string(), "wb");
+    if (fout_ == nullptr) {
+      happy_ = false;
     }
-
-    TessResultRenderer::~TessResultRenderer() {
-        if (fout_ != nullptr) {
-            if (fout_ != stdout)
-                fclose(fout_);
-            else
-                clearerr(fout_);
-        }
-        delete next_;
-    }
-
-    void TessResultRenderer::insert(TessResultRenderer* next) {
-        if (next == nullptr) return;
-
-        TessResultRenderer* remainder = next_;
-        next_ = next;
-        if (remainder) {
-            while (next->next_ != nullptr) {
-                next = next->next_;
-            }
-            next->next_ = remainder;
-        }
-    }
-
-    bool TessResultRenderer::BeginDocument(const char* title) {
-        if (!happy_) return false;
-        title_ = title;
-        imagenum_ = -1;
-        bool ok = BeginDocumentHandler();
-        if (next_) {
-            ok = next_->BeginDocument(title) && ok;
-        }
-        return ok;
-    }
-
-    bool TessResultRenderer::AddImage(TessBaseAPI* api) {
-        if (!happy_) return false;
-        ++imagenum_;
-        bool ok = AddImageHandler(api);
-        if (next_) {
-            ok = next_->AddImage(api) && ok;
-        }
-        return ok;
-    }
-
-    bool TessResultRenderer::EndDocument() {
-        if (!happy_) return false;
-        bool ok = EndDocumentHandler();
-        if (next_) {
-            ok = next_->EndDocument() && ok;
-        }
-        return ok;
-    }
-
-    void TessResultRenderer::AppendString(const char* s) {
-        AppendData(s, strlen(s));
-    }
-
-    void TessResultRenderer::AppendData(const char* s, int len) {
-        if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
-    }
-
-    bool TessResultRenderer::BeginDocumentHandler() {
-        return happy_;
-    }
-
-    bool TessResultRenderer::EndDocumentHandler() {
-        return happy_;
+  }
+}
+
+TessResultRenderer::~TessResultRenderer() {
+  if (fout_ != nullptr) {
+    if (fout_ != stdout)
+      fclose(fout_);
+    else
+      clearerr(fout_);
+  }
+  delete next_;
+}
+
+void TessResultRenderer::insert(TessResultRenderer* next) {
+  if (next == nullptr) return;
+
+  TessResultRenderer* remainder = next_;
+  next_ = next;
+  if (remainder) {
+    while (next->next_ != nullptr) {
+      next = next->next_;
     }
+    next->next_ = remainder;
+  }
+}
+
+bool TessResultRenderer::BeginDocument(const char* title) {
+  if (!happy_) return false;
+  title_ = title;
+  imagenum_ = -1;
+  bool ok = BeginDocumentHandler();
+  if (next_) {
+    ok = next_->BeginDocument(title) && ok;
+  }
+  return ok;
+}
+
+bool TessResultRenderer::AddImage(TessBaseAPI* api) {
+  if (!happy_) return false;
+  ++imagenum_;
+  bool ok = AddImageHandler(api);
+  if (next_) {
+    ok = next_->AddImage(api) && ok;
+  }
+  return ok;
+}
+
+bool TessResultRenderer::EndDocument() {
+  if (!happy_) return false;
+  bool ok = EndDocumentHandler();
+  if (next_) {
+    ok = next_->EndDocument() && ok;
+  }
+  return ok;
+}
+
+void TessResultRenderer::AppendString(const char* s) {
+  AppendData(s, strlen(s));
+}
+
+void TessResultRenderer::AppendData(const char* s, int len) {
+  if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
+}
+
+bool TessResultRenderer::BeginDocumentHandler() {
+  return happy_;
+}
+
+bool TessResultRenderer::EndDocumentHandler() {
+  return happy_;
+}
 
 
 /**********************************************************************
  * UTF8 Text Renderer interface implementation
  **********************************************************************/
-    TessTextRenderer::TessTextRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "txt") {
-    }
+TessTextRenderer::TessTextRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "txt") {
+}
 
-    bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
-        const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
-        if (utf8 == nullptr) {
-            return false;
-        }
+bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
+  const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
+  if (utf8 == nullptr) {
+    return false;
+  }
 
-        AppendString(utf8.get());
+  AppendString(utf8.get());
 
-        const char* pageSeparator = api->GetStringVariable("page_separator");
-        if (pageSeparator != nullptr && *pageSeparator != '\0') {
-            AppendString(pageSeparator);
-        }
+  const char* pageSeparator = api->GetStringVariable("page_separator");
+  if (pageSeparator != nullptr && *pageSeparator != '\0') {
+    AppendString(pageSeparator);
+  }
 
-        return true;
-    }
+  return true;
+}
 
 /**********************************************************************
  * HOcr Text Renderer interface implementation
  **********************************************************************/
-    TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "hocr") {
-        font_info_ = false;
-    }
-
-    TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
-            : TessResultRenderer(outputbase, "hocr") {
-        font_info_ = font_info;
-    }
-
-    bool TessHOcrRenderer::BeginDocumentHandler() {
-        AppendString(
-                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-                "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
-                "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
-                "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
-                "lang=\"en\">\n <head>\n  <title>");
-        AppendString(title());
-        AppendString(
-                "</title>\n"
-                "<meta http-equiv=\"Content-Type\" content=\"text/html;"
-                "charset=utf-8\" />\n"
-                "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
-                "' />\n"
-                "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
-                " ocr_line ocrx_word ocrp_wconf");
-        if (font_info_)
-            AppendString(
-                    " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
-        AppendString(
-                "'/>\n"
-                "</head>\n<body>\n");
-
-        return true;
-    }
-
-    bool TessHOcrRenderer::EndDocumentHandler() {
-        AppendString(" </body>\n</html>\n");
-
-        return true;
-    }
-
-    bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
-        const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
-        if (hocr == nullptr) return false;
-
-        AppendString(hocr.get());
-
-        return true;
-    }
+TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "hocr") {
+    font_info_ = false;
+}
+
+TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
+    : TessResultRenderer(outputbase, "hocr") {
+    font_info_ = font_info;
+}
+
+bool TessHOcrRenderer::BeginDocumentHandler() {
+  AppendString(
+      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+      "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
+      "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
+      "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
+      "lang=\"en\">\n <head>\n  <title>");
+  AppendString(title());
+  AppendString(
+      "</title>\n"
+      "<meta http-equiv=\"Content-Type\" content=\"text/html;"
+      "charset=utf-8\" />\n"
+      "  <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
+              "' />\n"
+      "  <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
+      " ocr_line ocrx_word ocrp_wconf");
+  if (font_info_)
+    AppendString(
+      " ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
+  AppendString(
+      "'/>\n"
+      "</head>\n<body>\n");
+
+  return true;
+}
+
+bool TessHOcrRenderer::EndDocumentHandler() {
+  AppendString(" </body>\n</html>\n");
+
+  return true;
+}
+
+bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
+  const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
+  if (hocr == nullptr) return false;
+
+  AppendString(hocr.get());
+
+  return true;
+}
 
 /**********************************************************************
  * TSV Text Renderer interface implementation
  **********************************************************************/
-    TessTsvRenderer::TessTsvRenderer(const char* outputbase)
-            : TessResultRenderer(outputbase, "tsv") {
-        font_info_ = false;
-    }
+TessTsvRenderer::TessTsvRenderer(const char* outputbase)
+    : TessResultRenderer(outputbase, "tsv") {
+  font_info_ = false;
+}
 
-    TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
-            : TessResultRenderer(outputbase, "tsv") {
-        font_info_ = font_info;
-    }
+TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
+    : TessResultRenderer(outputbase, "tsv") {
+  font_info_ = font_info;
+}
 
-    bool TessTsvRenderer::BeginDocumentHandler() {
-        // Output TSV column headings
-        AppendString(
-                "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
-                "num\tleft\ttop\twidth\theight\tconf\ttext\n");
-        return true;
-    }
+bool TessTsvRenderer::BeginDocumentHandler() {
+  // Output TSV column headings
+  AppendString(
+      "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
+      "num\tleft\ttop\twidth\theight\tconf\ttext\n");
+  return true;
+}
 
-    bool TessTsvRenderer::EndDocumentHandler() { return true; }
+bool TessTsvRenderer::EndDocumentHandler() { return true; }
 
-    bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
-        const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
-        if (tsv == nullptr) return false;
+bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
+  const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
+  if (tsv == nullptr) return false;
 
-        AppendString(tsv.get());
+  AppendString(tsv.get());
 
-        return true;
-    }
+  return true;
+}
 
 /**********************************************************************
  * UNLV Text Renderer interface implementation
  **********************************************************************/
-    TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "unlv") {
-    }
+TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "unlv") {
+}
 
-    bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
-        const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
-        if (unlv == nullptr) return false;
+bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
+  const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
+  if (unlv == nullptr) return false;
 
-        AppendString(unlv.get());
+  AppendString(unlv.get());
 
-        return true;
-    }
+  return true;
+}
 
 /**********************************************************************
  * BoxText Renderer interface implementation
  **********************************************************************/
-    TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "box") {
-    }
+TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "box") {
+}
 
-    bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
-        const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
-        if (text == nullptr) return false;
+bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
+  const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
+  if (text == nullptr) return false;
 
-        AppendString(text.get());
+  AppendString(text.get());
 
-        return true;
-    }
+  return true;
+}
 
 #ifndef DISABLED_LEGACY_ENGINE
 
 /**********************************************************************
  * Osd Text Renderer interface implementation
  **********************************************************************/
-    TessOsdRenderer::TessOsdRenderer(const char* outputbase)
-            : TessResultRenderer(outputbase, "osd") {}
+TessOsdRenderer::TessOsdRenderer(const char* outputbase)
+    : TessResultRenderer(outputbase, "osd") {}
 
-    bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
-        char* osd = api->GetOsdText(imagenum());
-        if (osd == nullptr) return false;
+bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
+  char* osd = api->GetOsdText(imagenum());
+  if (osd == nullptr) return false;
 
-        AppendString(osd);
-        delete[] osd;
+  AppendString(osd);
+  delete[] osd;
 
-        return true;
-    }
+  return true;
+}
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
diff --git a/src/api/renderer.h b/src/api/renderer.h
index cb91f3e005..5c31e4b848 100644
--- a/src/api/renderer.h
+++ b/src/api/renderer.h
@@ -27,7 +27,7 @@
 
 namespace tesseract {
 
-    class TessBaseAPI;
+class TessBaseAPI;
 
 /**
  * Interface for rendering tesseract results into a document, such as text,
@@ -42,129 +42,129 @@ namespace tesseract {
  * renderers can manage the associated state needed for the specific formats
  * in addition to the heuristics for producing it.
  */
-    class TESS_API TessResultRenderer {
-    public:
-        virtual ~TessResultRenderer();
-
-        // Takes ownership of pointer so must be new'd instance.
-        // Renderers aren't ordered, but appends the sequences of next parameter
-        // and existing next(). The renderers should be unique across both lists.
-        void insert(TessResultRenderer* next);
-
-        // Returns the next renderer or nullptr.
-        TessResultRenderer* next() { return next_; }
-
-        /**
-         * Starts a new document with the given title.
-         * This clears the contents of the output data.
-         * Title should use UTF-8 encoding.
-         */
-        bool BeginDocument(const char* title);
-
-        /**
-         * Adds the recognized text from the source image to the current document.
-         * Invalid if BeginDocument not yet called.
-         *
-         * Note that this API is a bit weird but is designed to fit into the
-         * current TessBaseAPI implementation where the api has lots of state
-         * information that we might want to add in.
-         */
-        bool AddImage(TessBaseAPI* api);
-
-        /**
-         * Finishes the document and finalizes the output data
-         * Invalid if BeginDocument not yet called.
-         */
-        bool EndDocument();
-
-        const char* file_extension() const { return file_extension_; }
-        const char* title() const { return title_.c_str(); }
-
-        // Is everything fine? Otherwise something went wrong.
-        bool happy() { return happy_; }
-
-        /**
-         * Returns the index of the last image given to AddImage
-         * (i.e. images are incremented whether the image succeeded or not)
-         *
-         * This is always defined. It means either the number of the
-         * current image, the last image ended, or in the completed document
-         * depending on when in the document lifecycle you are looking at it.
-         * Will return -1 if a document was never started.
-         */
-        int imagenum() const { return imagenum_; }
-
-    protected:
-        /**
-         * Called by concrete classes.
-         *
-         * outputbase is the name of the output file excluding
-         * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-         *
-         * extension indicates the file extension to be used for output
-         * files. For example "pdf" will produce a .pdf file, and "hocr"
-         * will produce .hocr files.
-         */
-        TessResultRenderer(const char *outputbase,
-                           const char* extension);
-
-        // Hook for specialized handling in BeginDocument()
-        virtual bool BeginDocumentHandler();
-
-        // This must be overridden to render the OCR'd results
-        virtual bool AddImageHandler(TessBaseAPI* api) = 0;
-
-        // Hook for specialized handling in EndDocument()
-        virtual bool EndDocumentHandler();
-
-        // Renderers can call this to append '\0' terminated strings into
-        // the output string returned by GetOutput.
-        // This method will grow the output buffer if needed.
-        void AppendString(const char* s);
-
-        // Renderers can call this to append binary byte sequences into
-        // the output string returned by GetOutput. Note that s is not necessarily
-        // '\0' terminated (and can contain '\0' within it).
-        // This method will grow the output buffer if needed.
-        void AppendData(const char* s, int len);
-
-    private:
-        const char* file_extension_;  // standard extension for generated output
-        STRING title_;                // title of document being renderered
-        int imagenum_;                // index of last image added
-
-        FILE* fout_;                  // output file pointer
-        TessResultRenderer* next_;    // Can link multiple renderers together
-        bool happy_;                  // I get grumpy when the disk fills up, etc.
-    };
+class TESS_API TessResultRenderer {
+  public:
+    virtual ~TessResultRenderer();
+
+    // Takes ownership of the pointer, so it must be a new'd instance.
+    // Renderers aren't ordered; the chain headed by the next parameter goes
+    // before the existing next(). Keep renderers unique across both lists.
+    void insert(TessResultRenderer* next);
+
+    // Returns the next renderer or nullptr.
+    TessResultRenderer* next() { return next_; }
+
+    /**
+     * Starts a new document with the given title.
+     * This clears the contents of the output data.
+     * Title should use UTF-8 encoding.
+     */
+    bool BeginDocument(const char* title);
+
+    /**
+     * Adds the recognized text from the source image to the current document.
+     * Invalid if BeginDocument not yet called.
+     *
+     * Note that this API is a bit weird but is designed to fit into the
+     * current TessBaseAPI implementation where the api has lots of state
+     * information that we might want to add in.
+     */
+    bool AddImage(TessBaseAPI* api);
+
+    /**
+     * Finishes the document and finalizes the output data
+     * Invalid if BeginDocument not yet called.
+     */
+    bool EndDocument();
+
+    const char* file_extension() const { return file_extension_; }
+    const char* title() const { return title_.c_str(); }
+
+    // Is everything fine? Otherwise something went wrong.
+    bool happy() { return happy_; }
+
+    /**
+     * Returns the index of the last image given to AddImage
+     * (i.e. images are incremented whether the image succeeded or not)
+     *
+     * This is always defined. It means either the number of the
+     * current image, the last image ended, or in the completed document
+     * depending on when in the document lifecycle you are looking at it.
+     * Will return -1 if a document was never started.
+     */
+    int imagenum() const { return imagenum_; }
+
+  protected:
+    /**
+     * Called by concrete classes.
+     *
+     * outputbase is the name of the output file excluding
+     * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
+     *
+     * extension indicates the file extension to be used for output
+     * files. For example "pdf" will produce a .pdf file, and "hocr"
+     * will produce .hocr files.
+     */
+    TessResultRenderer(const char *outputbase,
+                       const char* extension);
+
+    // Hook for specialized handling in BeginDocument()
+    virtual bool BeginDocumentHandler();
+
+    // This must be overridden to render the OCR'd results
+    virtual bool AddImageHandler(TessBaseAPI* api) = 0;
+
+    // Hook for specialized handling in EndDocument()
+    virtual bool EndDocumentHandler();
+
+    // Renderers can call this to append a '\0' terminated string to the
+    // output stream (a file or stdout); the terminating '\0' is not written.
+    // Forwards to AppendData, so a failed write marks the renderer unhappy.
+    void AppendString(const char* s);
+
+    // Renderers can call this to append binary byte sequences to the
+    // output stream (a file or stdout). Note that s is not necessarily
+    // '\0' terminated (and can contain '\0' within it).
+    // If the write fails, the renderer is marked as not happy().
+    void AppendData(const char* s, int len);
+
+  private:
+    const char* file_extension_;  // standard extension for generated output
+    STRING title_;                // title of document being rendered
+    int imagenum_;                // index of last image added
+
+    FILE* fout_;                  // output file pointer
+    TessResultRenderer* next_;    // Can link multiple renderers together
+    bool happy_;                  // I get grumpy when the disk fills up, etc.
+};
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-    class TESS_API TessTextRenderer : public TessResultRenderer {
-    public:
-        explicit TessTextRenderer(const char *outputbase);
+class TESS_API TessTextRenderer : public TessResultRenderer {
+ public:
+  explicit TessTextRenderer(const char *outputbase);
 
-    protected:
-        virtual bool AddImageHandler(TessBaseAPI* api);
-    };
+ protected:
+  virtual bool AddImageHandler(TessBaseAPI* api);
+};
 
 /**
  * Renders tesseract output into an hocr text string
  */
-    class TESS_API TessHOcrRenderer : public TessResultRenderer {
-    public:
-        explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-        explicit TessHOcrRenderer(const char *outputbase);
+class TESS_API TessHOcrRenderer : public TessResultRenderer {
+ public:
+  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
+  explicit TessHOcrRenderer(const char *outputbase);
 
-    protected:
-        virtual bool BeginDocumentHandler();
-        virtual bool AddImageHandler(TessBaseAPI* api);
-        virtual bool EndDocumentHandler();
+ protected:
+  virtual bool BeginDocumentHandler();
+  virtual bool AddImageHandler(TessBaseAPI* api);
+  virtual bool EndDocumentHandler();
 
-    private:
-        bool font_info_;  // whether to print font information
-    };
+ private:
+  bool font_info_;  // whether to print font information
+};
 
 /**
  * Renders tesseract output into an alto text string
@@ -183,91 +183,91 @@ namespace tesseract {
 /**
  * Renders Tesseract output into a TSV string
  */
-    class TESS_API TessTsvRenderer : public TessResultRenderer {
-    public:
-        explicit TessTsvRenderer(const char* outputbase, bool font_info);
-        explicit TessTsvRenderer(const char* outputbase);
+class TESS_API TessTsvRenderer : public TessResultRenderer {
+ public:
+  explicit TessTsvRenderer(const char* outputbase, bool font_info);
+  explicit TessTsvRenderer(const char* outputbase);
 
-    protected:
-        virtual bool BeginDocumentHandler();
-        virtual bool AddImageHandler(TessBaseAPI* api);
-        virtual bool EndDocumentHandler();
+ protected:
+  virtual bool BeginDocumentHandler();
+  virtual bool AddImageHandler(TessBaseAPI* api);
+  virtual bool EndDocumentHandler();
 
-    private:
-        bool font_info_;              // whether to print font information
-    };
+ private:
+  bool font_info_;              // whether to print font information
+};
 
 /**
  * Renders tesseract output into searchable PDF
  */
-    class TESS_API TessPDFRenderer : public TessResultRenderer {
-    public:
-        // datadir is the location of the TESSDATA. We need it because
-        // we load a custom PDF font from this location.
-        TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
-
-    protected:
-        virtual bool BeginDocumentHandler();
-        virtual bool AddImageHandler(TessBaseAPI* api);
-        virtual bool EndDocumentHandler();
-
-    private:
-        // We don't want to have every image in memory at once,
-        // so we store some metadata as we go along producing
-        // PDFs one page at a time. At the end, that metadata is
-        // used to make everything that isn't easily handled in a
-        // streaming fashion.
-        long int obj_;                     // counter for PDF objects
-        GenericVector<long int> offsets_;  // offset of every PDF object in bytes
-        GenericVector<long int> pages_;    // object number for every /Page object
-        std::string datadir_;              // where to find the custom font
-        bool textonly_;                    // skip images if set
-        // Bookkeeping only. DIY = Do It Yourself.
-        void AppendPDFObjectDIY(size_t objectsize);
-        // Bookkeeping + emit data.
-        void AppendPDFObject(const char *data);
-        // Create the /Contents object for an entire page.
-        char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
-        // Turn an image into a PDF object. Only transcode if we have to.
-        static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
-                                  char** pdf_object, long int* pdf_object_size, const int jpg_quality);
-    };
+class TESS_API TessPDFRenderer : public TessResultRenderer {
+ public:
+  // datadir is the location of the TESSDATA. We need it because
+  // we load a custom PDF font from this location.
+  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly = false);
+
+ protected:
+  virtual bool BeginDocumentHandler();
+  virtual bool AddImageHandler(TessBaseAPI* api);
+  virtual bool EndDocumentHandler();
+
+ private:
+  // We don't want to have every image in memory at once,
+  // so we store some metadata as we go along producing
+  // PDFs one page at a time. At the end, that metadata is
+  // used to make everything that isn't easily handled in a
+  // streaming fashion.
+  long int obj_;                     // counter for PDF objects
+  GenericVector<long int> offsets_;  // offset of every PDF object in bytes
+  GenericVector<long int> pages_;    // object number for every /Page object
+  std::string datadir_;              // where to find the custom font
+  bool textonly_;                    // skip images if set
+  // Bookkeeping only. DIY = Do It Yourself.
+  void AppendPDFObjectDIY(size_t objectsize);
+  // Bookkeeping + emit data.
+  void AppendPDFObject(const char *data);
+  // Create the /Contents object for an entire page.
+  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
+  // Turn an image into a PDF object. Only transcode if we have to.
+  static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
+                          char** pdf_object, long int* pdf_object_size, const int jpg_quality);
+};
 
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-    class TESS_API TessUnlvRenderer : public TessResultRenderer {
-    public:
-        explicit TessUnlvRenderer(const char *outputbase);
+class TESS_API TessUnlvRenderer : public TessResultRenderer {
+ public:
+  explicit TessUnlvRenderer(const char *outputbase);
 
-    protected:
-        virtual bool AddImageHandler(TessBaseAPI* api);
-    };
+ protected:
+  virtual bool AddImageHandler(TessBaseAPI* api);
+};
 
 /**
  * Renders tesseract output into a plain UTF-8 text string
  */
-    class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-    public:
-        explicit TessBoxTextRenderer(const char *outputbase);
+class TESS_API TessBoxTextRenderer : public TessResultRenderer {
+ public:
+  explicit TessBoxTextRenderer(const char *outputbase);
 
-    protected:
-        virtual bool AddImageHandler(TessBaseAPI* api);
-    };
+ protected:
+  virtual bool AddImageHandler(TessBaseAPI* api);
+};
 
 #ifndef DISABLED_LEGACY_ENGINE
 
 /**
  * Renders tesseract output into an osd text string
  */
-    class TESS_API TessOsdRenderer : public TessResultRenderer {
-    public:
-        explicit TessOsdRenderer(const char* outputbase);
+class TESS_API TessOsdRenderer : public TessResultRenderer {
+ public:
+  explicit TessOsdRenderer(const char* outputbase);
 
-    protected:
-        virtual bool AddImageHandler(TessBaseAPI* api);
-    };
+ protected:
+  virtual bool AddImageHandler(TessBaseAPI* api);
+};
 
 #endif // ndef DISABLED_LEGACY_ENGINE
 
diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp
index 8ec5e7b75f..816dfe4faf 100644
--- a/src/api/tesseractmain.cpp
+++ b/src/api/tesseractmain.cpp
@@ -66,20 +66,20 @@ static void Win32WarningHandler(const char* module, const char* fmt,
 #endif   // _WIN32
 
 static void PrintVersionInfo() {
-    char* versionStrP;
+  char* versionStrP;
 
-    printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
+  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
 
-    versionStrP = getLeptonicaVersion();
-    printf(" %s\n", versionStrP);
-    lept_free(versionStrP);
+  versionStrP = getLeptonicaVersion();
+  printf(" %s\n", versionStrP);
+  lept_free(versionStrP);
 
-    versionStrP = getImagelibVersions();
-    printf("  %s\n", versionStrP);
-    lept_free(versionStrP);
+  versionStrP = getImagelibVersions();
+  printf("  %s\n", versionStrP);
+  lept_free(versionStrP);
 
 #ifdef USE_OPENCL
-    cl_platform_id platform[4];
+  cl_platform_id platform[4];
   cl_uint num_platforms;
 
   printf(" OpenCL info:\n");
@@ -118,155 +118,155 @@ static void PrintVersionInfo() {
 }
 
 static void PrintHelpForPSM() {
-    const char* msg =
-            "Page segmentation modes:\n"
-            "  0    Orientation and script detection (OSD) only.\n"
-            "  1    Automatic page segmentation with OSD.\n"
-            "  2    Automatic page segmentation, but no OSD, or OCR.\n"
-            "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
-            "  4    Assume a single column of text of variable sizes.\n"
-            "  5    Assume a single uniform block of vertically aligned text.\n"
-            "  6    Assume a single uniform block of text.\n"
-            "  7    Treat the image as a single text line.\n"
-            "  8    Treat the image as a single word.\n"
-            "  9    Treat the image as a single word in a circle.\n"
-            " 10    Treat the image as a single character.\n"
-            " 11    Sparse text. Find as much text as possible in no"
-            " particular order.\n"
-            " 12    Sparse text with OSD.\n"
-            " 13    Raw line. Treat the image as a single text line,\n"
-            "       bypassing hacks that are Tesseract-specific.\n";
+  const char* msg =
+      "Page segmentation modes:\n"
+      "  0    Orientation and script detection (OSD) only.\n"
+      "  1    Automatic page segmentation with OSD.\n"
+      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
+      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
+      "  4    Assume a single column of text of variable sizes.\n"
+      "  5    Assume a single uniform block of vertically aligned text.\n"
+      "  6    Assume a single uniform block of text.\n"
+      "  7    Treat the image as a single text line.\n"
+      "  8    Treat the image as a single word.\n"
+      "  9    Treat the image as a single word in a circle.\n"
+      " 10    Treat the image as a single character.\n"
+      " 11    Sparse text. Find as much text as possible in no"
+      " particular order.\n"
+      " 12    Sparse text with OSD.\n"
+      " 13    Raw line. Treat the image as a single text line,\n"
+      "       bypassing hacks that are Tesseract-specific.\n";
 
 #ifdef DISABLED_LEGACY_ENGINE
-    const char* disabled_osd_msg =
+  const char* disabled_osd_msg =
       "\nNOTE: The OSD modes are currently disabled.\n";
   printf("%s%s", msg, disabled_osd_msg);
 #else
-    printf("%s", msg);
+  printf("%s", msg);
 #endif
 }
 
 #ifndef DISABLED_LEGACY_ENGINE
 static void PrintHelpForOEM() {
-    const char* msg =
-            "OCR Engine modes:\n"
-            "  0    Legacy engine only.\n"
-            "  1    Neural nets LSTM engine only.\n"
-            "  2    Legacy + LSTM engines.\n"
-            "  3    Default, based on what is available.\n";
-
-    printf("%s", msg);
+  const char* msg =
+      "OCR Engine modes:\n"
+      "  0    Legacy engine only.\n"
+      "  1    Neural nets LSTM engine only.\n"
+      "  2    Legacy + LSTM engines.\n"
+      "  3    Default, based on what is available.\n";
+
+  printf("%s", msg);
 }
 #endif  // ndef DISABLED_LEGACY_ENGINE
 
 static void PrintHelpExtra(const char* program) {
-    printf(
-            "Usage:\n"
-            "  %s --help | --help-extra | --help-psm | "
-            #ifndef DISABLED_LEGACY_ENGINE
-            "--help-oem | "
-            #endif
-            "--version\n"
-            "  %s --list-langs [--tessdata-dir PATH]\n"
-            "  %s --print-parameters [options...] [configfile...]\n"
-            "  %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
-            "\n"
-            "OCR options:\n"
-            "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
-            "  --user-words PATH     Specify the location of user words file.\n"
-            "  --user-patterns PATH  Specify the location of user patterns file.\n"
-            "  --dpi VALUE           Specify DPI for input image.\n"
-            "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
-            "  -c VAR=VALUE          Set value for config variables.\n"
-            "                        Multiple -c arguments are allowed.\n"
-            "  --psm NUM             Specify page segmentation mode.\n"
-            #ifndef DISABLED_LEGACY_ENGINE
-            "  --oem NUM             Specify OCR Engine mode.\n"
-            #endif
-            "NOTE: These options must occur before any configfile.\n"
-            "\n",
-            program, program, program, program
-    );
-
-    PrintHelpForPSM();
+  printf(
+      "Usage:\n"
+      "  %s --help | --help-extra | --help-psm | "
+#ifndef DISABLED_LEGACY_ENGINE
+      "--help-oem | "
+#endif
+      "--version\n"
+      "  %s --list-langs [--tessdata-dir PATH]\n"
+      "  %s --print-parameters [options...] [configfile...]\n"
+      "  %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
+      "\n"
+      "OCR options:\n"
+      "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
+      "  --user-words PATH     Specify the location of user words file.\n"
+      "  --user-patterns PATH  Specify the location of user patterns file.\n"
+      "  --dpi VALUE           Specify DPI for input image.\n"
+      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
+      "  -c VAR=VALUE          Set value for config variables.\n"
+      "                        Multiple -c arguments are allowed.\n"
+      "  --psm NUM             Specify page segmentation mode.\n"
+#ifndef DISABLED_LEGACY_ENGINE
+      "  --oem NUM             Specify OCR Engine mode.\n"
+#endif
+      "NOTE: These options must occur before any configfile.\n"
+      "\n",
+      program, program, program, program
+  );
+
+  PrintHelpForPSM();
 #ifndef DISABLED_LEGACY_ENGINE
-    printf("\n");
-    PrintHelpForOEM();
+  printf("\n");
+  PrintHelpForOEM();
 #endif
 
-    printf(
-            "\n"
-            "Single options:\n"
-            "  -h, --help            Show minimal help message.\n"
-            "  --help-extra          Show extra help for advanced users.\n"
-            "  --help-psm            Show page segmentation modes.\n"
-            #ifndef DISABLED_LEGACY_ENGINE
-            "  --help-oem            Show OCR Engine modes.\n"
-            #endif
-            "  -v, --version         Show version information.\n"
-            "  --list-langs          List available languages for tesseract engine.\n"
-            "  --print-parameters    Print tesseract parameters.\n"
-    );
+  printf(
+      "\n"
+      "Single options:\n"
+      "  -h, --help            Show minimal help message.\n"
+      "  --help-extra          Show extra help for advanced users.\n"
+      "  --help-psm            Show page segmentation modes.\n"
+#ifndef DISABLED_LEGACY_ENGINE
+      "  --help-oem            Show OCR Engine modes.\n"
+#endif
+      "  -v, --version         Show version information.\n"
+      "  --list-langs          List available languages for tesseract engine.\n"
+      "  --print-parameters    Print tesseract parameters.\n"
+  );
 }
 
 static void PrintHelpMessage(const char* program) {
-    printf(
-            "Usage:\n"
-            "  %s --help | --help-extra | --version\n"
-            "  %s --list-langs\n"
-            "  %s imagename outputbase [options...] [configfile...]\n"
-            "\n"
-            "OCR options:\n"
-            "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
-            "NOTE: These options must occur before any configfile.\n"
-            "\n"
-            "Single options:\n"
-            "  --help                Show this help message.\n"
-            "  --help-extra          Show extra help for advanced users.\n"
-            "  --version             Show version information.\n"
-            "  --list-langs          List available languages for tesseract engine.\n",
-            program, program, program
-    );
+  printf(
+      "Usage:\n"
+      "  %s --help | --help-extra | --version\n"
+      "  %s --list-langs\n"
+      "  %s imagename outputbase [options...] [configfile...]\n"
+      "\n"
+      "OCR options:\n"
+      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
+      "NOTE: These options must occur before any configfile.\n"
+      "\n"
+      "Single options:\n"
+      "  --help                Show this help message.\n"
+      "  --help-extra          Show extra help for advanced users.\n"
+      "  --version             Show version information.\n"
+      "  --list-langs          List available languages for tesseract engine.\n",
+      program, program, program
+  );
 }
 
 static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
                                    char** argv) {
-    char opt1[256], opt2[255];
-    for (int i = 0; i < argc; i++) {
-        if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
-            strncpy(opt1, argv[i + 1], 255);
-            opt1[255] = '\0';
-            char* p = strchr(opt1, '=');
-            if (!p) {
-                fprintf(stderr, "Missing = in configvar assignment\n");
-                exit(EXIT_FAILURE);
-            }
-            *p = 0;
-            strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
-            opt2[254] = 0;
-            ++i;
-
-            if (!api->SetVariable(opt1, opt2)) {
-                fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
-            }
-        }
+  char opt1[256], opt2[255];
+  for (int i = 0; i < argc; i++) {
+    if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
+      strncpy(opt1, argv[i + 1], 255);
+      opt1[255] = '\0';
+      char* p = strchr(opt1, '=');
+      if (!p) {
+        fprintf(stderr, "Missing = in configvar assignment\n");
+        exit(EXIT_FAILURE);
+      }
+      *p = 0;
+      strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
+      opt2[254] = 0;
+      ++i;
+
+      if (!api->SetVariable(opt1, opt2)) {
+        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
+      }
     }
+  }
 }
 
 static void PrintLangsList(tesseract::TessBaseAPI* api) {
-    GenericVector<STRING> languages;
-    api->GetAvailableLanguagesAsVector(&languages);
-    printf("List of available languages (%d):\n", languages.size());
-    for (int index = 0; index < languages.size(); ++index) {
-        STRING& string = languages[index];
-        printf("%s\n", string.string());
-    }
-    api->End();
+  GenericVector<STRING> languages;
+  api->GetAvailableLanguagesAsVector(&languages);
+  printf("List of available languages (%d):\n", languages.size());
+  for (int index = 0; index < languages.size(); ++index) {
+    STRING& string = languages[index];
+    printf("%s\n", string.string());
+  }
+  api->End();
 }
 
 static void PrintBanner() {
-    tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
-            tesseract::TessBaseAPI::Version());
+  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
+          tesseract::TessBaseAPI::Version());
 }
 
 /**
@@ -285,15 +285,15 @@ static void PrintBanner() {
  */
 static void FixPageSegMode(tesseract::TessBaseAPI* api,
                            tesseract::PageSegMode pagesegmode) {
-    if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
-        api->SetPageSegMode(pagesegmode);
+  if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
+    api->SetPageSegMode(pagesegmode);
 }
 
 static void checkArgValues(int arg, const char* mode, int count) {
-    if (arg >= count || arg < 0) {
-        printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
-        exit(EXIT_SUCCESS);
-    }
+  if (arg >= count || arg < 0) {  // accepted values are 0 .. count - 1
+    fprintf(stderr, "Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
+    exit(EXIT_FAILURE);  // a rejected argument must not report success
+  }
 }
 
 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
@@ -304,218 +304,218 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
                       GenericVector<STRING>* vars_values, l_int32* arg_i,
                       tesseract::PageSegMode* pagesegmode,
                       tesseract::OcrEngineMode* enginemode) {
-    bool noocr = false;
-    int i;
-    for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
-        if (*image != nullptr && *outputbase == nullptr) {
-            // outputbase follows image, don't allow options at that position.
-            *outputbase = argv[i];
-        } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
-            PrintHelpMessage(argv[0]);
-            noocr = true;
-        } else if (strcmp(argv[i], "--help-extra") == 0) {
-            PrintHelpExtra(argv[0]);
-            noocr = true;
-        } else if ((strcmp(argv[i], "--help-psm") == 0)) {
-            PrintHelpForPSM();
-            noocr = true;
+  bool noocr = false;
+  int i;
+  for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
+    if (*image != nullptr && *outputbase == nullptr) {
+      // outputbase follows image, don't allow options at that position.
+      *outputbase = argv[i];
+    } else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
+      PrintHelpMessage(argv[0]);
+      noocr = true;
+    } else if (strcmp(argv[i], "--help-extra") == 0) {
+      PrintHelpExtra(argv[0]);
+      noocr = true;
+    } else if ((strcmp(argv[i], "--help-psm") == 0)) {
+      PrintHelpForPSM();
+      noocr = true;
 #ifndef DISABLED_LEGACY_ENGINE
-        } else if ((strcmp(argv[i], "--help-oem") == 0)) {
-            PrintHelpForOEM();
-            noocr = true;
+    } else if ((strcmp(argv[i], "--help-oem") == 0)) {
+      PrintHelpForOEM();
+      noocr = true;
 #endif
-        } else if ((strcmp(argv[i], "-v") == 0) ||
-                   (strcmp(argv[i], "--version") == 0)) {
-            PrintVersionInfo();
-            noocr = true;
-        } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
-            *lang = argv[i + 1];
-            ++i;
-        } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
-            *datapath = argv[i + 1];
-            ++i;
-        } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
-            *dpi = atoi(argv[i + 1]);
-            ++i;
-        } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
-            vars_vec->push_back("user_words_file");
-            vars_values->push_back(argv[i + 1]);
-            ++i;
-        } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
-            vars_vec->push_back("user_patterns_file");
-            vars_values->push_back(argv[i + 1]);
-            ++i;
-        } else if (strcmp(argv[i], "--list-langs") == 0) {
-            noocr = true;
-            *list_langs = true;
-        } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
-            checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
-            *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
-            ++i;
-        } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
+    } else if ((strcmp(argv[i], "-v") == 0) ||
+               (strcmp(argv[i], "--version") == 0)) {
+      PrintVersionInfo();
+      noocr = true;
+    } else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
+      *lang = argv[i + 1];
+      ++i;
+    } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
+      *datapath = argv[i + 1];
+      ++i;
+    } else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
+      *dpi = atoi(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
+      vars_vec->push_back("user_words_file");
+      vars_values->push_back(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
+      vars_vec->push_back("user_patterns_file");
+      vars_values->push_back(argv[i + 1]);
+      ++i;
+    } else if (strcmp(argv[i], "--list-langs") == 0) {
+      noocr = true;
+      *list_langs = true;
+    } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
+      checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
+      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
+      ++i;
+    } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
 #ifndef DISABLED_LEGACY_ENGINE
-            int oem = atoi(argv[i + 1]);
-            checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
-            *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
+      int oem = atoi(argv[i + 1]);
+      checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
+      *enginemode = static_cast<tesseract::OcrEngineMode>(oem);
 #endif
-            ++i;
-        } else if (strcmp(argv[i], "--print-parameters") == 0) {
-            noocr = true;
-            *print_parameters = true;
-        } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
-            // handled properly after api init
-            ++i;
-        } else if (*image == nullptr) {
-            *image = argv[i];
-        } else {
-            // Unexpected argument.
-            fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
-            exit(EXIT_FAILURE);
-        }
+      ++i;
+    } else if (strcmp(argv[i], "--print-parameters") == 0) {
+      noocr = true;
+      *print_parameters = true;
+    } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
+      // handled properly after api init
+      ++i;
+    } else if (*image == nullptr) {
+      *image = argv[i];
+    } else {
+      // Unexpected argument.
+      fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
+      exit(EXIT_FAILURE);
     }
+  }
 
-    *arg_i = i;
+  *arg_i = i;
 
-    if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
-        // OSD = orientation and script detection.
-        if (*lang != nullptr && strcmp(*lang, "osd")) {
-            // If the user explicitly specifies a language (other than osd)
-            // or a script, only orientation can be detected.
-            fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
-        } else {
-            // That mode requires osd.traineddata to detect orientation and script.
-            *lang = "osd";
-        }
+  if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
+    // OSD = orientation and script detection.
+    if (*lang != nullptr && strcmp(*lang, "osd")) {
+      // If the user explicitly specifies a language (other than osd)
+      // or a script, only orientation can be detected.
+      fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
+    } else {
+      // That mode requires osd.traineddata to detect orientation and script.
+      *lang = "osd";
     }
+  }
 
-    if (*outputbase == nullptr && noocr == false) {
-        PrintHelpMessage(argv[0]);
-        exit(EXIT_FAILURE);
-    }
+  if (*outputbase == nullptr && noocr == false) {
+    PrintHelpMessage(argv[0]);
+    exit(EXIT_FAILURE);
+  }
 }
 
 static void PreloadRenderers(
-        tesseract::TessBaseAPI* api,
-        tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
-        tesseract::PageSegMode pagesegmode, const char* outputbase) {
-    if (pagesegmode == tesseract::PSM_OSD_ONLY) {
+    tesseract::TessBaseAPI* api,
+    tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
+    tesseract::PageSegMode pagesegmode, const char* outputbase) {
+  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
 #ifndef DISABLED_LEGACY_ENGINE
-        renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
+    renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
 #endif  // ndef DISABLED_LEGACY_ENGINE
-    } else {
-        bool b;
-        api->GetBoolVariable("tessedit_create_hocr", &b);
-        if (b) {
-            bool font_info;
-            api->GetBoolVariable("hocr_font_info", &font_info);
-            tesseract::TessHOcrRenderer* renderer =
-                    new tesseract::TessHOcrRenderer(outputbase, font_info);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create hOCR output file: %s\n",
-                        strerror(errno));
-            }
-        }
+  } else {
+    bool b;
+    api->GetBoolVariable("tessedit_create_hocr", &b);
+    if (b) {
+      bool font_info;
+      api->GetBoolVariable("hocr_font_info", &font_info);
+      tesseract::TessHOcrRenderer* renderer =
+          new tesseract::TessHOcrRenderer(outputbase, font_info);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create hOCR output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_create_alto", &b);
-        if (b) {
-            tesseract::TessAltoRenderer* renderer =
-                    new tesseract::TessAltoRenderer(outputbase);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create ALTO output file: %s\n",
-                        strerror(errno));
-            }
+    api->GetBoolVariable("tessedit_create_alto", &b);
+    if (b) {  // ALTO output was requested
+      tesseract::TessAltoRenderer* renderer =
+          new tesseract::TessAltoRenderer(outputbase);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;  // never handed to the vector, so free it here
+        tprintf("Error, could not create ALTO output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_create_tsv", &b);
-        if (b) {
-            bool font_info;
-            api->GetBoolVariable("hocr_font_info", &font_info);
-            tesseract::TessTsvRenderer* renderer =
-                    new tesseract::TessTsvRenderer(outputbase, font_info);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create TSV output file: %s\n",
-                        strerror(errno));
-            }
-        }
+    api->GetBoolVariable("tessedit_create_tsv", &b);
+    if (b) {
+      bool font_info;
+      api->GetBoolVariable("hocr_font_info", &font_info);
+      tesseract::TessTsvRenderer* renderer =
+          new tesseract::TessTsvRenderer(outputbase, font_info);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create TSV output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_create_pdf", &b);
-        if (b) {
-#ifdef WIN32
-            if (_setmode(_fileno(stdout), _O_BINARY) == -1)
+    api->GetBoolVariable("tessedit_create_pdf", &b);
+    if (b) {
+      #ifdef WIN32
+        if (_setmode(_fileno(stdout), _O_BINARY) == -1)
           tprintf("ERROR: cin to binary: %s", strerror(errno));
-#endif  // WIN32
-            bool textonly;
-            api->GetBoolVariable("textonly_pdf", &textonly);
-            tesseract::TessPDFRenderer* renderer =
-                    new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(),
-                                                   textonly);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create PDF output file: %s\n",
-                        strerror(errno));
-            }
-        }
+      #endif  // WIN32
+      bool textonly;
+      api->GetBoolVariable("textonly_pdf", &textonly);
+      tesseract::TessPDFRenderer* renderer =
+        new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(),
+                                       textonly);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create PDF output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_write_unlv", &b);
-        if (b) {
-            api->SetVariable("unlv_tilde_crunching", "true");
-            tesseract::TessUnlvRenderer* renderer =
-                    new tesseract::TessUnlvRenderer(outputbase);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create UNLV output file: %s\n",
-                        strerror(errno));
-            }
-        }
+    api->GetBoolVariable("tessedit_write_unlv", &b);
+    if (b) {
+      api->SetVariable("unlv_tilde_crunching", "true");
+      tesseract::TessUnlvRenderer* renderer =
+        new tesseract::TessUnlvRenderer(outputbase);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create UNLV output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_create_boxfile", &b);
-        if (b) {
-            tesseract::TessBoxTextRenderer* renderer =
-                    new tesseract::TessBoxTextRenderer(outputbase);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create BOX output file: %s\n",
-                        strerror(errno));
-            }
-        }
+    api->GetBoolVariable("tessedit_create_boxfile", &b);
+    if (b) {
+      tesseract::TessBoxTextRenderer* renderer =
+        new tesseract::TessBoxTextRenderer(outputbase);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create BOX output file: %s\n",
+                strerror(errno));
+      }
+    }
 
-        api->GetBoolVariable("tessedit_create_txt", &b);
-        if (b || renderers->empty()) {
-            tesseract::TessTextRenderer* renderer =
-                    new tesseract::TessTextRenderer(outputbase);
-            if (renderer->happy()) {
-                renderers->push_back(renderer);
-            } else {
-                delete renderer;
-                tprintf("Error, could not create TXT output file: %s\n",
-                        strerror(errno));
-            }
-        }
+    api->GetBoolVariable("tessedit_create_txt", &b);
+    if (b || renderers->empty()) {
+      tesseract::TessTextRenderer* renderer =
+        new tesseract::TessTextRenderer(outputbase);
+      if (renderer->happy()) {
+        renderers->push_back(renderer);
+      } else {
+        delete renderer;
+        tprintf("Error, could not create TXT output file: %s\n",
+                strerror(errno));
+      }
     }
+  }
 
-    if (!renderers->empty()) {
-        // Since the PointerVector auto-deletes, null-out the renderers that are
-        // added to the root, and leave the root in the vector.
-        for (int r = 1; r < renderers->size(); ++r) {
-            (*renderers)[0]->insert((*renderers)[r]);
-            (*renderers)[r] = nullptr;
-        }
+  if (!renderers->empty()) {
+    // Since the PointerVector auto-deletes, null-out the renderers that are
+    // added to the root, and leave the root in the vector.
+    for (int r = 1; r < renderers->size(); ++r) {
+      (*renderers)[0]->insert((*renderers)[r]);
+      (*renderers)[r] = nullptr;
     }
+  }
 }
 
 
@@ -525,135 +525,135 @@ static void PreloadRenderers(
  **********************************************************************/
 
 int main(int argc, char** argv) {
-    const char* lang = nullptr;
-    const char* image = nullptr;
-    const char* outputbase = nullptr;
-    const char* datapath = nullptr;
-    bool list_langs = false;
-    bool print_parameters = false;
-    l_int32 dpi = 0;
-    int arg_i = 1;
-    tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
+  const char* lang = nullptr;
+  const char* image = nullptr;
+  const char* outputbase = nullptr;
+  const char* datapath = nullptr;
+  bool list_langs = false;
+  bool print_parameters = false;
+  l_int32 dpi = 0;
+  int arg_i = 1;
+  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
 #ifdef DISABLED_LEGACY_ENGINE
-    auto enginemode = tesseract::OEM_LSTM_ONLY;
+  auto enginemode = tesseract::OEM_LSTM_ONLY;
 #else
-    tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
+  tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
 #endif
-    /* main() calls functions like ParseArgs which call exit().
-     * This results in memory leaks if vars_vec and vars_values are
-     * declared as auto variables (destructor is not called then). */
-    static GenericVector<STRING> vars_vec;
-    static GenericVector<STRING> vars_values;
+  /* main() calls functions like ParseArgs which call exit().
+   * This results in memory leaks if vars_vec and vars_values are
+   * declared as auto variables (destructor is not called then). */
+  static GenericVector<STRING> vars_vec;
+  static GenericVector<STRING> vars_values;
 
 #if !defined(DEBUG)
-    // Disable debugging and informational messages from Leptonica.
-    setMsgSeverity(L_SEVERITY_ERROR);
+  // Disable debugging and informational messages from Leptonica.
+  setMsgSeverity(L_SEVERITY_ERROR);
 #endif
 
 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
-    /* Show libtiff errors and warnings on console (not in GUI). */
+  /* Show libtiff errors and warnings on console (not in GUI). */
   TIFFSetErrorHandler(Win32ErrorHandler);
   TIFFSetWarningHandler(Win32WarningHandler);
 #endif // HAVE_TIFFIO_H && _WIN32
 
-    ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
-              &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
-              &pagesegmode, &enginemode);
+  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
+            &list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
+            &pagesegmode, &enginemode);
 
-    if (lang == nullptr) {
-        // Set default language if none was given.
-        lang = "eng";
-    }
+  if (lang == nullptr) {
+    // Set default language if none was given.
+    lang = "eng";
+  }
 
-    if (image == nullptr && !list_langs && !print_parameters)
-        return EXIT_SUCCESS;
+  if (image == nullptr && !list_langs && !print_parameters)
+    return EXIT_SUCCESS;
 
-    PERF_COUNT_START("Tesseract:main")
+  PERF_COUNT_START("Tesseract:main")
 
-    // Call GlobalDawgCache here to create the global DawgCache object before
-    // the TessBaseAPI object. This fixes the order of destructor calls:
-    // first TessBaseAPI must be destructed, DawgCache must be the last object.
-    tesseract::Dict::GlobalDawgCache();
+  // Call GlobalDawgCache here to create the global DawgCache object before
+  // the TessBaseAPI object. This fixes the order of destructor calls:
+  // first TessBaseAPI must be destructed, DawgCache must be the last object.
+  tesseract::Dict::GlobalDawgCache();
 
-    // Avoid memory leak caused by auto variable when return is called.
-    static tesseract::TessBaseAPI api;
+  // Avoid memory leak caused by auto variable when return is called.
+  static tesseract::TessBaseAPI api;
 
-    api.SetOutputName(outputbase);
+  api.SetOutputName(outputbase);
 
-    const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
-                                     argc - arg_i, &vars_vec, &vars_values, false);
+  const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
+                             argc - arg_i, &vars_vec, &vars_values, false);
 
-    SetVariablesFromCLArgs(&api, argc, argv);
+  SetVariablesFromCLArgs(&api, argc, argv);
 
-    if (list_langs) {
-        PrintLangsList(&api);
-        return EXIT_SUCCESS;
-    }
+  if (list_langs) {
+    PrintLangsList(&api);
+    return EXIT_SUCCESS;
+  }
 
-    if (init_failed) {
-        fprintf(stderr, "Could not initialize tesseract.\n");
-        return EXIT_FAILURE;
-    }
+  if (init_failed) {
+    fprintf(stderr, "Could not initialize tesseract.\n");
+    return EXIT_FAILURE;
+  }
 
-    if (print_parameters) {
-        FILE* fout = stdout;
-        fprintf(stdout, "Tesseract parameters:\n");
-        api.PrintVariables(fout);
-        api.End();
-        return EXIT_SUCCESS;
-    }
+  if (print_parameters) {
+    FILE* fout = stdout;
+    fprintf(stdout, "Tesseract parameters:\n");
+    api.PrintVariables(fout);
+    api.End();
+    return EXIT_SUCCESS;
+  }
 
-    FixPageSegMode(&api, pagesegmode);
+  FixPageSegMode(&api, pagesegmode);
 
-    if (dpi) {
-        char dpi_string[255];
-        snprintf(dpi_string, 254, "%d", dpi);
-        api.SetVariable("user_defined_dpi", dpi_string);
-    }
+  if (dpi) {
+    char dpi_string[255];
+    snprintf(dpi_string, 254, "%d", dpi);
+    api.SetVariable("user_defined_dpi", dpi_string);
+  }
 
-    if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
-        int ret_val = EXIT_SUCCESS;
+  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
+    int ret_val = EXIT_SUCCESS;
 
-        Pix* pixs = pixRead(image);
-        if (!pixs) {
-            fprintf(stderr, "Leptonica can't process input file: %s\n", image);
-            return 2;
-        }
+    Pix* pixs = pixRead(image);
+    if (!pixs) {
+      fprintf(stderr, "Leptonica can't process input file: %s\n", image);
+      return 2;
+    }
 
-        api.SetImage(pixs);
+    api.SetImage(pixs);
 
-        tesseract::Orientation orientation;
-        tesseract::WritingDirection direction;
-        tesseract::TextlineOrder order;
-        float deskew_angle;
+    tesseract::Orientation orientation;
+    tesseract::WritingDirection direction;
+    tesseract::TextlineOrder order;
+    float deskew_angle;
 
-        const tesseract::PageIterator* it = api.AnalyseLayout();
-        if (it) {
-            it->Orientation(&orientation, &direction, &order, &deskew_angle);
-            tprintf(
-                    "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
-                    "Deskew angle: %.4f\n",
-                    orientation, direction, order, deskew_angle);
-        } else {
-            ret_val = EXIT_FAILURE;
-        }
+    const tesseract::PageIterator* it = api.AnalyseLayout();
+    if (it) {
+      it->Orientation(&orientation, &direction, &order, &deskew_angle);
+      tprintf(
+          "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
+          "Deskew angle: %.4f\n",
+          orientation, direction, order, deskew_angle);
+    } else {
+      ret_val = EXIT_FAILURE;
+    }
 
-        delete it;
+    delete it;
 
-        pixDestroy(&pixs);
-        return ret_val;
-    }
+    pixDestroy(&pixs);
+    return ret_val;
+  }
 
-    // set in_training_mode to true when using one of these configs:
-    // ambigs.train, box.train, box.train.stderr, linebox, rebox
-    bool b = false;
-    bool in_training_mode =
-            (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
-            (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
-            (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
+  // set in_training_mode to true when using one of these configs:
+  // ambigs.train, box.train, box.train.stderr, linebox, rebox
+  bool b = false;
+  bool in_training_mode =
+      (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
+      (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
+      (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
 
 #ifdef DISABLED_LEGACY_ENGINE
-    auto cur_psm = api.GetPageSegMode();
+  auto cur_psm = api.GetPageSegMode();
   auto osd_warning = std::string("");
   if (cur_psm == tesseract::PSM_OSD_ONLY) {
     const char* disabled_osd_msg =
@@ -673,37 +673,37 @@ int main(int argc, char** argv) {
   }
 #endif  // def DISABLED_LEGACY_ENGINE
 
-    // Avoid memory leak caused by auto variable when exit() is called.
-    static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
+  // Avoid memory leak caused by auto variable when exit() is called.
+  static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
 
-    if (in_training_mode) {
-        renderers.push_back(nullptr);
-    } else {
-        PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
-    }
+  if (in_training_mode) {
+    renderers.push_back(nullptr);
+  } else {
+    PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
+  }
 
-    bool banner = false;
-    if (outputbase != nullptr && strcmp(outputbase, "-") &&
-        strcmp(outputbase, "stdout")) {
-        banner = true;
-    }
+  bool banner = false;
+  if (outputbase != nullptr && strcmp(outputbase, "-") &&
+      strcmp(outputbase, "stdout")) {
+    banner = true;
+  }
 
-    if (!renderers.empty()) {
-        if (banner) PrintBanner();
+  if (!renderers.empty()) {
+    if (banner) PrintBanner();
 #ifdef DISABLED_LEGACY_ENGINE
-        if (!osd_warning.empty()) {
+    if (!osd_warning.empty()) {
       fprintf(stderr, "%s",osd_warning.c_str());
     }
 #endif
-        bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]);
-        if (!succeed) {
-            fprintf(stderr, "Error during processing.\n");
-            return EXIT_FAILURE;
-        }
+    bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]);
+    if (!succeed) {
+      fprintf(stderr, "Error during processing.\n");
+      return EXIT_FAILURE;
     }
+  }
 
-    PERF_COUNT_END
+  PERF_COUNT_END
 
-    return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
 }
 

From 664f7eabcd321b397092dafb505d45b26e988be5 Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Thu, 29 Nov 2018 21:11:53 -0500
Subject: [PATCH 5/9] Add #include <memory.h> to altorenderer.cpp

---
 src/api/altorenderer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index a654ad818d..037de3c999 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -17,6 +17,7 @@
  **********************************************************************/
 
 #include "baseapi.h"
+#include <memory.h>
 #include "renderer.h"
 
 namespace tesseract {

From a74b6b104982006092bfa09626800e59c6bd7828 Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Thu, 29 Nov 2018 21:49:29 -0500
Subject: [PATCH 6/9] Small style fixes

---
 src/api/altorenderer.cpp | 86 +++++++++++++++++++---------------------
 src/api/capi.cpp         |  2 +-
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index 037de3c999..89c0e463d7 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -1,20 +1,17 @@
-/**********************************************************************
- * File:        altorenderer.cpp
- * Description: ALTO rendering interface
- * Author:      Jake Sebright
- *
- * (C) Copyright 2018
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
+// File:        altorenderer.cpp
+// Description: ALTO rendering interface
+// Author:      Jake Sebright
+
+// (C) Copyright 2018
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "baseapi.h"
 #include <memory.h>
@@ -22,16 +19,13 @@
 
 namespace tesseract {
 
-/**********************************************************************
- * Alto Text Renderer interface implementation
- **********************************************************************/
     TessAltoRenderer::TessAltoRenderer(const char *outputbase)
             : TessResultRenderer(outputbase, "xml") {
     }
 
-    /**
-    * Append the ALTO XML for the beginning of the document
-    */
+    ///
+    /// Append the ALTO XML for the beginning of the document
+    ///
     bool TessAltoRenderer::BeginDocumentHandler() {
         AppendString(
                 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
@@ -58,19 +52,19 @@ namespace tesseract {
         return true;
     }
 
-    /**
-    * Append the ALTO XML for the end of the document
-    */
+    ///
+    /// Append the ALTO XML for the end of the document
+    ///
     bool TessAltoRenderer::EndDocumentHandler() {
         AppendString("\t</Layout>\n</alto>\n");
 
         return true;
     }
 
-    /**
-    * Append the ALTO XML for the layout of the image
-    */
-    bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
+    ///
+    /// Append the ALTO XML for the layout of the image
+    ///
+    bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) {
         const std::unique_ptr<const char[]> hocr(api->GetAltoText(imagenum()));
         if (hocr == nullptr) return false;
 
@@ -79,9 +73,9 @@ namespace tesseract {
         return true;
     }
 
-    /**
-    * Add a unique ID to an ALTO element
-    */
+    ///
+    /// Add a unique ID to an ALTO element
+    ///
     static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) {
         const size_t BUFSIZE = 64;
         char id_buffer[BUFSIZE];
@@ -92,10 +86,10 @@ namespace tesseract {
         *alto_str += "\"";
     }
 
-    /**
-    * Add coordinates to specified TextBlock, TextLine, or String bounding box
-    * Add word confidence if adding to a String bounding box
-    */
+    ///
+    /// Add coordinates to specified TextBlock, TextLine, or String bounding box
+    /// Add word confidence if adding to a String bounding box
+    ///
     static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
                              STRING *alto_str) {
         int left, top, right, bottom;
@@ -131,18 +125,18 @@ namespace tesseract {
         }
     }
 
-    /**
-     * Make an XML-formatted string with ALTO markup from the internal
-     * data structures.
-     */
+    ///
+    /// Make an XML-formatted string with ALTO markup from the internal
+    /// data structures.
+    ///
         char *TessBaseAPI::GetAltoText(int page_number) {
             return GetAltoText(nullptr, page_number);
         }
 
-    /**
-     * Make an XML-formatted string with ALTO markup from the internal
-     * data structures.
-     */
+    ///
+    /// Make an XML-formatted string with ALTO markup from the internal
+    /// data structures.
+    ///
         char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
             if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
                 return nullptr;
@@ -253,4 +247,4 @@ namespace tesseract {
             return ret;
         }
 
-    }
\ No newline at end of file
+    }
diff --git a/src/api/capi.cpp b/src/api/capi.cpp
index 333bbcd3fe..cffdf613ea 100644
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
@@ -68,7 +68,7 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
 
 TESS_API TessResultRenderer* TESS_CALL TessAltoRendererCreate(const char* outputbase)
 {
-    return new TessHOcrRenderer(outputbase);
+    return new TessAltoRenderer(outputbase);
 }
 
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,

From 7da328ef831fc08e39bfb5ec2a273cd1c3caa5f1 Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Thu, 29 Nov 2018 22:01:26 -0500
Subject: [PATCH 7/9] Dynamically generate tesseract version value

---
 src/api/altorenderer.cpp | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index 89c0e463d7..5895b84b54 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -28,26 +28,28 @@ namespace tesseract {
     ///
     bool TessAltoRenderer::BeginDocumentHandler() {
         AppendString(
-                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
-                "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd\">\n"
-                "\t<Description>\n"
-                "\t\t<MeasurementUnit>pixel</MeasurementUnit>\n"
-                "\t\t<sourceImageInformation>\n"
-                "\t\t\t<fileName>");
+        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+        "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-0.xsd\">\n"
+        "\t<Description>\n"
+        "\t\t<MeasurementUnit>pixel</MeasurementUnit>\n"
+        "\t\t<sourceImageInformation>\n"
+        "\t\t\t<fileName>");
 
         AppendString(title());
 
         AppendString("\t\t\t</fileName>\n"
-                     "\t\t</sourceImageInformation>\n"
-                     "\t\t<OCRProcessing ID=\"OCR_0\">\n"
-                     "\t\t\t<ocrProcessingStep>\n"
-                     "\t\t\t\t<processingSoftware>\n"
-                     "\t\t\t\t\t<softwareName>tesseract 4.0.0</softwareName>\n"
-                     "\t\t\t\t</processingSoftware>\n"
-                     "\t\t\t</ocrProcessingStep>\n"
-                     "\t\t</OCRProcessing>\n"
-                     "\t</Description>\n"
-                     "\t<Layout>\n");
+        "\t\t</sourceImageInformation>\n"
+        "\t\t<OCRProcessing ID=\"OCR_0\">\n"
+        "\t\t\t<ocrProcessingStep>\n"
+        "\t\t\t\t<processingSoftware>\n"
+        "\t\t\t\t\t<softwareName>tesseract ");
+        AppendString(TessBaseAPI::Version());
+        AppendString("</softwareName>\n"
+        "\t\t\t\t</processingSoftware>\n"
+        "\t\t\t</ocrProcessingStep>\n"
+        "\t\t</OCRProcessing>\n"
+        "\t</Description>\n"
+        "\t<Layout>\n");
 
         return true;
     }

From 5c8cf13254729a668b8493bf4eb615a487495e47 Mon Sep 17 00:00:00 2001
From: Jake Sebright <jake@jakesebright.com>
Date: Thu, 29 Nov 2018 22:15:20 -0500
Subject: [PATCH 8/9] Re-organize functions alphabetically

---
 src/api/altorenderer.cpp | 180 +++++++++++++++++++--------------------
 1 file changed, 90 insertions(+), 90 deletions(-)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index 5895b84b54..fe7c3f99ea 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -19,8 +19,56 @@
 
 namespace tesseract {
 
-    TessAltoRenderer::TessAltoRenderer(const char *outputbase)
-            : TessResultRenderer(outputbase, "xml") {
+    ///
+    /// Add coordinates to specified TextBlock, TextLine, or String bounding box.
+    /// For RIL_WORD, also add WC as confidence/100 and leave the tag open.
+    ///
+    static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
+                             STRING *alto_str) {
+        int left, top, right, bottom;
+        it->BoundingBox(level, &left, &top, &right, &bottom);
+
+        int hpos = left;
+        int vpos = top;
+        int height = bottom - top;
+        int width = right - left;
+
+        *alto_str += " HPOS=\"";
+        alto_str->add_str_int("", hpos);
+        *alto_str += "\"";
+        *alto_str += " VPOS=\"";
+        alto_str->add_str_int("", vpos);
+        *alto_str += "\"";
+        *alto_str += " WIDTH=\"";
+        alto_str->add_str_int("", width);
+        *alto_str += "\"";
+        *alto_str += " HEIGHT=\"";
+        alto_str->add_str_int("", height);
+        *alto_str += "\"";
+
+        if (level == RIL_WORD) {
+            int wc = it->Confidence(RIL_WORD);  // percent; 5 -> 0.05, 100 -> 1.00
+            char wc_buffer[16];
+            snprintf(wc_buffer, sizeof(wc_buffer), "%d.%02d", wc / 100, wc % 100);
+            *alto_str += " WC=\"";
+            *alto_str += wc_buffer;
+            *alto_str += "\"";
+        } else {
+            *alto_str += ">";
+        }
+    }
+
+    ///
+    /// Add a unique ID to an ALTO element
+    ///
+    static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) {
+        const size_t BUFSIZE = 64;
+        char id_buffer[BUFSIZE];
+        snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
+        id_buffer[BUFSIZE - 1] = '\0';
+        *alto_str += " ID=\"";
+        *alto_str += id_buffer;
+        *alto_str += "\"";
     }
 
     ///
@@ -54,15 +102,6 @@ namespace tesseract {
         return true;
     }
 
-    ///
-    /// Append the ALTO XML for the end of the document
-    ///
-    bool TessAltoRenderer::EndDocumentHandler() {
-        AppendString("\t</Layout>\n</alto>\n");
-
-        return true;
-    }
-
     ///
     /// Append the ALTO XML for the layout of the image
     ///
@@ -76,97 +115,58 @@ namespace tesseract {
     }
 
     ///
-    /// Add a unique ID to an ALTO element
-    ///
-    static void AddIdToAlto(STRING *alto_str, const std::string base, int num1) {
-        const size_t BUFSIZE = 64;
-        char id_buffer[BUFSIZE];
-        snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
-        id_buffer[BUFSIZE - 1] = '\0';
-        *alto_str += " ID=\"";
-        *alto_str += id_buffer;
-        *alto_str += "\"";
-    }
-
-    ///
-    /// Add coordinates to specified TextBlock, TextLine, or String bounding box
-    /// Add word confidence if adding to a String bounding box
+    /// Append the ALTO XML for the end of the document
     ///
-    static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
-                             STRING *alto_str) {
-        int left, top, right, bottom;
-        it->BoundingBox(level, &left, &top, &right, &bottom);
-
-        int hpos = left;
-        int vpos = top;
-        int height = bottom - top;
-        int width = right - left;
-
-        *alto_str += " HPOS=\"";
-        alto_str->add_str_int("", hpos);
-        *alto_str += "\"";
-        *alto_str += " VPOS=\"";
-        alto_str->add_str_int("", vpos);
-        *alto_str += "\"";
-        *alto_str += " WIDTH=\"";
-        alto_str->add_str_int("", width);
-        *alto_str += "\"";
-        *alto_str += " HEIGHT=\"";
-        alto_str->add_str_int("", height);
-        *alto_str += "\"";
+    bool TessAltoRenderer::EndDocumentHandler() {
+        AppendString("\t</Layout>\n</alto>\n");
 
-        if (level == RIL_WORD) {
-            int wc = it->Confidence(RIL_WORD);
-            *alto_str += " WC=\"0.";
-            alto_str->add_str_int("", wc);
-            *alto_str += "\"";
-        }
-        if (level != RIL_WORD) {
+        return true;
+    }
 
-            *alto_str += ">";
-        }
+    TessAltoRenderer::TessAltoRenderer(const char *outputbase)
+        : TessResultRenderer(outputbase, "xml") {
     }
 
     ///
     /// Make an XML-formatted string with ALTO markup from the internal
     /// data structures.
     ///
-        char *TessBaseAPI::GetAltoText(int page_number) {
-            return GetAltoText(nullptr, page_number);
-        }
+    char *TessBaseAPI::GetAltoText(int page_number) {
+        return GetAltoText(nullptr, page_number);
+    }
 
     ///
     /// Make an XML-formatted string with ALTO markup from the internal
     /// data structures.
     ///
-        char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
-            if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
-                return nullptr;
-
-            int lcnt = 0, bcnt = 0, wcnt = 0;
-            int page_id = page_number;
-
-            STRING alto_str("");
-
-            if (input_file_ == nullptr)
-                SetInputName(nullptr);
-
-    #ifdef _WIN32
-            // convert input name from ANSI encoding to utf-8
-          int str16_len =
-              MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
-          wchar_t *uni16_str = new WCHAR[str16_len];
-          str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
-                                          uni16_str, str16_len);
-          int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
-                                             nullptr, nullptr);
-          char *utf8_str = new char[utf8_len];
-          WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
-                              utf8_len, nullptr, nullptr);
-          *input_file_ = utf8_str;
-          delete[] uni16_str;
-          delete[] utf8_str;
-    #endif
+    char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
+        if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
+            return nullptr;
+
+        int lcnt = 0, bcnt = 0, wcnt = 0;
+        int page_id = page_number;
+
+        STRING alto_str("");
+
+        if (input_file_ == nullptr)
+            SetInputName(nullptr);
+
+        #ifdef _WIN32
+                // convert input name from ANSI encoding to utf-8
+              int str16_len =
+                  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, nullptr, 0);
+              wchar_t *uni16_str = new WCHAR[str16_len];
+              str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
+                                              uni16_str, str16_len);
+              int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0,
+                                                 nullptr, nullptr);
+              char *utf8_str = new char[utf8_len];
+              WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
+                                  utf8_len, nullptr, nullptr);
+              *input_file_ = utf8_str;
+              delete[] uni16_str;
+              delete[] utf8_str;
+        #endif
 
             alto_str += "\t\t<Page WIDTH=\"";
             alto_str.add_str_int("", rect_width_);
@@ -247,6 +247,6 @@ namespace tesseract {
             strcpy(ret, alto_str.string());
             delete res_it;
             return ret;
-        }
-
     }
+
+}

From 62862e47ce55f33472bc26f23eed991441d7a59e Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Fri, 30 Nov 2018 05:53:10 +0100
Subject: [PATCH 9/9] Fix include statement

---
 src/api/altorenderer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index fe7c3f99ea..11af90f692 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -14,7 +14,7 @@
 // limitations under the License.
 
 #include "baseapi.h"
-#include <memory.h>
+#include <memory>
 #include "renderer.h"
 
 namespace tesseract {