feat: 集成Tesseract源码到项目中

Description: 由于仓库中的Tesseract不是最新版本导致产生了一个bug，因此将Tesseract源码集成到项目中 Log: no Change-Id: I088de95d6c6ab670406daa8d47ed2ed46929c2c0
2021-06-22 20:13:39 +08:00
parent 40c90fc3c7
commit 0cfed22ed4
439 changed files with 185083 additions and 13 deletions
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blamer.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blamer.cpp
@ -0,0 +1,578 @@
+///////////////////////////////////////////////////////////////////////
+// File:        blamer.cpp
+// Description: Module allowing precise error causes to be allocated.
+// Author:      Rike Antonova
+// Refactored:  Ray Smith
+//
+// (C) Copyright 2013, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "blamer.h"
+
+#include "blobs.h"   // for TPOINT, TWERD, TBLOB
+#include "errcode.h" // for ASSERT_HOST
+#if !defined(DISABLED_LEGACY_ENGINE)
+#  include "lm_pain_points.h" // for LMPainPoints
+#endif
+#include "matrix.h"     // for MATRIX
+#include "normalis.h"   // for DENORM
+#include "pageres.h"    // for WERD_RES
+#include "unicharset.h" // for UNICHARSET
+
+#include <cmath>   // for abs
+#include <cstdlib> // for abs
+
+namespace tesseract {
+
+// Names for each value of IncorrectResultReason enum. Keep in sync.
+// Each constant below is the short printable tag of one blame reason. They
+// are collected, in this order, into kIncorrectResultReasonNames.
+const char kBlameCorrect[] = "corr";
+const char kBlameClassifier[] = "cl";
+const char kBlameChopper[] = "chop";
+const char kBlameClassLMTradeoff[] = "cl/LM";
+const char kBlamePageLayout[] = "pglt";
+const char kBlameSegsearchHeur[] = "ss_heur";
+const char kBlameSegsearchPP[] = "ss_pp";
+const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
+const char kBlameAdaption[] = "adapt";
+const char kBlameNoTruthSplit[] = "no_tr_spl";
+const char kBlameNoTruth[] = "no_tr";
+const char kBlameUnknown[] = "unkn";
+
+// Lookup table that IncorrectReasonName() and IncorrectReason() index
+// directly with an IncorrectResultReason value, so the entry order here has
+// to follow the order in which the enum values are declared.
+const char *const kIncorrectResultReasonNames[] = {
+    kBlameCorrect,    kBlameClassifier,    kBlameChopper,     kBlameClassLMTradeoff,
+    kBlamePageLayout, kBlameSegsearchHeur, kBlameSegsearchPP, kBlameClassOldLMTradeoff,
+    kBlameAdaption,   kBlameNoTruthSplit,  kBlameNoTruth,     kBlameUnknown};
+
+// Maps a blame reason onto its short printable tag.
+// irr is used as a direct index into kIncorrectResultReasonNames; no range
+// check is made on it.
+const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
+  const char *const reason_name = kIncorrectResultReasonNames[irr];
+  return reason_name;
+}
+
+// Returns the short printable tag of the blame reason currently stored in
+// this bundle.
+const char *BlamerBundle::IncorrectReason() const {
+  const auto current_reason = incorrect_result_reason_;
+  return kIncorrectResultReasonNames[current_reason];
+}
+
+// Functions to setup the blamer.
+// Whole word string, whole word bounding box.
+// Stores word_box as the truth box at index 0, marks the truth as having no
+// per-character boxes, and appends one truth_text_ entry for every unichar
+// that unicharset.encode_string() produces from truth_str.
+// An entry holds the normalized form of the unichar when its id is valid,
+// otherwise the raw bytes of that unichar taken from truth_str.
+void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str,
+                                const TBOX &word_box) {
+  truth_word_.InsertBox(0, word_box);
+  truth_has_char_boxes_ = false;
+  // Encode the string as UNICHAR_IDs.
+  std::vector<UNICHAR_ID> encoding;
+  std::vector<char> lengths;
+  unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
+  // total_length is the byte offset in truth_str at which unichar i starts.
+  // It grows by lengths[i] per step, i.e. lengths[i] is the size of unichar i.
+  int total_length = 0;
+  for (size_t i = 0; i < encoding.size(); ++i) {
+    const int uch_length = lengths[i];
+    std::string uch(truth_str + total_length);
+    // Keep only the bytes of this unichar. The size must be lengths[i] itself:
+    // subtracting the running offset from it gives a too-short and eventually
+    // negative size, which std::string::resize turns into a huge request.
+    uch.resize(uch_length);
+    const UNICHAR_ID id = encoding[i];
+    if (id != INVALID_UNICHAR_ID) {
+      uch = unicharset.get_normed_unichar(id);
+    }
+    truth_text_.push_back(uch);
+    total_length += uch_length;
+  }
+}
+
+// Adds the truth for one "character": its text and its bounding box.
+// Intended to be called once per character of a word, in order.
+// The stored text is the normalized form of char_str when the unicharset knows
+// it and that form is non-empty; otherwise char_str is stored as given.
+// truth_has_char_boxes_ becomes true on the first character and is reset to
+// false as soon as a character arrives with the same box as its predecessor.
+void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str,
+                                  const TBOX &char_box) {
+  // Number of boxes already stored, i.e. the index the new box will get.
+  const int prev_count = truth_word_.length();
+
+  std::string truth_symbol;
+  const UNICHAR_ID symbol_id = unicharset.unichar_to_id(char_str);
+  if (symbol_id != INVALID_UNICHAR_ID) {
+    truth_symbol = unicharset.get_normed_unichar(symbol_id);
+  }
+  if (truth_symbol.empty()) {
+    // Unknown symbol, or no usable normalized form: fall back to the input.
+    truth_symbol = char_str;
+  }
+
+  truth_text_.push_back(truth_symbol);
+  truth_word_.InsertBox(prev_count, char_box);
+
+  if (prev_count == 0) {
+    truth_has_char_boxes_ = true;
+    return;
+  }
+  if (truth_word_.BlobBox(prev_count - 1) == char_box) {
+    truth_has_char_boxes_ = false;
+  }
+}
+
+// Flags the truth text as unusable (for instance because it contains reject
+// characters): the per-character boxes are disregarded and the reason is set
+// to IRR_NO_TRUTH.
+void BlamerBundle::SetRejectedTruth() {
+  truth_has_char_boxes_ = false;
+  incorrect_result_reason_ = IRR_NO_TRUTH;
+}
+
+// Tells whether word_choice spells the truth.
+// The normalized forms of all unichars in word_choice are concatenated and
+// compared with TruthString(). A null word_choice is never correct.
+bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
+  if (word_choice == nullptr) {
+    return false;
+  }
+  const UNICHARSET *uni_set = word_choice->unicharset();
+  std::string normed_choice;
+  int pos = 0;
+  while (pos < word_choice->length()) {
+    normed_choice.append(uni_set->get_normed_unichar(word_choice->unichar_id(pos)));
+    ++pos;
+  }
+  return TruthString() == normed_choice;
+}
+
+// Appends a description of the truth, and optionally of a choice, to debug.
+// msg:    extra text put on a line of its own; skipped when empty.
+// choice: if non-null, its string is appended after the truth text.
+// debug:  receives the text; existing content is kept and extended.
+void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug) {
+  debug.append("Truth ");
+  for (const auto &truth_piece : truth_text_) {
+    debug.append(truth_piece);
+  }
+  if (!truth_has_char_boxes_) {
+    debug.append(" (no char boxes)");
+  }
+  if (choice != nullptr) {
+    debug.append(" Choice ");
+    std::string choice_text;
+    choice->string_and_lengths(&choice_text, nullptr);
+    debug.append(choice_text);
+  }
+  if (!msg.empty()) {
+    debug.append("\n");
+    debug.append(msg);
+  }
+  debug.append("\n");
+}
+
+// Fills norm_truth_word_ with the boxes of truth_word_ after passing their
+// corners through denorm.NormTransform, and derives norm_box_tolerance_ from
+// kBlamerBoxTolerance and denorm.x_scale().
+void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
+  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
+  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
+  TPOINT src_top_left;
+  TPOINT src_bottom_right;
+  TPOINT dst_top_left;
+  TPOINT dst_bottom_right;
+  int box_idx = 0;
+  while (box_idx < truth_word_.length()) {
+    const TBOX &truth_box = truth_word_.BlobBox(box_idx);
+    // Transform the two opposite corners of the box separately.
+    src_top_left.x = truth_box.left();
+    src_top_left.y = truth_box.top();
+    src_bottom_right.x = truth_box.right();
+    src_bottom_right.y = truth_box.bottom();
+    denorm.NormTransform(nullptr, src_top_left, &dst_top_left);
+    denorm.NormTransform(nullptr, src_bottom_right, &dst_bottom_right);
+    TBOX transformed_box(dst_top_left.x, dst_bottom_right.y, dst_bottom_right.x,
+                         dst_top_left.y);
+    norm_truth_word_.InsertBox(box_idx, transformed_box);
+    ++box_idx;
+  }
+}
+
+// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
+// bundles) where the right edge of the left-hand word is word1_right,
+// and the left edge of the right-hand word is word2_left.
+// A split point is a pair of neighbouring boxes in norm_truth_word_ whose
+// facing edges both lie within norm_box_tolerance_ of the given coordinates.
+// When one is found the truth boxes and texts are shared out between the two
+// bundles. Otherwise both bundles get IRR_NO_TRUTH (if this bundle has no
+// truth) or are blamed with IRR_NO_TRUTH_SPLIT.
+void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
+                               BlamerBundle *bundle2) const {
+  std::string debug_str;
+  // Index of the first truth box that belongs to the right-hand word.
+  int begin2_truth_index = -1;
+  const bool truth_is_searchable =
+      incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_;
+  if (truth_is_searchable) {
+    debug_str = "Looking for truth split at";
+    debug_str += " end1_x " + std::to_string(word1_right);
+    debug_str += " begin2_x " + std::to_string(word2_left);
+    debug_str += "\nnorm_truth_word boxes:\n";
+    const int num_boxes = norm_truth_word_.length();
+    if (num_boxes > 1) {
+      norm_truth_word_.BlobBox(0).print_to_str(debug_str);
+      for (int right_idx = 1; right_idx < num_boxes; ++right_idx) {
+        const TBOX &left_box = norm_truth_word_.BlobBox(right_idx - 1);
+        const TBOX &right_box = norm_truth_word_.BlobBox(right_idx);
+        right_box.print_to_str(debug_str);
+        const bool left_edge_fits = abs(word1_right - left_box.right()) < norm_box_tolerance_;
+        const bool right_edge_fits = abs(word2_left - right_box.left()) < norm_box_tolerance_;
+        if (left_edge_fits && right_edge_fits) {
+          begin2_truth_index = right_idx;
+          debug_str += "Split found";
+          break;
+        }
+      }
+      debug_str += '\n';
+    }
+  }
+
+  if (begin2_truth_index > 0) {
+    // Hand the first part of the original truth to bundle1 and the rest to
+    // bundle2.
+    bundle1->truth_has_char_boxes_ = true;
+    bundle1->norm_box_tolerance_ = norm_box_tolerance_;
+    bundle2->truth_has_char_boxes_ = true;
+    bundle2->norm_box_tolerance_ = norm_box_tolerance_;
+    for (int idx = 0; idx < norm_truth_word_.length(); ++idx) {
+      BlamerBundle *target = (idx < begin2_truth_index) ? bundle1 : bundle2;
+      target->norm_truth_word_.InsertBox(idx, norm_truth_word_.BlobBox(idx));
+      target->truth_word_.InsertBox(idx, truth_word_.BlobBox(idx));
+      target->truth_text_.push_back(truth_text_[idx]);
+    }
+    return;
+  }
+
+  if (incorrect_result_reason_ == IRR_NO_TRUTH) {
+    bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
+    bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
+    return;
+  }
+
+  debug_str += "Truth split not found";
+  if (truth_has_char_boxes_) {
+    debug_str += "\n";
+  } else {
+    debug_str += " (no truth char boxes)\n";
+  }
+  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
+  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
+}
+
+// Merges the blame of two word parts, bundle1 and bundle2, into *this.
+// A part contributes only if its reason is none of IRR_CORRECT, IRR_NO_TRUTH
+// and IRR_NO_TRUTH_SPLIT. The reason of part 1 replaces the current one; the
+// reason of part 2 is adopted if the current one is IRR_CORRECT, and a
+// disagreement between them yields IRR_UNKNOWN.
+void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2,
+                              bool debug) {
+  // True for reasons that name an actual culprit.
+  const auto names_a_culprit = [](IncorrectResultReason reason) {
+    return !(reason == IRR_CORRECT || reason == IRR_NO_TRUTH || reason == IRR_NO_TRUTH_SPLIT);
+  };
+
+  std::string debug_str;
+  IncorrectResultReason joined = incorrect_result_reason_;
+  if (joined != IRR_NO_TRUTH_SPLIT) {
+    debug_str.clear();
+  }
+
+  const IncorrectResultReason reason1 = bundle1.incorrect_result_reason_;
+  if (names_a_culprit(reason1)) {
+    debug_str += "Blame from part 1: ";
+    debug_str += bundle1.debug_;
+    joined = reason1;
+  }
+
+  const IncorrectResultReason reason2 = bundle2.incorrect_result_reason_;
+  if (names_a_culprit(reason2)) {
+    debug_str += "Blame from part 2: ";
+    debug_str += bundle2.debug_;
+    if (joined == IRR_CORRECT) {
+      joined = reason2;
+    } else if (joined != reason2) {
+      joined = IRR_UNKNOWN;
+    }
+  }
+
+  incorrect_result_reason_ = joined;
+  const bool needs_blame = joined != IRR_CORRECT && joined != IRR_NO_TRUTH;
+  if (needs_blame) {
+    SetBlame(joined, debug_str, nullptr, debug);
+  }
+}
+
+// Blames the character classifier when a blob that sits on a truth character
+// box was not classified as that truth character.
+// Only the first truth box that blob_box matches horizontally is examined.
+// If the truth text is absent from choices the reason is IRR_CLASSIFIER; if
+// it is present but an adapted choice precedes it in the list the reason is
+// IRR_ADAPTION.
+// Does nothing without per-character truth boxes or once a blame is set.
+void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
+                                   const BLOB_CHOICE_LIST &choices, bool debug) {
+  if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
+    return; // Nothing to do here.
+  }
+
+  for (int box_idx = 0; box_idx < norm_truth_word_.length(); ++box_idx) {
+    const TBOX &truth_box = norm_truth_word_.BlobBox(box_idx);
+    // Half the usual tolerance is used here: unlike the chopper and the
+    // segmentation search, this check cannot look at the neighbouring boxes.
+    if (!blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
+      continue;
+    }
+
+    bool truth_in_list = false;
+    bool adapted_came_first = false;
+    UNICHAR_ID adapted_id = INVALID_UNICHAR_ID;
+    const char *truth_str = truth_text_[box_idx].c_str();
+    // The iterator needs a non-const list; the list and its elements are only
+    // read, through a const BLOB_CHOICE*.
+    BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices));
+    for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
+      const BLOB_CHOICE *choice = choices_it.data();
+      if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) {
+        truth_in_list = true;
+        break;
+      }
+      if (choice->IsAdapted()) {
+        adapted_came_first = true;
+        adapted_id = choice->unichar_id();
+      }
+    }
+
+    if (!truth_in_list) {
+      std::string debug_str("unichar ");
+      debug_str.append(truth_str);
+      debug_str.append(" not found in classification list");
+      SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
+    } else if (adapted_came_first) {
+      std::string debug_str("better rating for adapted ");
+      debug_str.append(unicharset.id_to_unichar(adapted_id));
+      debug_str.append(" than for correct ");
+      debug_str.append(truth_str);
+      SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
+    }
+    return; // Only the first matching truth box is considered.
+  }
+}
+
+// Checks whether chops were made at all the character bounding box
+// boundaries in word->truth_word. If not - blames the chopper for an
+// incorrect answer.
+// The blobs of word->chopped_word are walked from left to right together with
+// the boxes of norm_truth_word_. A blob whose right edge lies within
+// norm_box_tolerance_ of the right edge of the current truth box counts as
+// the chop for that box; blobs ending before it are extra chops and are
+// skipped; a blob ending beyond it means the chop is missing.
+// Does nothing if there is no truth, no per-character truth boxes, or no
+// blobs.
+// word:  supplies the chopped blobs and the best_choice passed to SetBlame.
+// debug: passed through to SetBlame.
+void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
+  if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) {
+    return;
+  }
+  bool missing_chop = false;
+  int num_blobs = word->chopped_word->blobs.size();
+  int box_index = 0;
+  int blob_index = 0;
+  int16_t truth_x = -1;
+  // Bounded by norm_truth_word_ because that is the container indexed below
+  // and the one the check after the loop compares against.
+  while (box_index < norm_truth_word_.length() && blob_index < num_blobs) {
+    truth_x = norm_truth_word_.BlobBox(box_index).right();
+    TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
+    if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
+      ++blob_index;
+      continue; // encountered an extra chop, keep looking
+    } else if (curr_blob->bounding_box().right() > truth_x + norm_box_tolerance_) {
+      missing_chop = true;
+      break;
+    } else {
+      // Chop found for this truth box: advance to the next box as well.
+      // Without this step box_index would stay at 0 and the check below
+      // would blame the chopper on every call.
+      ++blob_index;
+      ++box_index;
+    }
+  }
+  if (missing_chop || box_index < norm_truth_word_.length()) {
+    std::string debug_str;
+    if (missing_chop) {
+      debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
+      debug_str += ") at Bounding Box=";
+      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
+      curr_blob->bounding_box().print_to_str(debug_str);
+      debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
+    } else {
+      // Blobs ran out before every truth box had been matched.
+      debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
+      debug_str += " truth box(es)";
+    }
+    debug_str += "\nMaximally chopped word boxes:\n";
+    for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
+      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
+      curr_blob->bounding_box().print_to_str(debug_str);
+      debug_str += '\n';
+    }
+    debug_str += "Truth  bounding  boxes:\n";
+    for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
+      norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
+      debug_str += '\n';
+    }
+    SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
+  }
+}
+
+// Decides between the classifier and the language model as the culprit when,
+// after running only the chopper, best_choice is wrong and nothing has been
+// blamed so far.
+// With valid_permuter set, best_choice_is_dict_and_top_choice_ is recomputed:
+// it is true when every unichar of best_choice equals the first non-fragment
+// entry of the corresponding blob choice list. If the flag is true the
+// classifier is blamed (the language model could not have helped), otherwise
+// the classifier/old language model tradeoff is blamed.
+void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
+                                              bool valid_permuter, bool debug) {
+  const WERD_CHOICE *best = word->best_choice;
+  if (valid_permuter) {
+    // Find out whether best choice is a top choice.
+    bool every_char_is_top = true;
+    for (int pos = 0; every_char_is_top && pos < best->length(); ++pos) {
+      BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(pos));
+      ASSERT_HOST(!blob_choice_it.empty());
+      // Locate the first choice that is not a character fragment.
+      BLOB_CHOICE *top_choice = nullptr;
+      blob_choice_it.mark_cycle_pt();
+      while (!blob_choice_it.cycled_list()) {
+        if (!unicharset.get_fragment(blob_choice_it.data()->unichar_id())) {
+          top_choice = blob_choice_it.data();
+          break;
+        }
+        blob_choice_it.forward();
+      }
+      ASSERT_HOST(top_choice != nullptr);
+      if (top_choice->unichar_id() != best->unichar_id(pos)) {
+        every_char_is_top = false;
+      }
+    }
+    best_choice_is_dict_and_top_choice_ = every_char_is_top;
+  }
+
+  std::string debug_str;
+  IncorrectResultReason culprit;
+  if (best_choice_is_dict_and_top_choice_) {
+    culprit = IRR_CLASSIFIER;
+    debug_str = "Best choice is: incorrect, top choice, dictionary word";
+    debug_str += " with permuter ";
+    debug_str += best->permuter_name();
+  } else {
+    culprit = IRR_CLASS_OLD_LM_TRADEOFF;
+    debug_str = "Classifier/Old LM tradeoff is to blame";
+  }
+  SetBlame(culprit, debug_str, best, debug);
+}
+
+// Sets up the correct_segmentation_* to mark the correct bounding boxes.
+// Walks the blobs of word from left to right and groups them so that the
+// right edge of each group lands, within norm_box_tolerance_, on the right
+// edge of the next box in norm_truth_word_. For every truth box matched this
+// way the index of the first blob of the group is appended to
+// correct_segmentation_cols_ and the index of its last blob to
+// correct_segmentation_rows_.
+// If not every truth box gets matched, or blobs are left over, the blame is
+// set to IRR_UNKNOWN and both vectors are emptied again.
+// word:  supplies the blobs; nothing is recorded when it has none.
+// debug: passed through to SetBlame.
+void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
+#ifndef DISABLED_LEGACY_ENGINE
+  // Placed ahead of the early returns below, so it runs on every call.
+  params_training_bundle_.StartHypothesisList();
+#endif //  ndef DISABLED_LEGACY_ENGINE
+  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
+    return; // Nothing to do here.
+  }
+
+  std::string debug_str = "Blamer computing correct_segmentation_cols\n";
+  // curr_box_col: index of the first blob of the group being built.
+  // next_box_col: one past the index of the blob being examined.
+  int curr_box_col = 0;
+  int next_box_col = 0;
+  int num_blobs = word->NumBlobs();
+  if (num_blobs == 0) {
+    return; // No blobs to play with.
+  }
+  int blob_index = 0;
+  int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
+  // truth_idx only advances when a group is completed; blob_index advances on
+  // every iteration and is inspected again after the loop.
+  for (int truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
+       ++blob_index) {
+    ++next_box_col;
+    int16_t curr_box_x = next_box_x;
+    if (blob_index + 1 < num_blobs) {
+      next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
+    }
+    int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
+    debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
+    debug_str += " " + std::to_string(truth_x);
+    debug_str += "\n";
+    if (curr_box_x > (truth_x + norm_box_tolerance_)) {
+      break;                                                  // failed to find a matching box
+    } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
+               (blob_index + 1 >= num_blobs ||                // next box can't be included
+                next_box_x > truth_x + norm_box_tolerance_)) {
+      // The group covers blobs curr_box_col .. next_box_col - 1.
+      correct_segmentation_cols_.push_back(curr_box_col);
+      correct_segmentation_rows_.push_back(next_box_col - 1);
+      ++truth_idx;
+      debug_str += "col=" + std::to_string(curr_box_col);
+      debug_str += " row=" + std::to_string(next_box_col - 1);
+      debug_str += "\n";
+      curr_box_col = next_box_col;
+    }
+  }
+  // Failure: the loop stopped with blobs remaining, or fewer groups were
+  // recorded than there are truth boxes.
+  if (blob_index < num_blobs || // trailing blobs
+      correct_segmentation_cols_.size() != norm_truth_word_.length()) {
+    debug_str += 
+        "Blamer failed to find correct segmentation"
+        " (tolerance=" +
+        std::to_string(norm_box_tolerance_);
+    if (blob_index >= num_blobs) {
+      // Every blob was consumed before all truth boxes were matched.
+      debug_str += " blob == nullptr";
+    }
+    debug_str += ")\n";
+    debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
+    debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
+    debug_str += "\n";
+    SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
+    correct_segmentation_cols_.clear();
+    correct_segmentation_rows_.clear();
+  }
+}
+
+// Tells whether a guided segmentation search should be run: no blame has been
+// set yet, no guided search is already under way, per-character truth boxes
+// exist, and best_choice does not match the truth.
+bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
+  if (incorrect_result_reason_ != IRR_CORRECT) {
+    return false;
+  }
+  if (segsearch_is_looking_for_blame_) {
+    return false;
+  }
+  if (!truth_has_char_boxes_) {
+    return false;
+  }
+  return !ChoiceIsCorrect(best_choice);
+}
+
+#if !defined(DISABLED_LEGACY_ENGINE)
+// Prepares a guided segmentation search towards the correct segmentation.
+// Marks the search as looking for blame, then for every recorded
+// (col, row) cell of the correct segmentation that is not yet classified in
+// ratings asks pain_points to generate a pain point. The first cell for which
+// that fails ends the guided search and sets the blame to IRR_SEGSEARCH_HEUR.
+// debug_str receives a listing of the correct segmentation.
+void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
+                                    UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
+                                    tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
+                                    WERD_RES *word_res) {
+  segsearch_is_looking_for_blame_ = true;
+  if (debug) {
+    tprintf("segsearch starting to look for blame\n");
+  }
+  debug_str += "Correct segmentation:\n";
+  for (int idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
+    const auto seg_col = correct_segmentation_cols_[idx];
+    const auto seg_row = correct_segmentation_rows_[idx];
+    debug_str += "col=" + std::to_string(seg_col);
+    debug_str += " row=" + std::to_string(seg_row);
+    debug_str += "\n";
+    if (ratings->Classified(seg_col, seg_row, wildcard_id)) {
+      continue; // Already classified, no pain point required.
+    }
+    const bool inserted = pain_points->GeneratePainPoint(
+        seg_col, seg_row, tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res);
+    if (inserted) {
+      continue;
+    }
+    // Pain point rejected: give up on the guided search.
+    segsearch_is_looking_for_blame_ = false;
+    debug_str += "\nFailed to insert pain point\n";
+    SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
+    return;
+  }
+}
+#endif // !defined(DISABLED_LEGACY_ENGINE)
+
+// Reports whether a guided segmentation search is currently under way, i.e.
+// the value of segsearch_is_looking_for_blame_.
+bool BlamerBundle::GuidedSegsearchStillGoing() const {
+  const bool still_going = segsearch_is_looking_for_blame_;
+  return still_going;
+}
+
+// Called when the segmentation search is over; assigns the blame if the
+// guided search was still looking for one, and ends the guided search.
+// Being in that state means best_choice is incorrect although a path for the
+// correct segmentation could be constructed. The blame then goes to:
+//  - the classifier, if best_choice is a dictionary word and the classifier's
+//    top choice;
+//  - segmentation search pain point prioritization, if the correct
+//    segmentation rates better than best_choice (the language model would
+//    have picked it, but it was never explored);
+//  - the classifier/language model tradeoff otherwise, since the incorrect
+//    best_choice wins even against the correct segmentation.
+void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
+  if (!segsearch_is_looking_for_blame_) {
+    return;
+  }
+  segsearch_is_looking_for_blame_ = false;
+
+  if (best_choice_is_dict_and_top_choice_) {
+    debug_str = "Best choice is: incorrect, top choice, dictionary word";
+    debug_str += " with permuter ";
+    debug_str += best_choice->permuter_name();
+    SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
+    return;
+  }
+
+  if (best_correctly_segmented_rating_ < best_choice->rating()) {
+    debug_str += "Correct segmentation state was not explored";
+    SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
+    return;
+  }
+
+  const bool correct_paths_pruned = best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating;
+  if (correct_paths_pruned) {
+    debug_str += "Correct segmentation paths were pruned by LM\n";
+  } else {
+    debug_str += "Best correct segmentation rating " +
+                 std::to_string(best_correctly_segmented_rating_);
+    debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
+  }
+  SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
+}
+
+// Final fix-up of the blame recorded in word->blamer_bundle.
+//  - No bundle: one is created and blamed with IRR_PAGE_LAYOUT.
+//  - Reason IRR_NO_TRUTH: the blame is set again with "Rejected truth".
+//  - Otherwise the reason is reconciled with whether best_choice matches the
+//    truth: a wrong choice without blame gets IRR_UNKNOWN, and a correct
+//    choice that carries a blame is reset to IRR_CORRECT.
+void BlamerBundle::LastChanceBlame(bool debug, WERD_RES *word) {
+  if (word->blamer_bundle == nullptr) {
+    word->blamer_bundle = new BlamerBundle();
+    word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug);
+    return;
+  }
+
+  BlamerBundle *bundle = word->blamer_bundle;
+  if (bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
+    bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug);
+    return;
+  }
+
+  const bool choice_matches_truth = bundle->ChoiceIsCorrect(word->best_choice);
+  const bool has_blame = bundle->incorrect_result_reason_ != IRR_CORRECT;
+  if (!has_blame && !choice_matches_truth) {
+    std::string debug_str("Choice is incorrect after recognition");
+    bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
+  } else if (has_blame && choice_matches_truth) {
+    if (debug) {
+      tprintf("Corrected %s\n", bundle->debug_.c_str());
+    }
+    bundle->incorrect_result_reason_ = IRR_CORRECT;
+    bundle->debug_ = "";
+  }
+}
+
+// Records in misadaption_debug_ that the word being adapted to is incorrect.
+// Nothing is recorded when the reason is IRR_NO_TRUTH or when best_choice
+// matches the truth. With debug set the recorded text is also printed.
+void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug) {
+  if (incorrect_result_reason_ == IRR_NO_TRUTH || ChoiceIsCorrect(best_choice)) {
+    return;
+  }
+  misadaption_debug_ = "misadapt to word (";
+  misadaption_debug_.append(best_choice->permuter_name());
+  misadaption_debug_.append("): ");
+  FillDebugString("", best_choice, misadaption_debug_);
+  if (debug) {
+    tprintf("%s\n", misadaption_debug_.c_str());
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blamer.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blamer.h
@ -0,0 +1,350 @@
+///////////////////////////////////////////////////////////////////////
+// File:        blamer.h
+// Description: Module allowing precise error causes to be allocated.
+// Author:      Rike Antonova
+// Refactored:  Ray Smith
+//
+// (C) Copyright 2013, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
+#define TESSERACT_CCSTRUCT_BLAMER_H_
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
+#endif
+#include "boxword.h" // for BoxWord
+#ifndef DISABLED_LEGACY_ENGINE
+#  include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra...
+#endif                                 //  ndef DISABLED_LEGACY_ENGINE
+#include "ratngs.h"                    // for BLOB_CHOICE_LIST (ptr only)
+#include "rect.h"                      // for TBOX
+#include "tprintf.h"                   // for tprintf
+
+#include <tesseract/unichar.h> // for UNICHAR_ID
+
+#include <cstdint> // for int16_t
+#include <cstring> // for memcpy
+#include <vector>  // for std::vector
+
+namespace tesseract {
+
+class DENORM;
+class MATRIX;
+class UNICHARSET;
+class WERD_RES;
+
+struct MATRIX_COORD;
+struct TWERD;
+
+class LMPainPoints;
+
+// Bounding-box tolerance constant used by the blamer.
+// NOTE(review): the comparisons that use it and its units are not visible in
+// this header; presumably image-space pixels -- confirm against blamer.cpp.
+static const int16_t kBlamerBoxTolerance = 5;
+
+// Enum for expressing the source of error.
+// Note: Please update kIncorrectResultReasonNames when modifying this enum.
+enum IncorrectResultReason {
+  // The text recorded in best choice == truth text
+  IRR_CORRECT,
+  // Either: Top choice is incorrect and is a dictionary word (language model
+  // is unlikely to help correct such errors, so blame the classifier).
+  // Or: the correct unichar was not included in shortlist produced by the
+  // classifier at all.
+  IRR_CLASSIFIER,
+  // The chopper did not find one or more splits that correspond to the
+  // correct character bounding boxes recorded in BlamerBundle::truth_word.
+  IRR_CHOPPER,
+  // Classifier did include correct unichars for each blob in the correct
+  // segmentation, however its rating could have been too bad to allow the
+  // language model to pull out the correct choice. On the other hand the
+  // strength of the language model might have been too weak to favor the
+  // correct answer, so we call this case a classifier-language model
+  // tradeoff error.
+  IRR_CLASS_LM_TRADEOFF,
+  // Page layout failed to produce the correct bounding box. Blame page layout
+  // if the truth was not found for the word, which implies that the bounding
+  // box of the word was incorrect (no truth word had a similar bounding box).
+  IRR_PAGE_LAYOUT,
+  // SegSearch heuristic prevented one or more blobs from the correct
+  // segmentation state to be classified (e.g. the blob was too wide).
+  IRR_SEGSEARCH_HEUR,
+  // The correct segmentation state was not explored because of poor SegSearch
+  // pain point prioritization. We blame SegSearch pain point prioritization
+  // if the best rating of a choice constructed from correct segmentation is
+  // better than that of the best choice (i.e. if we got to explore the correct
+  // segmentation state, language model would have picked the correct choice).
+  IRR_SEGSEARCH_PP,
+  // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
+  // and thus use the old language model (permuters).
+  // TODO(antonova): integrate the new language model with chopper
+  IRR_CLASS_OLD_LM_TRADEOFF,
+  // If there is an incorrect adaptive template match with a better score than
+  // a correct one (either pre-trained or adapted), mark this as adaption error.
+  IRR_ADAPTION,
+  // split_and_recog_word() failed to find a suitable split in truth.
+  IRR_NO_TRUTH_SPLIT,
+  // Truth is not available for this word (e.g. when words in corrected content
+  // file are turned into ~~~~ because an appropriate alignment was not found).
+  IRR_NO_TRUTH,
+  // The text recorded in best choice != truth text, but none of the above
+  // reasons are set.
+  IRR_UNKNOWN,
+
+  // Number of reasons above; keep this as the last enumerator.
+  IRR_NUM_REASONS
+};
+
+// Blamer-related information to determine the source of errors.
+//
+// Holds the ground truth for a word (boxes and text), the blame assigned so
+// far (an IncorrectResultReason plus debug text), and the state used while the
+// segmentation search is looking for the blame. The bundle owns the heap
+// buffer behind lattice_data_ and releases it in its destructor.
+struct BlamerBundle {
+  static const char *IncorrectReasonName(IncorrectResultReason irr);
+  BlamerBundle()
+      : truth_has_char_boxes_(false)
+      , incorrect_result_reason_(IRR_CORRECT)
+      , lattice_data_(nullptr) {
+    ClearResults();
+  }
+  // Copies the truth and the results of other. debug_ and misadaption_debug_
+  // are not copied (neither CopyTruth nor CopyResults touches them), so they
+  // start out empty. The lattice members rely on their default member
+  // initializers so that CopyResults sees a well-defined (empty) lattice.
+  BlamerBundle(const BlamerBundle &other) {
+    this->CopyTruth(other);
+    this->CopyResults(other);
+  }
+  ~BlamerBundle() {
+    delete[] lattice_data_;
+  }
+
+  // Accessors.
+  // Returns the concatenation of all the truth text entries.
+  std::string TruthString() const {
+    std::string truth_str;
+    for (const auto &text : truth_text_) {
+      truth_str += text;
+    }
+    return truth_str;
+  }
+  IncorrectResultReason incorrect_result_reason() const {
+    return incorrect_result_reason_;
+  }
+  // Returns true if the reason is IRR_NO_TRUTH or IRR_PAGE_LAYOUT.
+  bool NoTruth() const {
+    return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
+  }
+  bool HasDebugInfo() const {
+    return debug_.length() > 0 || misadaption_debug_.length() > 0;
+  }
+  const std::string &debug() const {
+    return debug_;
+  }
+  const std::string &misadaption_debug() const {
+    return misadaption_debug_;
+  }
+  // Keeps the smallest rating seen so far for a correctly segmented path.
+  void UpdateBestRating(float rating) {
+    if (rating < best_correctly_segmented_rating_) {
+      best_correctly_segmented_rating_ = rating;
+    }
+  }
+  int correct_segmentation_length() const {
+    return static_cast<int>(correct_segmentation_cols_.size());
+  }
+  // Returns true if the given ratings matrix col,row position is included
+  // in the correct segmentation path at the given index.
+  // index is not range-checked.
+  bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) {
+    return correct_segmentation_cols_[index] == coord.col &&
+           correct_segmentation_rows_[index] == coord.row;
+  }
+  void set_best_choice_is_dict_and_top_choice(bool value) {
+    best_choice_is_dict_and_top_choice_ = value;
+  }
+  const char *lattice_data() const {
+    return lattice_data_;
+  }
+  int lattice_size() const {
+    return lattice_size_; // size of lattice_data in bytes
+  }
+  // Stores a private copy of the size bytes at data, replacing any previously
+  // stored lattice. The copy is made before the old buffer is released, so
+  // data may point into the current lattice_data(), and the bundle is left
+  // unchanged if the allocation throws.
+  void set_lattice_data(const char *data, int size) {
+    char *copy = new char[size];
+    memcpy(copy, data, size);
+    delete[] lattice_data_;
+    lattice_data_ = copy;
+    lattice_size_ = size;
+  }
+#ifndef DISABLED_LEGACY_ENGINE
+  const tesseract::ParamsTrainingBundle &params_training_bundle() const {
+    return params_training_bundle_;
+  }
+  // Adds a new ParamsTrainingHypothesis to the current hypothesis list.
+  void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) {
+    params_training_bundle_.AddHypothesis(hypo);
+  }
+#endif // ndef DISABLED_LEGACY_ENGINE
+
+  // Functions to setup the blamer.
+  // Whole word string, whole word bounding box.
+  void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box);
+  // Single "character" string, "character" bounding box.
+  // May be called multiple times to indicate the characters in a word.
+  void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box);
+  // Marks that there is something wrong with the truth text, like it contains
+  // reject characters.
+  void SetRejectedTruth();
+
+  // Returns true if the provided word_choice is correct.
+  bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const;
+
+  // Resets everything that is produced during recognition, keeping the truth.
+  // The reason is reset to IRR_CORRECT unless NoTruth() holds, in which case
+  // it is preserved. misadaption_debug_ is not touched.
+  void ClearResults() {
+    norm_truth_word_.DeleteAllBoxes();
+    norm_box_tolerance_ = 0;
+    if (!NoTruth()) {
+      incorrect_result_reason_ = IRR_CORRECT;
+    }
+    debug_ = "";
+    segsearch_is_looking_for_blame_ = false;
+    best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
+    correct_segmentation_cols_.clear();
+    correct_segmentation_rows_.clear();
+    best_choice_is_dict_and_top_choice_ = false;
+    delete[] lattice_data_;
+    lattice_data_ = nullptr;
+    lattice_size_ = 0;
+  }
+  // Copies the truth boxes and text from other. The reason is taken from
+  // other only if other.NoTruth(); otherwise it becomes IRR_CORRECT.
+  void CopyTruth(const BlamerBundle &other) {
+    truth_has_char_boxes_ = other.truth_has_char_boxes_;
+    truth_word_ = other.truth_word_;
+    truth_text_ = other.truth_text_;
+    incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
+  }
+  // Copies the recognition results from other, including a deep copy of the
+  // lattice. Any lattice previously held by *this is released, and
+  // lattice_size_ is always left consistent with lattice_data_.
+  // debug_ and misadaption_debug_ are deliberately left as they are, matching
+  // the existing behaviour of this function.
+  void CopyResults(const BlamerBundle &other) {
+    norm_truth_word_ = other.norm_truth_word_;
+    norm_box_tolerance_ = other.norm_box_tolerance_;
+    incorrect_result_reason_ = other.incorrect_result_reason_;
+    segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
+    best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
+    correct_segmentation_cols_ = other.correct_segmentation_cols_;
+    correct_segmentation_rows_ = other.correct_segmentation_rows_;
+    best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
+    // Build the new lattice first, then release the old one. This keeps
+    // *this intact if the allocation throws and is safe when &other == this.
+    char *lattice_copy = nullptr;
+    int lattice_copy_size = 0;
+    if (other.lattice_data_ != nullptr) {
+      lattice_copy = new char[other.lattice_size_];
+      memcpy(lattice_copy, other.lattice_data_, other.lattice_size_);
+      lattice_copy_size = other.lattice_size_;
+    }
+    delete[] lattice_data_;
+    lattice_data_ = lattice_copy;
+    lattice_size_ = lattice_copy_size;
+  }
+  const char *IncorrectReason() const;
+
+  // Appends choice and truth details to the given debug string.
+  void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug);
+
+  // Sets up the norm_truth_word from truth_word using the given DENORM.
+  void SetupNormTruthWord(const DENORM &denorm);
+
+  // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
+  // bundles) where the right edge of the left-hand word is word1_right,
+  // and the left edge of the right-hand word is word2_left.
+  void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
+                   BlamerBundle *bundle2) const;
+  // "Joins" the blames from bundle1 and bundle2 into *this.
+  void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug);
+
+  // If a blob with the same bounding box as one of the truth character
+  // bounding boxes is not classified as the corresponding truth character
+  // blames character classifier for incorrect answer.
+  void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
+                       const BLOB_CHOICE_LIST &choices, bool debug);
+
+  // Checks whether chops were made at all the character bounding box
+  // boundaries in word->truth_word. If not - blames the chopper for an
+  // incorrect answer.
+  void SetChopperBlame(const WERD_RES *word, bool debug);
+  // Blames the classifier or the language model if, after running only the
+  // chopper, best_choice is incorrect and no blame has been yet set.
+  // Blames the classifier if best_choice is classifier's top choice and is a
+  // dictionary word (i.e. language model could not have helped).
+  // Otherwise, blames the language model (formerly permuter word adjustment).
+  void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
+                                  bool valid_permuter, bool debug);
+  // Sets up the correct_segmentation_* to mark the correct bounding boxes.
+  void SetupCorrectSegmentation(const TWERD *word, bool debug);
+
+  // Returns true if a guided segmentation search is needed.
+  bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
+  // Setup ready to guide the segmentation search to the correct segmentation.
+  void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
+                        bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
+                        double max_char_wh_ratio, WERD_RES *word_res);
+  // Returns true if the guided segsearch is in progress.
+  bool GuidedSegsearchStillGoing() const;
+  // The segmentation search has ended. Sets the blame appropriately.
+  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);
+
+  // If the bundle is null or still does not indicate the correct result,
+  // fix it and use some backup reason for the blame.
+  static void LastChanceBlame(bool debug, WERD_RES *word);
+
+  // Sets the misadaption debug if this word is incorrect, as this word is
+  // being adapted to.
+  void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
+
+private:
+  // Copy assignment is deleted: bundles can be copy-constructed but not
+  // assigned.
+  BlamerBundle &operator=(const BlamerBundle &other) = delete;
+  // Records irr as the reason and rebuilds debug_ as
+  // "<reason name> to blame: " followed by the details appended by
+  // FillDebugString. Prints the result if debug is set.
+  void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice,
+                bool debug) {
+    incorrect_result_reason_ = irr;
+    debug_ = IncorrectReason();
+    debug_ += " to blame: ";
+    FillDebugString(msg, choice, debug_);
+    if (debug) {
+      tprintf("SetBlame(): %s", debug_.c_str());
+    }
+  }
+
+private:
+  // Set to true when bounding boxes for individual unichars are recorded.
+  bool truth_has_char_boxes_;
+  // Variables used by the segmentation search when looking for the blame.
+  // Set to true while segmentation search is continued after the usual
+  // termination condition in order to look for the blame.
+  bool segsearch_is_looking_for_blame_;
+  // Set to true if best choice is a dictionary word and
+  // classifier's top choice.
+  bool best_choice_is_dict_and_top_choice_;
+  // Tolerance for bounding box comparisons in normalized space.
+  int norm_box_tolerance_;
+  // The truth_word (in the original image coordinate space) contains ground
+  // truth bounding boxes for this WERD_RES.
+  tesseract::BoxWord truth_word_;
+  // Same as above, but in normalized coordinates
+  // (filled in by WERD_RES::SetupForRecognition()).
+  tesseract::BoxWord norm_truth_word_;
+  // Contains ground truth unichar for each of the bounding boxes in truth_word.
+  std::vector<std::string> truth_text_;
+  // The reason for incorrect OCR result.
+  IncorrectResultReason incorrect_result_reason_;
+  // Debug text associated with the blame.
+  std::string debug_;
+  // Misadaption debug information (filled in if this word was misadapted to).
+  std::string misadaption_debug_;
+  // Vectors populated by SegSearch to indicate column and row indices that
+  // correspond to blobs with correct bounding boxes.
+  std::vector<int> correct_segmentation_cols_;
+  std::vector<int> correct_segmentation_rows_;
+  // Best rating for correctly segmented path
+  // (set and used by SegSearch when looking for blame).
+  float best_correctly_segmented_rating_;
+  // Default member initializers keep the lattice in a defined, empty state
+  // for constructors that do not set it explicitly (the copy constructor).
+  int lattice_size_ = 0; // size of lattice_data in bytes
+  // Serialized segmentation search lattice.
+  char *lattice_data_ = nullptr;
+  // Information about hypotheses (paths) explored by the segmentation search.
+#ifndef DISABLED_LEGACY_ENGINE
+  tesseract::ParamsTrainingBundle params_training_bundle_;
+#endif // ndef DISABLED_LEGACY_ENGINE
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_BLAMER_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobbox.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobbox.cpp
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobbox.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobbox.h
@ -0,0 +1,853 @@
+/**********************************************************************
+ * File:        blobbox.h  (Formerly blobnbox.h)
+ * Description: Code for the textord blob class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef BLOBBOX_H
+#define BLOBBOX_H
+
+#include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
+#include "elst2.h"      // for ELIST2_ITERATOR, ELIST2IZEH, ELIST2_LINK
+#include "errcode.h"    // for ASSERT_HOST
+#include "ocrblock.h"   // for BLOCK
+#include "params.h"     // for DoubleParam, double_VAR_H
+#include "pdblock.h"    // for PDBLK
+#include "points.h"     // for FCOORD, ICOORD, ICOORDELT_LIST
+#include "quspline.h"   // for QSPLINE
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+#include "statistc.h"   // for STATS
+#include "stepblob.h"   // for C_BLOB
+#include "tprintf.h"    // for tprintf
+#include "werd.h"       // for WERD_LIST
+
+#include <cinttypes> // for PRId32
+#include <cmath>     // for std::sqrt
+#include <cstdint>   // for int16_t, int32_t
+
+struct Pix;
+
+namespace tesseract {
+
+class C_OUTLINE;
+
+// Pitch classification values.
+// NOTE(review): only the first three values were documented originally; the
+// remaining descriptions are inferred from the names (FIXED vs PROP, DEF vs
+// MAYBE vs CORR) and should be confirmed against the pitch-detection code.
+enum PITCH_TYPE {
+  PITCH_DUNNO,       // insufficient data
+  PITCH_DEF_FIXED,   // definitely fixed
+  PITCH_MAYBE_FIXED, // could be
+  PITCH_DEF_PROP,    // presumably: definitely proportional
+  PITCH_MAYBE_PROP,  // presumably: could be proportional
+  PITCH_CORR_FIXED,  // presumably: corrected to fixed -- TODO confirm
+  PITCH_CORR_PROP    // presumably: corrected to proportional -- TODO confirm
+};
+
+// The possible tab-stop types of each side of a BLOBNBOX.
+// The ordering is important, as it is used for deleting dead-ends in the
+// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
+// non-aligned, unset, or deleted members.
+// BLOBNBOX::ReInit() resets both the left and right tab types to TT_NONE.
+enum TabType {
+  TT_NONE,          // Not a tab.
+  TT_DELETED,       // Not a tab after detailed analysis.
+  TT_MAYBE_RAGGED,  // Initial designation of a tab-stop candidate.
+  TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
+  TT_CONFIRMED,     // Aligned with neighbours.
+  TT_VLINE          // Detected as a vertical line.
+};
+
+// The possible region types of a BLOBNBOX.
+// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
+// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
+// *Type static functions below (BLOBNBOX::IsTextType, IsImageType,
+// IsLineType and UnMergeableType).
+// BLOBNBOX::ReInit() resets the region type to BRT_UNKNOWN.
+enum BlobRegionType {
+  BRT_NOISE,     // Neither text nor image.
+  BRT_HLINE,     // Horizontal separator line.
+  BRT_VLINE,     // Vertical separator line.
+  BRT_RECTIMAGE, // Rectangular image.
+  BRT_POLYIMAGE, // Non-rectangular image.
+  BRT_UNKNOWN,   // Not determined yet.
+  BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
+  BRT_TEXT,      // Convincing text.
+
+  BRT_COUNT // Number of possibilities.
+};
+
+// enum for elements of arrays that refer to neighbours.
+// NOTE: keep in this order, so ^2 can be used to flip direction:
+// BND_LEFT(0) <-> BND_RIGHT(2) and BND_BELOW(1) <-> BND_ABOVE(3).
+// BND_COUNT is the number of directions and is used as the array size.
+enum BlobNeighbourDir { BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE, BND_COUNT };
+
+// enum for special type of text characters, such as math symbol or italic.
+// BLOBNBOX::ReInit() resets the special text type to BSTT_SKIP.
+enum BlobSpecialTextType {
+  BSTT_NONE,    // No special.
+  BSTT_ITALIC,  // Italic style.
+  BSTT_DIGIT,   // Digit symbols.
+  BSTT_MATH,    // Mathematical symbols (not including digit).
+  BSTT_UNCLEAR, // Characters with low recognition rate.
+  BSTT_SKIP,    // Characters that we skip labeling (usually too small).
+  BSTT_COUNT    // Number of special text types above; keep last.
+};
+
+// Returns the direction opposite to dir. Relies on the BlobNeighbourDir
+// ordering: XOR with 2 swaps BND_LEFT with BND_RIGHT and BND_BELOW with
+// BND_ABOVE.
+inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
+  const int flipped = static_cast<int>(dir) ^ 2;
+  return static_cast<BlobNeighbourDir>(flipped);
+}
+
+// BlobTextFlowType indicates the quality of neighbouring information
+// related to a chain of connected components, either horizontally or
+// vertically. Also used by ColPartition for the collection of blobs
+// within, which should all have the same value in most cases.
+// The numeric order matters: DominatesInMerge() compares values with >=,
+// treating BTFT_LEADER as a special case.
+enum BlobTextFlowType {
+  BTFT_NONE,          // No text flow set yet.
+  BTFT_NONTEXT,       // Flow too poor to be likely text.
+  BTFT_NEIGHBOURS,    // Neighbours support flow in this direction.
+  BTFT_CHAIN,         // There is a weak chain of text in this direction.
+  BTFT_STRONG_CHAIN,  // There is a strong chain of text in this direction.
+  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
+  BTFT_LEADER,        // Leader dots/dashes etc.
+  BTFT_COUNT          // Number of flow types above; keep last.
+};
+
+// Returns true if type1 dominates type2 in a merge.
+// BTFT_LEADER is weak: it never dominates, and every other type dominates
+// it. For all other pairs the enum ordering decides, with the larger (or
+// equal) value dominating.
+// Apart from equal inputs the relation is anti-symmetric. For equal inputs
+// the result is true, except for two LEADERs where it is false, so callers
+// should not rely on the answer when type1 == type2.
+inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
+  const bool leader_involved = (type1 == BTFT_LEADER) || (type2 == BTFT_LEADER);
+  if (leader_involved) {
+    // A LEADER on the left always loses; otherwise the LEADER is on the
+    // right and type1 wins.
+    return type1 != BTFT_LEADER;
+  }
+  // No LEADER involved: fall back on the ordering of the enum.
+  return !(type1 < type2);
+}
+
+class ColPartition;
+
+class BLOBNBOX;
+ELISTIZEH(BLOBNBOX)
+class BLOBNBOX : public ELIST_LINK {
+public:
+  BLOBNBOX() {
+    ReInit();
+  }
+  explicit BLOBNBOX(C_BLOB *srcblob) {
+    box = srcblob->bounding_box();
+    ReInit();
+    cblob_ptr = srcblob;
+    area = static_cast<int>(srcblob->area());
+  }
+  ~BLOBNBOX() {
+    if (owns_cblob_) {
+      delete cblob_ptr;
+    }
+  }
+
+  static void clear_blobnboxes(BLOBNBOX_LIST *boxes) {
+    BLOBNBOX_IT it = boxes;
+    // A BLOBNBOX generally doesn't own its blobs, so if they do, you
+    // have to delete them explicitly.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX *box = it.data();
+      // TODO: remove next line, currently still needed for resultiterator_test.
+      delete box->remove_cblob();
+    }
+  }
+
+  static BLOBNBOX *RealBlob(C_OUTLINE *outline) {
+    auto *blob = new C_BLOB(outline);
+    return new BLOBNBOX(blob);
+  }
+
+  // Rotates the box and the underlying blob.
+  void rotate(FCOORD rotation);
+
+  // Methods that act on the box without touching the underlying blob.
+  // Reflect the box in the y-axis, leaving the underlying blob untouched.
+  void reflect_box_in_y_axis();
+  // Rotates the box by the angle given by rotation.
+  // If the blob is a diacritic, then only small rotations for skew
+  // correction can be applied.
+  void rotate_box(FCOORD rotation);
+  // Moves just the box by the given vector.
+  void translate_box(ICOORD v) {
+    if (IsDiacritic()) {
+      box.move(v);
+      base_char_top_ += v.y();
+      base_char_bottom_ += v.y();
+    } else {
+      box.move(v);
+      set_diacritic_box(box);
+    }
+  }
+  void merge(BLOBNBOX *nextblob);
+  void really_merge(BLOBNBOX *other);
+  void chop(                 // fake chop blob
+      BLOBNBOX_IT *start_it, // location of this
+      BLOBNBOX_IT *blob_it,  // iterator
+      FCOORD rotation,       // for landscape
+      float xheight);        // line height
+
+  void NeighbourGaps(int gaps[BND_COUNT]) const;
+  void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const;
+  void CleanNeighbours();
+  // Returns positive if there is at least one side neighbour that has a
+  // similar stroke width and is not on the other side of a rule line.
+  int GoodTextBlob() const;
+  // Returns the number of side neighbours that are of type BRT_NOISE.
+  int NoisyNeighbours() const;
+
+  // Returns true if the blob is noise and has no owner.
+  bool DeletableNoise() const {
+    return owner() == nullptr && region_type() == BRT_NOISE;
+  }
+
+  // Returns true, and sets vert_possible/horz_possible if the blob has some
+  // feature that makes it individually appear to flow one way.
+  // eg if it has a high aspect ratio, yet has a complex shape, such as a
+  // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
+  bool DefiniteIndividualFlow();
+
+  // Returns true if there is no tabstop violation in merging this and other.
+  bool ConfirmNoTabViolation(const BLOBNBOX &other) const;
+
+  // Returns true if other has a similar stroke width to this.
+  bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance,
+                           double constant_tolerance) const;
+
+  // Returns a bounding box of the outline contained within the
+  // given horizontal range.
+  TBOX BoundsWithinLimits(int left, int right);
+
+  // Estimates and stores the baseline position based on the shape of the
+  // outline.
+  void EstimateBaselinePosition();
+
+  // Simple accessors.
+  const TBOX &bounding_box() const {
+    return box;
+  }
+  // Set the bounding box. Use with caution.
+  // Normally use compute_bounding_box instead.
+  void set_bounding_box(const TBOX &new_box) {
+    box = new_box;
+    base_char_top_ = box.top();
+    base_char_bottom_ = box.bottom();
+  }
+  void compute_bounding_box() {
+    box = cblob_ptr->bounding_box();
+    base_char_top_ = box.top();
+    base_char_bottom_ = box.bottom();
+    baseline_y_ = box.bottom();
+  }
+  const TBOX &reduced_box() const {
+    return red_box;
+  }
+  void set_reduced_box(TBOX new_box) {
+    red_box = new_box;
+    reduced = true;
+  }
+  int32_t enclosed_area() const {
+    return area;
+  }
+  bool joined_to_prev() const {
+    return joined;
+  }
+  bool red_box_set() const {
+    return reduced;
+  }
+  int repeated_set() const {
+    return repeated_set_;
+  }
+  void set_repeated_set(int set_id) {
+    repeated_set_ = set_id;
+  }
+  C_BLOB *cblob() const {
+    return cblob_ptr;
+  }
+  C_BLOB *remove_cblob() {
+    auto blob = cblob_ptr;
+    cblob_ptr = nullptr;
+    owns_cblob_ = false;
+    return blob;
+  }
+  TabType left_tab_type() const {
+    return left_tab_type_;
+  }
+  void set_left_tab_type(TabType new_type) {
+    left_tab_type_ = new_type;
+  }
+  TabType right_tab_type() const {
+    return right_tab_type_;
+  }
+  void set_right_tab_type(TabType new_type) {
+    right_tab_type_ = new_type;
+  }
+  BlobRegionType region_type() const {
+    return region_type_;
+  }
+  void set_region_type(BlobRegionType new_type) {
+    region_type_ = new_type;
+  }
+  BlobSpecialTextType special_text_type() const {
+    return spt_type_;
+  }
+  void set_special_text_type(BlobSpecialTextType new_type) {
+    spt_type_ = new_type;
+  }
+  BlobTextFlowType flow() const {
+    return flow_;
+  }
+  void set_flow(BlobTextFlowType value) {
+    flow_ = value;
+  }
+  bool vert_possible() const {
+    return vert_possible_;
+  }
+  void set_vert_possible(bool value) {
+    vert_possible_ = value;
+  }
+  bool horz_possible() const {
+    return horz_possible_;
+  }
+  void set_horz_possible(bool value) {
+    horz_possible_ = value;
+  }
+  int left_rule() const {
+    return left_rule_;
+  }
+  void set_left_rule(int new_left) {
+    left_rule_ = new_left;
+  }
+  int right_rule() const {
+    return right_rule_;
+  }
+  void set_right_rule(int new_right) {
+    right_rule_ = new_right;
+  }
+  int left_crossing_rule() const {
+    return left_crossing_rule_;
+  }
+  void set_left_crossing_rule(int new_left) {
+    left_crossing_rule_ = new_left;
+  }
+  int right_crossing_rule() const {
+    return right_crossing_rule_;
+  }
+  void set_right_crossing_rule(int new_right) {
+    right_crossing_rule_ = new_right;
+  }
+  float horz_stroke_width() const {
+    return horz_stroke_width_;
+  }
+  void set_horz_stroke_width(float width) {
+    horz_stroke_width_ = width;
+  }
+  float vert_stroke_width() const {
+    return vert_stroke_width_;
+  }
+  void set_vert_stroke_width(float width) {
+    vert_stroke_width_ = width;
+  }
+  float area_stroke_width() const {
+    return area_stroke_width_;
+  }
+  tesseract::ColPartition *owner() const {
+    return owner_;
+  }
+  void set_owner(tesseract::ColPartition *new_owner) {
+    owner_ = new_owner;
+  }
+  bool leader_on_left() const {
+    return leader_on_left_;
+  }
+  void set_leader_on_left(bool flag) {
+    leader_on_left_ = flag;
+  }
+  bool leader_on_right() const {
+    return leader_on_right_;
+  }
+  void set_leader_on_right(bool flag) {
+    leader_on_right_ = flag;
+  }
+  BLOBNBOX *neighbour(BlobNeighbourDir n) const {
+    return neighbours_[n];
+  }
+  bool good_stroke_neighbour(BlobNeighbourDir n) const {
+    return good_stroke_neighbours_[n];
+  }
+  void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good) {
+    neighbours_[n] = neighbour;
+    good_stroke_neighbours_[n] = good;
+  }
+  bool IsDiacritic() const {
+    return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
+  }
+  int base_char_top() const {
+    return base_char_top_;
+  }
+  int base_char_bottom() const {
+    return base_char_bottom_;
+  }
+  int baseline_position() const {
+    return baseline_y_;
+  }
+  int line_crossings() const {
+    return line_crossings_;
+  }
+  void set_line_crossings(int value) {
+    line_crossings_ = value;
+  }
+  void set_diacritic_box(const TBOX &diacritic_box) {
+    base_char_top_ = diacritic_box.top();
+    base_char_bottom_ = diacritic_box.bottom();
+  }
+  BLOBNBOX *base_char_blob() const {
+    return base_char_blob_;
+  }
+  void set_base_char_blob(BLOBNBOX *blob) {
+    base_char_blob_ = blob;
+  }
+  void set_owns_cblob(bool value) {
+    owns_cblob_ = value;
+  }
+
+  bool UniquelyVertical() const {
+    return vert_possible_ && !horz_possible_;
+  }
+  bool UniquelyHorizontal() const {
+    return horz_possible_ && !vert_possible_;
+  }
+
+  // Returns true if the region type is text.
+  static bool IsTextType(BlobRegionType type) {
+    return type == BRT_TEXT || type == BRT_VERT_TEXT;
+  }
+  // Returns true if the region type is image.
+  static bool IsImageType(BlobRegionType type) {
+    return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
+  }
+  // Returns true if the region type is line.
+  static bool IsLineType(BlobRegionType type) {
+    return type == BRT_HLINE || type == BRT_VLINE;
+  }
+  // Returns true if the region type cannot be merged.
+  static bool UnMergeableType(BlobRegionType type) {
+    return IsLineType(type) || IsImageType(type);
+  }
+  // Helper to call CleanNeighbours on all blobs on the list.
+  static void CleanNeighbours(BLOBNBOX_LIST *blobs);
+  // Helper to delete all the deletable blobs on the list.
+  static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
+  // Helper to compute edge offsets for  all the blobs on the list.
+  // See coutln.h for an explanation of edge offsets.
+  static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs);
+
+#ifndef GRAPHICS_DISABLED
+  // Helper to draw all the blobs on the list in the given body_colour,
+  // with child outlines in the child_colour.
+  static void PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
+                        ScrollView::Color child_colour, ScrollView *win);
+  // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
+  // given list in the given body_colour, with child outlines in the
+  // child_colour.
+  static void PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
+                             ScrollView::Color child_colour, ScrollView *win);
+
+  static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type);
+
+  // Keep in sync with BlobRegionType.
+  ScrollView::Color BoxColor() const;
+
+  void plot(ScrollView *window,              // window to draw in
+            ScrollView::Color blob_colour,   // for outer bits
+            ScrollView::Color child_colour); // for holes
+#endif
+
+  // Resets every member that StrokeWidth and the later stages assign. The
+  // stored area and stroke-width values are deliberately kept, because they
+  // are expensive to calculate.
+  void ReInit() {
+    // Boolean state.
+    joined = reduced = false;
+    horz_possible_ = vert_possible_ = false;
+    leader_on_left_ = leader_on_right_ = false;
+
+    // Classification results.
+    repeated_set_ = 0;
+    left_tab_type_ = right_tab_type_ = TT_NONE;
+    region_type_ = BRT_UNKNOWN;
+    flow_ = BTFT_NONE;
+    spt_type_ = BSTT_SKIP;
+
+    // Rule-line positions and crossings.
+    left_rule_ = right_rule_ = 0;
+    left_crossing_rule_ = right_crossing_rule_ = 0;
+    line_crossings_ = 0;
+
+    // Fill in the area-based stroke width only if it is still unset and the
+    // blob has both a positive area and an outline with non-zero perimeter.
+    if (area_stroke_width_ == 0.0f && area > 0) {
+      if (cblob() != nullptr && cblob()->perimeter() != 0) {
+        area_stroke_width_ = 2.0f * area / cblob()->perimeter();
+      }
+    }
+
+    // Ownership and base-character data fall back to this blob's own box.
+    owner_ = nullptr;
+    base_char_blob_ = nullptr;
+    base_char_top_ = box.top();
+    base_char_bottom_ = box.bottom();
+    baseline_y_ = box.bottom();
+
+    ClearNeighbours();
+  }
+
+  // Forgets every stored neighbour pointer and clears the matching
+  // good-stroke-neighbour flag for each of the BND_COUNT directions.
+  void ClearNeighbours() {
+    for (BLOBNBOX *&neighbour : neighbours_) {
+      neighbour = nullptr;
+    }
+    for (bool &is_good : good_stroke_neighbours_) {
+      is_good = false;
+    }
+  }
+
+private:
+  // Every member carries a default initializer so that a BLOBNBOX never holds
+  // indeterminate values, even on a construction path that does not run
+  // ReInit(). Where ReInit() assigns a constant, the same constant is used
+  // here; the box-derived fields simply start at 0.
+  C_BLOB *cblob_ptr = nullptr;               // edgestep blob
+  TBOX box;                                  // bounding box
+  TBOX red_box;                              // bounding box (presumably the reduced
+                                             // one, cf. `reduced` -- TODO confirm)
+  int32_t area = 0;                          // enclosed area
+  int32_t repeated_set_ = 0;                 // id of the set of repeated blobs
+  TabType left_tab_type_ = TT_NONE;          // Indicates tab-stop assessment
+  TabType right_tab_type_ = TT_NONE;         // Indicates tab-stop assessment
+  BlobRegionType region_type_ = BRT_UNKNOWN; // Type of region this blob belongs to
+  BlobTextFlowType flow_ = BTFT_NONE;        // Quality of text flow.
+  BlobSpecialTextType spt_type_ = BSTT_SKIP; // Special text type.
+  bool joined = false;                       // joined to prev
+  bool reduced = false;                      // reduced box set
+  int16_t left_rule_ = 0;                    // x-coord of nearest but not crossing rule line
+  int16_t right_rule_ = 0;                   // x-coord of nearest but not crossing rule line
+  int16_t left_crossing_rule_ = 0;           // x-coord of nearest or crossing rule line
+  int16_t right_crossing_rule_ = 0;          // x-coord of nearest or crossing rule line
+  int16_t base_char_top_ = 0;                // y-coord of top/bottom of diacritic base,
+  int16_t base_char_bottom_ = 0;             // if it exists else top/bottom of this blob.
+  int16_t baseline_y_ = 0;                   // Estimate of baseline position.
+  int32_t line_crossings_ = 0;               // Number of line intersections touched.
+  BLOBNBOX *base_char_blob_ = nullptr;       // The blob that was the base char.
+  tesseract::ColPartition *owner_ = nullptr; // Who will delete me when I am not needed
+  BLOBNBOX *neighbours_[BND_COUNT] = {};     // One neighbour slot per direction.
+  float horz_stroke_width_ = 0.0f; // Median horizontal stroke width
+  float vert_stroke_width_ = 0.0f; // Median vertical stroke width
+  float area_stroke_width_ = 0.0f; // Stroke width from area/perimeter ratio.
+  bool good_stroke_neighbours_[BND_COUNT] = {}; // Per-direction flag, cleared with neighbours_.
+  bool horz_possible_ = false;   // Could be part of horizontal flow.
+  bool vert_possible_ = false;   // Could be part of vertical flow.
+  bool leader_on_left_ = false;  // There is a leader to the left.
+  bool leader_on_right_ = false; // There is a leader to the right.
+  // Iff true, then the destructor should delete the cblob_ptr.
+  // TODO(rays) migrate all uses to correctly setting this flag instead of
+  // deleting the C_BLOB before deleting the BLOBNBOX.
+  bool owns_cblob_ = false;
+};
+
+// One candidate text row. Holds the blobs assigned to the row together with
+// the line-fit, pitch and spacing values measured on them.
+class TO_ROW : public ELIST2_LINK {
+public:
+  // Weight of the parallel-fit error when set_parallel_line() computes the
+  // credibility of the row.
+  static const int kErrorWeight = 3;
+
+  // Builds an empty row; clear() gives every value its default.
+  TO_ROW() {
+    clear();
+  }
+  // Builds a row from its first blob.
+  TO_ROW(BLOBNBOX *blob, // first blob of the row
+         float top,      // of row / target height
+         float bottom, float row_size);
+
+  void print() const;
+
+  // Vertical extent of the row.
+  float max_y() const { return y_max; }
+  float min_y() const { return y_min; }
+  float mean_y() const { return (y_min + y_max) / 2.0f; }
+  float initial_min_y() const { return initial_y_min; }
+
+  // Line fit as stored by set_line().
+  float line_m() const { return m; }
+  float line_c() const { return c; }
+  float line_error() const { return error; }
+
+  // Fixed-gradient fit as stored by set_parallel_line().
+  float parallel_c() const { return para_c; }
+  float parallel_error() const { return para_error; }
+  float believability() const { return credibility; } // baseline goodness
+  float intercept() const { return y_origin; }        // real parallel_c
+
+  // Puts a blob in the row.
+  void add_blob(BLOBNBOX *blob, // blob to add
+                float top,      // of row / target height
+                float bottom, float row_size);
+  // Puts a blob in the row, keeping the row in order.
+  void insert_blob(BLOBNBOX *blob);
+
+  // Gives access to the list of blobs in the row.
+  BLOBNBOX_LIST *blob_list() { return &blobs; }
+
+  // Stores the line specification: gradient, intercept and fit error.
+  void set_line(float new_m, float new_c, float new_error) {
+    m = new_m;
+    c = new_c;
+    error = new_error;
+  }
+  // Stores the fit made with a fixed (page) gradient, and derives from it the
+  // credibility of the row and the real intercept.
+  void set_parallel_line(float gradient, // page gradient
+                         float new_c, float new_error) {
+    para_c = new_c;
+    para_error = new_error;
+    // More blobs raise the credibility; fit error lowers it.
+    credibility = blobs.length() - kErrorWeight * new_error;
+    // Real intercept.
+    y_origin = new_c / std::sqrt(1 + gradient * gradient);
+  }
+  // Stores the bottom (new_min) and top (new_max) of the row.
+  void set_limits(float new_min, float new_max) {
+    y_min = new_min;
+    y_max = new_max;
+  }
+  // Gets the vertical projection.
+  void compute_vertical_projection();
+
+  // num_repeated_sets_ is -1 until the row has been searched for repeated
+  // blobs; these four functions read and write that state.
+  bool rep_chars_marked() const { return num_repeated_sets_ != -1; }
+  void clear_rep_chars_marked() { num_repeated_sets_ = -1; }
+  int num_repeated_sets() const { return num_repeated_sets_; }
+  void set_num_repeated_sets(int num_sets) { num_repeated_sets_ = num_sets; }
+
+  bool merged = false;       // true when dead
+  bool all_caps;             // had no ascenders
+  bool used_dm_model;        // in guessing pitch
+  int16_t projection_left;   // start of projection
+  int16_t projection_right;  // presumably the end of the projection (the
+                             // original comment repeated "start") -- TODO confirm
+  PITCH_TYPE pitch_decision; // how strong is decision
+  float fixed_pitch;         // pitch or 0
+  float fp_space;            // sp if fixed pitch
+  float fp_nonsp;            // nonsp if fixed pitch
+  float pr_space;            // sp if prop
+  float pr_nonsp;            // non sp if prop
+  float spacing;             // to "next" row
+  float xheight;             // of line
+  int xheight_evidence;      // number of blobs of height xheight
+  float ascrise;             // ascenders
+  float descdrop;            // descenders
+  float body_size;           // of CJK characters.  Assumed to be
+                             // xheight+ascrise for non-CJK text.
+  int32_t min_space;         // min size for real space
+  int32_t max_nonspace;      // max size of non-space
+  int32_t space_threshold;   // space vs nonspace
+  float kern_size;           // average non-space
+  float space_size;          // average space
+  WERD_LIST rep_words;       // repeated chars
+  ICOORDELT_LIST char_cells; // fixed pitch cells
+  QSPLINE baseline;          // curved baseline
+  STATS projection;          // vertical projection
+
+private:
+  // Clears all values to reasonable defaults.
+  void clear();
+
+  BLOBNBOX_LIST blobs; // blobs in row
+  float y_min;         // coords
+  float y_max;
+  float initial_y_min;
+  float m, c;   // line spec
+  float error;  // line error
+  float para_c; // constrained fit
+  float para_error;
+  float y_origin;         // rotated para_c;
+  float credibility;      // baseline believability
+  int num_repeated_sets_; // number of sets of repeated blobs
+                          // set to -1 if we have not searched
+                          // for repeated blobs in this row yet
+};
+
+ELIST2IZEH(TO_ROW)
+// Working data for one block: the blobs of the block sorted into several
+// lists, the temporary rows made from them, and block-level size, pitch and
+// spacing estimates.
+class TESS_API TO_BLOCK : public ELIST_LINK {
+public:
+  // Builds an empty block.
+  TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
+    clear();
+  }
+  // Builds the working data for the given real block.
+  TO_BLOCK(BLOCK *src_block);
+  ~TO_BLOCK();
+
+  // Clears all scalar members.
+  void clear();
+
+  // Gives access to the list of temporary rows.
+  TO_ROW_LIST *get_rows() {
+    return &row_list;
+  }
+
+  // Rotates the blobs on all five blob lists and the underlying block, then
+  // updates the block's median size statistic from the (medium) blobs list.
+  void rotate(const FCOORD &rotation) {
+    BLOBNBOX_LIST *const all_lists[] = {&blobs, &underlines, &noise_blobs, &small_blobs,
+                                        &large_blobs};
+    for (BLOBNBOX_LIST *list : all_lists) {
+      BLOBNBOX_IT it(list);
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        it.data()->rotate(rotation);
+      }
+    }
+    // The block is only rotated if it has a poly_block.
+    ASSERT_HOST(block->pdblk.poly_block() != nullptr);
+    block->rotate(rotation);
+    // Collect the widths and heights of the blobs, ranged by the size of the
+    // rotated block's bounding box, and store their rounded medians.
+    STATS widths(0, block->pdblk.bounding_box().width());
+    STATS heights(0, block->pdblk.bounding_box().height());
+    BLOBNBOX_IT size_it(&blobs);
+    for (size_it.mark_cycle_pt(); !size_it.cycled_list(); size_it.forward()) {
+      auto *blob = size_it.data();
+      widths.add(blob->bounding_box().width(), 1);
+      heights.add(blob->bounding_box().height(), 1);
+    }
+    block->set_median_size(static_cast<int>(widths.median() + 0.5),
+                           static_cast<int>(heights.median() + 0.5));
+  }
+
+  // Debug output: prints the y range, para_c and blob count of every row.
+  void print_rows() {
+    TO_ROW_IT row_it(&row_list);
+    row_it.mark_cycle_pt();
+    while (!row_it.cycled_list()) {
+      TO_ROW *row = row_it.data();
+      tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n", row->min_y(), row->max_y(),
+              row->parallel_c(), row->blob_list()->length());
+      row_it.forward();
+    }
+  }
+
+  // Reorganizes the blob lists with a different definition of small, medium
+  // and large, compared to the original definition.
+  // Height is still the primary filter key, but medium width blobs of small
+  // height become medium, and very wide blobs of small height stay small.
+  void ReSetAndReFilterBlobs();
+
+  // Deletes noise blobs from all lists where not owned by a ColPartition.
+  void DeleteUnownedNoise();
+
+  // Computes and stores the edge offsets on each blob for use in feature
+  // extraction, using greyscale if the supplied grey and thresholds pixes
+  // are 8-bit or otherwise (if nullptr or not 8 bit) the original binary
+  // edge step outlines.
+  // Thresholds must either be the same size as grey or an integer down-scale
+  // of grey.
+  // See coutln.h for an explanation of edge offsets.
+  void ComputeEdgeOffsets(Image thresholds, Image grey);
+
+#ifndef GRAPHICS_DISABLED
+  // Draw the noise blobs from all lists in red.
+  void plot_noise_blobs(ScrollView *to_win);
+  // Draw the blobs on the various lists in the block in different colors.
+  void plot_graded_blobs(ScrollView *to_win);
+#endif
+
+  BLOBNBOX_LIST blobs;       // medium size
+  BLOBNBOX_LIST underlines;  // underline blobs
+  BLOBNBOX_LIST noise_blobs; // very small
+  BLOBNBOX_LIST small_blobs; // fairly small
+  BLOBNBOX_LIST large_blobs; // big blobs
+  BLOCK *block;              // real block
+  PITCH_TYPE pitch_decision; // how strong is decision
+  float line_spacing;        // estimate
+  // line_size is a lower-bound estimate of the font size in pixels of
+  // the text in the block (with ascenders and descenders), being a small
+  // (1.25) multiple of the median height of filtered blobs.
+  // In most cases the font size will be bigger, but it will be closer
+  // if the text is allcaps, or in a no-x-height script.
+  float line_size;       // estimate
+  float max_blob_size;   // line assignment limit
+  float baseline_offset; // phase shift
+  float xheight;         // median blob size
+  float fixed_pitch;     // pitch or 0
+  float kern_size;       // average non-space
+  float space_size;      // average space
+  int32_t min_space;     // min definite space
+  int32_t max_nonspace;  // max definite
+  float fp_space;        // sp if fixed pitch
+  float fp_nonsp;        // nonsp if fixed pitch
+  float pr_space;        // sp if prop
+  float pr_nonsp;        // non sp if prop
+  TO_ROW *key_row;       // starting row
+
+private:
+  TO_ROW_LIST row_list; // temporary rows
+};
+
+ELISTIZEH(TO_BLOCK)
+extern double_VAR_H(textord_error_weight, 3, "Weighting for error in believability");
+// Reports, through ymin/ymax, the y limits of the blob between the x limits
+// leftx and rightx. NOTE(review): rotation is only described as "for
+// landscape"; how it is applied is defined in the implementation file.
+void find_cblob_limits( // get y limits
+    C_BLOB *blob,       // blob to search
+    float leftx,        // x limits
+    float rightx,
+    FCOORD rotation, // for landscape
+    float &ymin,     // output y limits
+    float &ymax);
+// Reports, through ymin/ymax, the y limits of the blob between the x limits
+// leftx and rightx (no rotation argument).
+void find_cblob_vlimits( // get y limits
+    C_BLOB *blob,        // blob to search
+    float leftx,         // x limits
+    float rightx,
+    float &ymin, // output y limits
+    float &ymax);
+// Reports, through xmin/xymax, the x limits of the blob between the y limits
+// bottomy and topy. The parameter named xymax receives the upper x limit.
+void find_cblob_hlimits( // get x limits
+    C_BLOB *blob,        // blob to search
+    float bottomy,       // y limits
+    float topy,
+    float &xmin, // output x limits
+    float &xymax);
+// Returns a rotated blob. NOTE(review): presumably a newly allocated copy
+// that the caller must delete -- confirm against the implementation.
+C_BLOB *crotate_cblob( // rotate it
+    C_BLOB *blob,      // blob to search
+    FCOORD rotation    // for landscape
+);
+// Returns a bounding box obtained through the blob iterator.
+// NOTE(review): whether and how far the iterator is advanced is not visible
+// from this declaration.
+TBOX box_next(      // get bounding box
+    BLOBNBOX_IT *it // iterator to blobs
+);
+// As box_next, for pre-chopped blobs. NOTE(review): the exact difference is
+// defined in the implementation file.
+TBOX box_next_pre_chopped( // get bounding box
+    BLOBNBOX_IT *it        // iterator to blobs
+);
+// Projects the outlines of the blob into the given STATS.
+void vertical_cblob_projection( // project outlines
+    C_BLOB *blob,               // blob to project
+    STATS *stats                // output
+);
+// Projects a single outline into the given STATS.
+void vertical_coutline_projection( // project outlines
+    C_OUTLINE *outline,            // outline to project
+    STATS *stats                   // output
+);
+#ifndef GRAPHICS_DISABLED
+// Draws the blobs of the list in the window using the two given colours.
+void plot_blob_list(ScrollView *win,                 // window to draw in
+                    BLOBNBOX_LIST *list,             // blob list
+                    ScrollView::Color body_colour,   // colour to draw
+                    ScrollView::Color child_colour); // colour of child
+#endif                                               // !GRAPHICS_DISABLED
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobs.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobs.cpp
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobs.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blobs.h
@ -0,0 +1,476 @@
+/******************************************************************************
+ *
+ * File:        blobs.h
+ * Description: Blob definition
+ * Author:      Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1989, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+
+#ifndef BLOBS_H
+#define BLOBS_H
+
+#include "clst.h"       // for CLIST_ITERATOR, CLISTIZEH
+#include "normalis.h"   // for DENORM
+#include "points.h"     // for FCOORD, ICOORD
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+
+#include <tesseract/publictypes.h> // for OcrEngineMode
+
+#include <cstdint> // for int16_t
+
+struct Pix;
+
+namespace tesseract {
+
+class BLOCK;
+class C_BLOB;
+class C_OUTLINE;
+class LLSQ;
+class ROW;
+class WERD;
+
+/*----------------------------------------------------------------------
+              T y p e s
+----------------------------------------------------------------------*/
+
+// A point, or vector, with 16-bit integer x and y.
+struct TPOINT {
+  TPOINT() : x(0), y(0) {}
+  TPOINT(int16_t vx, int16_t vy) : x(vx), y(vy) {}
+  TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {}
+
+  // Adds other to this point, component by component. The sum is formed in
+  // int and converted back to int16_t.
+  void operator+=(const TPOINT &other) {
+    x = static_cast<int16_t>(x + other.x);
+    y = static_cast<int16_t>(y + other.y);
+  }
+  // Divides both components by divisor (integer division).
+  void operator/=(int divisor) {
+    x = static_cast<int16_t>(x / divisor);
+    y = static_cast<int16_t>(y / divisor);
+  }
+  // Two points are equal when both components are equal.
+  bool operator==(const TPOINT &other) const {
+    if (x != other.x) {
+      return false;
+    }
+    return y == other.y;
+  }
+  // Returns true when the two line segments cross each other.
+  // (Moved from outlines.cpp).
+  static bool IsCrossed(const TPOINT &a0, const TPOINT &a1, const TPOINT &b0, const TPOINT &b1);
+
+  // Sets this point to p1 - p2, component by component.
+  void diff(const TPOINT &p1, const TPOINT &p2) {
+    x = static_cast<int16_t>(p1.x - p2.x);
+    y = static_cast<int16_t>(p1.y - p2.y);
+  }
+
+  // Returns the cross product of this vector with other.
+  int cross(const TPOINT &other) const {
+    const int forward = x * other.y;
+    const int backward = y * other.x;
+    return forward - backward;
+  }
+
+  // Returns the scalar (dot) product of this vector with other.
+  int dot(const TPOINT &other) const {
+    const int x_part = x * other.x;
+    const int y_part = y * other.y;
+    return x_part + y_part;
+  }
+
+  // Returns the squared length of the vector, x*x + y*y. No square root is
+  // taken, despite the function name.
+  int length() const {
+    return dot(*this);
+  }
+
+  int16_t x; // absolute x coord.
+  int16_t y; // absolute y coord.
+};
+
+using VECTOR = TPOINT; // structure for coordinates.
+
+// One point of an outline, linked to its neighbours through next and prev.
+struct EDGEPT {
+  EDGEPT() = default;
+  // Copy construction duplicates the data only; next and prev keep their
+  // default value of nullptr.
+  EDGEPT(const EDGEPT &src) {
+    CopyFrom(src);
+  }
+  // Assignment duplicates the data only; next and prev are left as they are.
+  EDGEPT &operator=(const EDGEPT &src) {
+    CopyFrom(src);
+    return *this;
+  }
+  // Copies the data elements, but leaves the next/prev link pointers
+  // untouched.
+  void CopyFrom(const EDGEPT &src) {
+    // Geometry.
+    pos = src.pos;
+    vec = src.vec;
+    // Flags and small counters.
+    is_hidden = src.is_hidden;
+    runlength = src.runlength;
+    dir = src.dir;
+    fixed = src.fixed;
+    // Link back to the source outline.
+    src_outline = src.src_outline;
+    start_step = src.start_step;
+    step_count = src.step_count;
+  }
+  // Returns the squared distance between the points, with the x-component
+  // weighted by x_factor.
+  int WeightedDistance(const EDGEPT &other, int x_factor) const {
+    const int dx = pos.x - other.pos.x;
+    const int dy = pos.y - other.pos.y;
+    return dx * dx * x_factor + dy * dy;
+  }
+  // Returns true if the positions are equal.
+  bool EqualPos(const EDGEPT &other) const {
+    return pos == other.pos;
+  }
+  // Returns the bounding box of the outline segment from *this to *end.
+  // Ignores hidden edge flags.
+  TBOX SegmentBox(const EDGEPT *end) const {
+    // Start with the box of this point alone, then grow it with every point
+    // visited, stopping once end (or this point again) has been included.
+    TBOX bounds(pos.x, pos.y, pos.x, pos.y);
+    for (const EDGEPT *pt = next;; pt = pt->next) {
+      const TPOINT &p = pt->pos;
+      if (p.x < bounds.left()) {
+        bounds.set_left(p.x);
+      }
+      if (p.x > bounds.right()) {
+        bounds.set_right(p.x);
+      }
+      if (p.y < bounds.bottom()) {
+        bounds.set_bottom(p.y);
+      }
+      if (p.y > bounds.top()) {
+        bounds.set_top(p.y);
+      }
+      if (pt == end || pt == this) {
+        break;
+      }
+    }
+    return bounds;
+  }
+  // Returns the area of the outline segment from *this to *end.
+  // Ignores hidden edge flags.
+  int SegmentArea(const EDGEPT *end) const {
+    int total = 0;
+    const EDGEPT *pt = next;
+    for (;;) {
+      // Vector from this point to pt, crossed with the step leaving pt.
+      TPOINT from_start(pt->pos.x - pos.x, pt->pos.y - pos.y);
+      total += from_start.cross(pt->vec);
+      pt = pt->next;
+      if (pt == end || pt == this) {
+        break;
+      }
+    }
+    return total;
+  }
+  // Returns true if the number of points in the outline segment from *this to
+  // *end is less than min_points and false if we get back to *this first.
+  // Ignores hidden edge flags.
+  bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const {
+    const EDGEPT *pt = this;
+    for (int steps = 0;;) {
+      if (pt == end) {
+        return true;
+      }
+      pt = pt->next;
+      ++steps;
+      if (pt == this || steps > min_points) {
+        return false;
+      }
+    }
+  }
+
+  // Accessors to hide or reveal a cut edge from feature extractors.
+  void Hide() { is_hidden = true; }
+  void Reveal() { is_hidden = false; }
+  bool IsHidden() const { return is_hidden; }
+  // A chop point is marked by a non-zero dir.
+  void MarkChop() { dir = 1; }
+  bool IsChopPt() const { return dir != 0; }
+
+  TPOINT pos; // position
+  VECTOR vec; // vector to next point
+  bool is_hidden = false;
+  uint8_t runlength = 0;
+  int8_t dir = 0;
+  int8_t fixed = 0;
+  EDGEPT *next = nullptr;           // anticlockwise element
+  EDGEPT *prev = nullptr;           // clockwise element
+  C_OUTLINE *src_outline = nullptr; // Outline it came from.
+  // The following fields are not used if src_outline is nullptr.
+  int start_step = 0; // Location of pos in src_outline.
+  int step_count = 0; // Number of steps used (may wrap around).
+};
+
+// For use in chop and findseam to keep a list of which EDGEPTs were inserted.
+CLISTIZEH(EDGEPT)
+
+// One outline of a blob: a circular list of EDGEPTs (loop) plus its bounding
+// corners, chained to the next outline of the same blob through next.
+struct TESSLINE {
+  TESSLINE() : is_hole(false), loop(nullptr), next(nullptr) {}
+  // is_hole is given a value up front so that it is never indeterminate,
+  // whatever CopyFrom does with it.
+  TESSLINE(const TESSLINE &src) : is_hole(false), loop(nullptr), next(nullptr) {
+    CopyFrom(src);
+  }
+  ~TESSLINE() {
+    Clear();
+  }
+  // Self-assignment is a no-op. CopyFrom is not defined in this header, so
+  // it cannot be relied on to cope with src being *this.
+  TESSLINE &operator=(const TESSLINE &src) {
+    if (this != &src) {
+      CopyFrom(src);
+    }
+    return *this;
+  }
+  // Consume the circular list of EDGEPTs to make a TESSLINE.
+  static TESSLINE *BuildFromOutlineList(EDGEPT *outline);
+  // Copies the data and the outline, but leaves next untouched.
+  void CopyFrom(const TESSLINE &src);
+  // Deletes owned data.
+  void Clear();
+  // Normalize in-place using the DENORM.
+  void Normalize(const DENORM &denorm);
+  // Rotates by the given rotation in place.
+  void Rotate(const FCOORD rotation);
+  // Moves by the given vec in place.
+  void Move(const ICOORD vec);
+  // Scales by the given factor in place.
+  void Scale(float factor);
+  // Sets up the start and vec members of the loop from the pos members.
+  void SetupFromPos();
+  // Recomputes the bounding box from the points in the loop.
+  void ComputeBoundingBox();
+  // Computes the min and max cross product of the outline points with the
+  // given vec and returns the results in min_xp and max_xp. Geometrically
+  // this is the left and right edge of the outline perpendicular to the
+  // given direction, but to get the distance units correct, you would
+  // have to divide by the modulus of vec.
+  void MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const;
+
+  TBOX bounding_box() const;
+  // Returns true if *this and other have equal bounding boxes.
+  bool SameBox(const TESSLINE &other) const {
+    return topleft == other.topleft && botright == other.botright;
+  }
+  // Returns true if the given line segment crosses an edge of this outline.
+  // Edges are only tested when both end points of the segment lie inside the
+  // outline's bounding box. An outline with no points has nothing to cross,
+  // so it returns false.
+  bool SegmentCrosses(const TPOINT &pt1, const TPOINT &pt2) const {
+    if (loop == nullptr) {
+      return false;
+    }
+    if (Contains(pt1) && Contains(pt2)) {
+      const EDGEPT *pt = loop;
+      do {
+        if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) {
+          return true;
+        }
+        pt = pt->next;
+      } while (pt != loop);
+    }
+    return false;
+  }
+  // Returns true if the point is contained within the outline box.
+  bool Contains(const TPOINT &pt) const {
+    return topleft.x <= pt.x && pt.x <= botright.x && botright.y <= pt.y && pt.y <= topleft.y;
+  }
+
+#ifndef GRAPHICS_DISABLED
+  void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color);
+#endif // !GRAPHICS_DISABLED
+
+  // Returns the first outline point that has a different src_outline to its
+  // predecessor, or, if all the same, the lowest indexed point.
+  EDGEPT *FindBestStartPt() const;
+
+  // Returns the area of the bounding box, computed from the two corners.
+  int BBArea() const {
+    return (botright.x - topleft.x) * (topleft.y - botright.y);
+  }
+
+  TPOINT topleft;  // Top left of loop.
+  TPOINT botright; // Bottom right of loop.
+  TPOINT start;    // Start of loop.
+  bool is_hole;    // True if this is a hole/child outline.
+  EDGEPT *loop;    // Edgeloop.
+  TESSLINE *next;  // Next outline in blob.
+};                 // Outline structure.
+
+// A blob: a singly linked list of TESSLINE outlines plus the DENORM that
+// records the transformations applied to it.
+struct TBLOB {
+  TBLOB() : outlines(nullptr) {}
+  TBLOB(const TBLOB &src) : outlines(nullptr) {
+    CopyFrom(src);
+  }
+  ~TBLOB() {
+    Clear();
+  }
+  // Self-assignment is a no-op. CopyFrom is not defined in this header, so
+  // it cannot be relied on to cope with src being *this.
+  TBLOB &operator=(const TBLOB &src) {
+    if (this != &src) {
+      CopyFrom(src);
+    }
+    return *this;
+  }
+  // Factory to build a TBLOB from a C_BLOB with polygonal approximation along
+  // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
+  // contain pointers to the input C_OUTLINEs that enable higher-resolution
+  // feature extraction that does not use the polygonal approximation.
+  static TBLOB *PolygonalCopy(bool allow_detailed_fx, C_BLOB *src);
+  // Factory builds a blob with no outlines, but copies the other member data.
+  static TBLOB *ShallowCopy(const TBLOB &src);
+  // Normalizes the blob for classification only if needed.
+  // (Normally this means a non-zero classify rotation.)
+  // If no Normalization is needed, then nullptr is returned, and the input blob
+  // can be used directly. Otherwise a new TBLOB is returned which must be
+  // deleted after use.
+  TBLOB *ClassifyNormalizeIfNeeded() const;
+
+  // Copies the data and the outlines.
+  // NOTE(review): the original comment also said "leaves next untouched",
+  // but TBLOB declares no next member.
+  void CopyFrom(const TBLOB &src);
+  // Deletes owned data.
+  void Clear();
+  // Sets up the built-in DENORM and normalizes the blob in-place.
+  // For parameters see DENORM::SetupNormalization, plus the inverse flag for
+  // this blob and the Pix for the full image.
+  void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
+                 float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift,
+                 float final_yshift, bool inverse, Image pix);
+  // Rotates by the given rotation in place.
+  void Rotate(const FCOORD rotation);
+  // Moves by the given vec in place.
+  void Move(const ICOORD vec);
+  // Scales by the given factor in place.
+  void Scale(float factor);
+  // Recomputes the bounding boxes of the outlines.
+  void ComputeBoundingBoxes();
+
+  // Returns the number of outlines.
+  int NumOutlines() const;
+
+  TBOX bounding_box() const;
+
+  // Returns true if the given line segment crosses any outline of this blob.
+  bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const {
+    for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
+      if (outline->SegmentCrosses(pt1, pt2)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Returns true if the point is contained within any of the outline boxes.
+  bool Contains(const TPOINT &pt) const {
+    for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
+      if (outline->Contains(pt)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Finds and deletes any duplicate outlines in this blob, without deleting
+  // their EDGEPTs.
+  void EliminateDuplicateOutlines();
+
+  // Swaps the outlines of *this and next if needed to keep the centers in
+  // increasing x.
+  void CorrectBlobOrder(TBLOB *next);
+
+  const DENORM &denorm() const {
+    return denorm_;
+  }
+
+#ifndef GRAPHICS_DISABLED
+  void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color);
+#endif // !GRAPHICS_DISABLED
+
+  // Returns the sum of the bounding-box areas of all the outlines. Areas of
+  // overlapping boxes are counted once per outline.
+  int BBArea() const {
+    int total_area = 0;
+    for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
+      total_area += outline->BBArea();
+    }
+    return total_area;
+  }
+
+  // Computes the center of mass and second moments for the old baseline and
+  // 2nd moment normalizations. Returns the outline length.
+  // The input denorm should be the normalizations that have been applied from
+  // the image to the current state of this TBLOB.
+  int ComputeMoments(FCOORD *center, FCOORD *second_moments) const;
+  // Computes the precise bounding box of the coords that are generated by
+  // GetEdgeCoords. This may be different from the bounding box of the polygon.
+  void GetPreciseBoundingBox(TBOX *precise_box) const;
+  // Adds edges to the given vectors.
+  // For all the edge steps in all the outlines, or polygonal approximation
+  // where there are no edge steps, collects the steps into x_coords/y_coords.
+  // x_coords is a collection of the x-coords of vertical edges for each
+  // y-coord starting at box.bottom().
+  // y_coords is a collection of the y-coords of horizontal edges for each
+  // x-coord starting at box.left().
+  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
+  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
+  void GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
+                     std::vector<std::vector<int>> &y_coords) const;
+
+  TESSLINE *outlines; // List of outlines in blob.
+
+private: // TODO(rays) Someday the data members will be private too.
+  // For all the edge steps in all the outlines, or polygonal approximation
+  // where there are no edge steps, collects the steps into the bounding_box,
+  // llsq and/or the x_coords/y_coords. Both are used in different kinds of
+  // normalization.
+  // For a description of x_coords, y_coords, see GetEdgeCoords above.
+  void CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
+                    std::vector<std::vector<int>> *x_coords,
+                    std::vector<std::vector<int>> *y_coords) const;
+
+  // DENORM indicating the transformations that this blob has undergone so far.
+  DENORM denorm_;
+}; // Blob structure.
+
+// A word: a vector of pointers to its TBLOBs plus a script flag.
+struct TWERD {
+  TWERD() : latin_script(false) {}
+  // latin_script is given a value up front so that it is never
+  // indeterminate, whatever CopyFrom does with it.
+  TWERD(const TWERD &src) : latin_script(false) {
+    CopyFrom(src);
+  }
+  ~TWERD() {
+    Clear();
+  }
+  // Self-assignment is a no-op. CopyFrom is not defined in this header, so
+  // it cannot be relied on to cope with src being *this.
+  TWERD &operator=(const TWERD &src) {
+    if (this != &src) {
+      CopyFrom(src);
+    }
+    return *this;
+  }
+  // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
+  // approximation along the way.
+  static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src);
+  // Baseline normalizes the blobs in-place, recording the normalization in the
+  // DENORMs in the blobs.
+  void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
+                   float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
+                   const TBOX *norm_box, DENORM *word_denorm);
+  // Copies the data and the blobs.
+  // NOTE(review): the original comment also said "leaves next untouched",
+  // but TWERD declares no next member.
+  void CopyFrom(const TWERD &src);
+  // Deletes owned data.
+  void Clear();
+  // Recomputes the bounding boxes of the blobs.
+  void ComputeBoundingBoxes();
+
+  // Returns the number of blobs in the word. The vector size is converted to
+  // int explicitly, because the interface returns int.
+  int NumBlobs() const {
+    return static_cast<int>(blobs.size());
+  }
+  TBOX bounding_box() const;
+
+  // Merges the blobs from start to end, not including end, and deletes
+  // the blobs between start and end.
+  void MergeBlobs(int start, int end);
+
+#ifndef GRAPHICS_DISABLED
+  void plot(ScrollView *window);
+#endif // !GRAPHICS_DISABLED
+
+  std::vector<TBLOB *> blobs; // Blobs in word.
+  bool latin_script;          // This word is in a latin-based script.
+};
+
+/*----------------------------------------------------------------------
+              F u n c t i o n s
+----------------------------------------------------------------------*/
+// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
+// Reports whether the blob can be divided. NOTE(review): *location is
+// presumably set to the position at which to divide, and italic_blob
+// presumably selects a slanted division direction -- confirm against the
+// implementation.
+bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location);
+
+// Divides blob at location, using other_blob as the second blob.
+// NOTE(review): presumably some of blob's outlines are moved into
+// other_blob -- confirm against the implementation.
+void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blread.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blread.cpp
@ -0,0 +1,74 @@
+/**********************************************************************
+ * File:        blread.cpp  (Formerly pdread.c)
+ * Description: Friend function of BLOCK to read the uscan pd file.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "blread.h"
+
+#include "ocrblock.h"  // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only)
+#include "scanutils.h" // for tfscanf
+
+#include <cstdio> // for fclose, fopen, FILE
+
+namespace tesseract {
+
+#define UNLV_EXT ".uzn" // unlv zone file
+
+/**********************************************************************
+ * read_unlv_file
+ *
+ * Read a whole unlv zone file to make a list of blocks.
+ **********************************************************************/
+
+bool read_unlv_file(   // read a UNLV zone file into a list of blocks
+    std::string &name, // basename of file; UNLV_EXT is appended to it in place
+    int32_t xsize,     // image size (not referenced by this function)
+    int32_t ysize,     // image size; zone y values are subtracted from it
+    BLOCK_LIST *blocks // output list
+) {
+  BLOCK_IT block_it = blocks; // block iterator
+
+  name += UNLV_EXT; // add extension
+  FILE *zone_file = fopen(name.c_str(), "rb");
+  if (zone_file == nullptr) {
+    return false; // didn't read one
+  }
+
+  // Fields of the current zone, in the file's top-down coordinates.
+  int x;
+  int y;
+  int width;
+  int height;
+  // Each record is four integers followed by one token that is discarded.
+  while (tfscanf(zone_file, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
+    // Flip the vertical axis against ysize to get the block's bottom and top.
+    const auto left = static_cast<int16_t>(x);
+    const auto bottom = static_cast<int16_t>(ysize - y - height);
+    const auto right = static_cast<int16_t>(x + width);
+    const auto top = static_cast<int16_t>(ysize - y);
+    // make rect block and put it on the end of the list
+    block_it.add_to_end(new BLOCK(name.c_str(), true, 0, 0, left, bottom, right, top));
+  }
+  fclose(zone_file);
+  tprintf("UZN file %s loaded.\n", name.c_str());
+  return true;
+}
+
+// Appends one new BLOCK, built with an empty name and the coordinates
+// (0, 0, width, height), to the end of blocks.
+void FullPageBlock(int width, int height, BLOCK_LIST *blocks) {
+  BLOCK_IT appender(blocks);
+  appender.add_to_end(new BLOCK("", true, 0, 0, 0, 0, width, height));
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blread.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/blread.h
@ -0,0 +1,40 @@
+/**********************************************************************
+ * File:        blread.h  (Formerly pdread.h)
+ * Description: Friend function of BLOCK to read the uscan pd file.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef BLREAD_H
+#define BLREAD_H
+
+#include <cstdint> // for int32_t
+#include <string>  // for std::string
+
+namespace tesseract {
+
+class BLOCK_LIST;
+
+// Reads a whole UNLV zone file and appends one block per zone to blocks.
+// Returns false if the file cannot be opened, true otherwise.
+bool read_unlv_file(   // read unlv zone file
+    std::string &name, // basename of file; the zone-file extension is appended in place
+    int32_t xsize,     // image size
+    int32_t ysize,     // image size
+    BLOCK_LIST *blocks // output list
+);
+
+// Appends a single block built from the coordinates (0, 0, width, height)
+// to the end of blocks.
+void FullPageBlock(int width, int height, BLOCK_LIST *blocks);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxread.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxread.cpp
@ -0,0 +1,282 @@
+/**********************************************************************
+ * File:        boxread.cpp
+ * Description: Read data from a box file.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 2007, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "boxread.h"
+
+#include "errcode.h" // for ERRCODE, TESSEXIT
+#include "fileerr.h" // for CANTOPENFILE
+#include "rect.h"    // for TBOX
+#include "tprintf.h" // for tprintf
+
+#include <tesseract/unichar.h> // for UNICHAR
+#include "helpers.h"           // for chomp_string
+
+#include <climits> // for INT_MAX
+#include <cstring> // for strchr, strcmp
+#include <fstream> // for std::ifstream
+#include <locale>  // for std::locale::classic
+#include <sstream> // for std::stringstream
+#include <string>  // for std::string
+
+namespace tesseract {
+
+// Special char code used to identify multi-blob labels.
+static const char *kMultiBlobLabelCode = "WordStr";
+
+// Returns the box file name corresponding to the given image_filename.
+// A trailing ".bin.png" or ".nrm.png" is removed as a whole; otherwise the
+// text from the last '.' of the final path component onwards is removed.
+// ".box" is then appended.
+// A '.' that belongs to a directory component (e.g. "./image" or
+// "dir.v2/image") is never treated as the start of an extension. Both '/'
+// and '\\' are accepted as path separators.
+static std::string BoxFileName(const char *image_filename) {
+  std::string box_filename = image_filename;
+  const size_t length = box_filename.length();
+  const std::string last = (length > 8) ? box_filename.substr(length - 8) : "";
+  if (last == ".bin.png" || last == ".nrm.png") {
+    box_filename.resize(length - 8);
+  } else {
+    const size_t lastdot = box_filename.find_last_of('.');
+    const size_t lastsep = box_filename.find_last_of("/\\");
+    // Only a dot located after the last separator starts a file extension.
+    const bool dot_in_basename = lastdot != std::string::npos &&
+                                 (lastsep == std::string::npos || lastdot > lastsep);
+    if (dot_in_basename) {
+      box_filename.resize(lastdot);
+    }
+  }
+  box_filename += ".box";
+  return box_filename;
+}
+
+// Opens, for binary reading, the box file whose name BoxFileName() derives
+// from the given image filename. If fopen fails, CANTOPENFILE is reported
+// with the TESSEXIT action before the (null) handle is returned.
+FILE *OpenBoxFile(const char *fname) {
+  const std::string box_path = BoxFileName(fname);
+  FILE *handle = fopen(box_path.c_str(), "rb");
+  if (handle == nullptr) {
+    CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", box_path.c_str());
+  }
+  return handle;
+}
+
+// Reads all boxes from the box file that corresponds to the given filename.
+// Reads a specific target_page number if >= 0, or all pages otherwise.
+// Skips blanks if skip_blanks is true.
+// The UTF-8 label of the box is put in texts, and the full box definition as
+// a string is put in box_texts, with the corresponding page number in pages.
+// Each of the output vectors is optional (may be nullptr).
+// Returns false if no boxes are found, which includes the case where no
+// bytes could be read from the file.
+bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
+                  std::vector<std::string> *texts, std::vector<std::string> *box_texts,
+                  std::vector<int> *pages) {
+  const std::string box_path = BoxFileName(filename);
+  std::ifstream input(box_path.c_str(), std::ios::in | std::ios::binary);
+  // Slurp the whole stream into memory.
+  const std::istreambuf_iterator<char> file_begin(input);
+  const std::istreambuf_iterator<char> file_end;
+  std::vector<char> contents(file_begin, file_end);
+  if (contents.empty()) {
+    return false;
+  }
+  // NUL-terminate the bytes so the parser can treat them as a C string.
+  contents.push_back('\0');
+  return ReadMemBoxes(target_page, skip_blanks, &contents[0],
+                      /*continue_on_failure*/ true, boxes, texts, box_texts, pages);
+}
+
+// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
+// box_data is split on '\n' and each line is parsed with ParseBoxFileStr.
+// A line that fails to parse is skipped when continue_on_failure is true;
+// otherwise parsing stops and false is returned (entries already appended to
+// the output vectors are left in place).
+// Boxes whose label is a single space or tab are dropped when skip_blanks is
+// true, and boxes from other pages are dropped when target_page >= 0.
+// Returns true if at least one box was accepted.
+bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
+                  std::vector<TBOX> *boxes, std::vector<std::string> *texts,
+                  std::vector<std::string> *box_texts, std::vector<int> *pages) {
+  const std::string box_str(box_data);
+  const std::vector<std::string> lines = split(box_str, '\n');
+  if (lines.empty()) {
+    return false;
+  }
+  int num_boxes = 0;
+  for (const auto &line : lines) {
+    int page = 0;
+    std::string utf8_str;
+    TBOX box;
+    if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
+      if (continue_on_failure) {
+        continue;
+      }
+      return false;
+    }
+    if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
+      continue;
+    }
+    if (target_page >= 0 && page != target_page) {
+      continue;
+    }
+    if (boxes != nullptr) {
+      boxes->push_back(box);
+    }
+    if (texts != nullptr) {
+      texts->push_back(utf8_str);
+    }
+    if (box_texts != nullptr) {
+      // Use the page parsed from this line rather than target_page: the two
+      // are equal whenever target_page >= 0, but with target_page < 0 (all
+      // pages) target_page would write "-1" as the page of every box.
+      std::string full_text;
+      MakeBoxFileStr(utf8_str.c_str(), box, page, full_text);
+      box_texts->push_back(full_text);
+    }
+    if (pages != nullptr) {
+      pages->push_back(page);
+    }
+    ++num_boxes;
+  }
+  return num_boxes > 0;
+}
+
+// TODO(rays) convert all uses of ReadNextBox to use the new ReadAllBoxes.
+// Box files are used ONLY DURING TRAINING, but by both processes of
+// creating tr files with tesseract, and unicharset_extractor.
+// ReadNextBox factors out the code to interpret a line of a box
+// file so that applybox and unicharset_extractor interpret the same way.
+// This function returns the next valid box file utf8 string and coords
+// and returns true, or false on eof (and closes the file).
+// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
+// for valid utf-8 and allows space or tab between fields.
+// utf8_str is set with the unichar string, and bounding box with the box.
+// If there are page numbers in the file, it reads them all.
+bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box) {
+  // A negative target page makes the page-aware overload accept any page.
+  const int any_page = -1;
+  return ReadNextBox(any_page, line_number, box_file, utf8_str, bounding_box);
+}
+
+// As ReadNextBox above, but get a specific page number. (0-based)
+// Use -1 to read any page number. Files without page number all
+// read as if they are page 0.
+// *line_number is incremented for every line read. Lines that are empty,
+// that start with a space or tab, or that fail to parse are skipped. When no
+// more lines can be read, box_file is closed and false is returned.
+bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
+                 TBOX *bounding_box) {
+  char line_buf[kBoxReadBufSize]; // boxfile read buffer
+
+  while (fgets(line_buf, sizeof(line_buf) - 1, box_file) != nullptr) {
+    ++(*line_number);
+
+    const char *cursor = line_buf;
+    const auto *bytes = reinterpret_cast<const unsigned char *>(line_buf);
+    if (bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf) {
+      cursor += 3; // Skip unicode file designation.
+    }
+    // Ignore blank lines and blank boxes (a leading space or tab).
+    const char first = *cursor;
+    if (first == '\n' || first == '\0' || first == ' ' || first == '\t') {
+      continue;
+    }
+    int page = 0;
+    if (!ParseBoxFileStr(cursor, &page, utf8_str, bounding_box)) {
+      tprintf("Box file format error on line %i; ignored\n", *line_number);
+      continue;
+    }
+    if (target_page < 0 || target_page == page) {
+      return true; // Successfully read a box on an acceptable page.
+    }
+    // Otherwise the box is not on the appropriate page: keep reading.
+  }
+  fclose(box_file);
+  return false; // EOF
+}
+
+// Parses the given box file string into a page_number, utf8_str, and
+// bounding_box. Returns true on a successful parse.
+// The box file is assumed to contain box definitions, one per line, of the
+// following format for blob-level boxes:
+//   <UTF8 str> <left> <bottom> <right> <top> <page id>
+// and for word/line-level boxes:
+//   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
+// See applybox.cpp for more information.
+// On entry bounding_box is reset to an empty box and utf8_str to "". They are
+// only given their final values on success. Returns false for an empty
+// string, for coordinates with right < left or top < bottom, and for a label
+// that is not valid UTF-8.
+bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
+                     TBOX *bounding_box) {
+  *bounding_box = TBOX(); // Initialize it to empty.
+  utf8_str = "";
+  char uch[kBoxReadBufSize];
+  const char *buffptr = boxfile_str;
+  // Read the unichar without messing up on Tibetan.
+  // According to issue 253 the utf-8 surrogates 85 and A0 are treated
+  // as whitespace by sscanf, so it is more reliable to just find
+  // ascii space and tab.
+  int uch_len = 0;
+  // Skip unicode file designation, if present.
+  const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
+  if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
+    buffptr += 3;
+  }
+  // Allow a single blank as the UTF-8 string. Check for empty string and
+  // then blindly eat the first character.
+  if (*buffptr == '\0') {
+    return false;
+  }
+  do {
+    uch[uch_len++] = *buffptr++;
+  } while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
+           uch_len < kBoxReadBufSize - 1);
+  uch[uch_len] = '\0';
+  // Step over the single delimiter that ended the label, if there is one.
+  if (*buffptr != '\0') {
+    ++buffptr;
+  }
+  // The sentinels make a line without any numeric field fail the ordering
+  // check below.
+  int x_min = INT_MAX;
+  int y_min = INT_MAX;
+  int x_max = INT_MIN;
+  int y_max = INT_MIN;
+  *page_number = 0;
+  std::stringstream stream(buffptr);
+  // Parse the numbers independently of the global locale.
+  stream.imbue(std::locale::classic());
+  stream >> x_min;
+  stream >> y_min;
+  stream >> x_max;
+  stream >> y_max;
+  stream >> *page_number;
+  if (x_max < x_min || y_max < y_min) {
+    tprintf("Bad box coordinates in boxfile string! %s\n", ubuf);
+    return false;
+  }
+  // Test for long space-delimited string label.
+  if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != nullptr) {
+    strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
+    uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
+    chomp_string(uch);
+    uch_len = static_cast<int>(strlen(uch));
+  }
+  // Validate UTF8 by making unichars with it.
+  int used = 0;
+  while (used < uch_len) {
+    tesseract::UNICHAR ch(uch + used, uch_len - used);
+    int new_used = ch.utf8_len();
+    if (new_used == 0) {
+      // Print the offending byte as an unsigned value: a plain char with the
+      // high bit set would otherwise be sign-extended (e.g. 0xffffffe9).
+      tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", uch + used,
+              static_cast<unsigned>(static_cast<unsigned char>(uch[used])), used + 1);
+      return false;
+    }
+    used += new_used;
+  }
+  utf8_str = uch;
+  // The ordering check above already guarantees x_min <= x_max and
+  // y_min <= y_max, so the coordinates can be used as they are.
+  bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
+  return true; // Successfully read a box.
+}
+
+// Creates a box file string from a unichar string, TBOX and page number:
+// the label followed by left, bottom, right, top and page_num, each preceded
+// by a single space. The result overwrites box_str.
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str) {
+  const int fields[] = {box.left(), box.bottom(), box.right(), box.top(), page_num};
+  box_str = unichar_str;
+  for (const int value : fields) {
+    box_str += " " + std::to_string(value);
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxread.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxread.h
@ -0,0 +1,89 @@
+/**********************************************************************
+ * File:        boxread.h
+ * Description: Read data from a box file.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 2007, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TESSERACT_CCUTIL_BOXREAD_H_
+#define TESSERACT_CCUTIL_BOXREAD_H_
+
+#include <cstdio> // for FILE
+#include <string> // for std::string
+#include <vector> // for std::vector
+
+#include <tesseract/export.h> // for TESS_API
+
+namespace tesseract {
+
+class TBOX;
+
+// Size of buffer used to read a line from a box file.
+const int kBoxReadBufSize = 1024;
+
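+// Open the boxfile based on the given image filename.
+// Returns nullptr if the box file cannot be opened.
+// NOTE(review): the definition in boxread.cpp reports CANTOPENFILE with the
+// TESSEXIT action when the file cannot be opened, so callers may never
+// actually observe a nullptr return - confirm against errcode.h.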
+TESS_API
+FILE *OpenBoxFile(const char *filename);
+
+// Reads all boxes from the given filename.
+// Reads a specific target_page number if >= 0, or all pages otherwise.
+// Skips blanks if skip_blanks is true.
+// The UTF-8 label of the box is put in texts, and the full box definition as
+// a string is put in box_texts, with the corresponding page number in pages.
+// Each of the output vectors is optional (may be nullptr).
+// Returns false if no boxes are found.
+bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
+                  std::vector<std::string> *texts, std::vector<std::string> *box_texts,
+                  std::vector<int> *pages);
+
+// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
+// continue_on_failure allows reading to continue even if an invalid box is
+// encountered and will return true if it succeeds in reading some boxes.
+// It otherwise gives up and returns false on encountering an invalid box.
+TESS_API
+bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
+                  std::vector<TBOX> *boxes, std::vector<std::string> *texts,
+                  std::vector<std::string> *box_texts, std::vector<int> *pages);
+
+// ReadNextBox factors out the code to interpret a line of a box
+// file so that applybox and unicharset_extractor interpret the same way.
+// This function returns the next valid box file utf8 string and coords
+// and returns true, or false on eof (and closes the file).
+// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
+// for valid utf-8 and allows space or tab between fields.
+// utf8_str is set with the unichar string, and bounding box with the box.
+// If there are page numbers in the file, it reads them all.
+TESS_API
+bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box);
+// As ReadNextBox above, but get a specific page number. (0-based)
+// Use -1 to read any page number. Files without page number all
+// read as if they are page 0.
+TESS_API
+bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
+                 TBOX *bounding_box);
+
+// Parses the given box file string into a page_number, utf8_str, and
+// bounding_box. Returns true on a successful parse.
+TESS_API
+bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
+                     TBOX *bounding_box);
+
+// Creates a box file string from a unichar string, TBOX and page number.
+TESS_API
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str);
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCUTIL_BOXREAD_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxword.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxword.cpp
@ -0,0 +1,205 @@
+///////////////////////////////////////////////////////////////////////
+// File:        boxword.cpp
+// Description: Class to represent the bounding boxes of the output.
+// Author:      Ray Smith
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "boxword.h"
+#include "blobs.h"
+#include "host.h" // for NearlyEqual
+#include "normalis.h"
+#include "ocrblock.h"
+#include "pageres.h"
+
+namespace tesseract {
+
+// Clip output boxes to input blob boxes for bounds that are within this
+// tolerance. Otherwise, the blob may be chopped and we have to just use
+// the word bounding box.
+const int kBoxClipTolerance = 2;
+
+// Constructs a word with length_ set to zero; bbox_ and boxes_ are
+// default-constructed.
+BoxWord::BoxWord() : length_(0) {}
+
+// Copy constructor: all members are filled in by CopyFrom(src).
+BoxWord::BoxWord(const BoxWord &src) {
+  CopyFrom(src);
+}
+
+// Copy assignment. Self-assignment is a no-op: CopyFrom clears boxes_ before
+// reading from src, so it must not be handed *this as its source.
+BoxWord &BoxWord::operator=(const BoxWord &src) {
+  if (this != &src) {
+    CopyFrom(src);
+  }
+  return *this;
+}
+
+// Makes this word a copy of src: the bounding box, the length and the first
+// length_ boxes of src are copied.
+// Copying an object onto itself is a no-op. Without the guard the boxes
+// would be cleared and then read back from the just-emptied vector.
+void BoxWord::CopyFrom(const BoxWord &src) {
+  if (this == &src) {
+    return;
+  }
+  bbox_ = src.bbox_;
+  length_ = src.length_;
+  boxes_.assign(src.boxes_.begin(), src.boxes_.begin() + length_);
+}
+
+// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
+// switch back to original image coordinates.
+// For every blob, each outline point that is not hidden, or whose predecessor
+// is not hidden, is passed through the blob's DenormTransform and added to
+// that blob's box. The caller receives a BoxWord allocated with new.
+BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) {
+  auto *result = new BoxWord();
+  // One output box per blob.
+  const int num_blobs = tessword->NumBlobs();
+  result->length_ = num_blobs;
+  result->boxes_.reserve(num_blobs);
+
+  for (int blob_index = 0; blob_index < num_blobs; ++blob_index) {
+    TBLOB *blob = tessword->blobs[blob_index];
+    TBOX denormed_box;
+    for (TESSLINE *outline = blob->outlines; outline != nullptr; outline = outline->next) {
+      // Walk the circular list of edge points exactly once.
+      EDGEPT *const first = outline->loop;
+      EDGEPT *pt = first;
+      do {
+        const bool visible = !pt->IsHidden() || !pt->prev->IsHidden();
+        if (visible) {
+          ICOORD pos(pt->pos.x, pt->pos.y);
+          TPOINT denormed;
+          blob->denorm().DenormTransform(nullptr, pt->pos, &denormed);
+          pos.set_x(denormed.x);
+          pos.set_y(denormed.y);
+          // Grow the blob box by the single denormalized point.
+          TBOX point_box(pos, pos);
+          denormed_box += point_box;
+        }
+        pt = pt->next;
+      } while (pt != first);
+    }
+    result->boxes_.push_back(denormed_box);
+  }
+  result->ComputeBoundingBox();
+  return result;
+}
+
+// Clean up the bounding boxes from the polygonal approximation by
+// expanding slightly, then clipping to the blobs from the original_word
+// that overlap. If not null, the block provides the inverse rotation.
+// Each edge of an expanded box that lies within kBoxClipTolerance of the
+// matching edge of the overlapping original blobs is moved onto that edge;
+// the result is then intersected with the (rotated) word bounding box.
+void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
+  for (int i = 0; i < length_; ++i) {
+    const TBOX &current = boxes_[i];
+    // Expand by a single pixel, as the poly approximation error is 1 pixel.
+    TBOX grown(current.left() - 1, current.bottom() - 1, current.right() + 1, current.top() + 1);
+    // Accumulate the original blobs that mostly overlap the expanded box.
+    TBOX matched;
+    C_BLOB_IT blob_it(original_word->cblob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      TBOX candidate = blob_it.data()->bounding_box();
+      if (block != nullptr) {
+        candidate.rotate(block->re_rotation());
+      }
+      if (!candidate.major_overlap(grown)) {
+        continue;
+      }
+      matched += candidate;
+    }
+    if (!matched.null_box()) {
+      // Snap each edge independently when it is close enough.
+      if (NearlyEqual<int>(matched.left(), grown.left(), kBoxClipTolerance)) {
+        grown.set_left(matched.left());
+      }
+      if (NearlyEqual<int>(matched.right(), grown.right(), kBoxClipTolerance)) {
+        grown.set_right(matched.right());
+      }
+      if (NearlyEqual<int>(matched.top(), grown.top(), kBoxClipTolerance)) {
+        grown.set_top(matched.top());
+      }
+      if (NearlyEqual<int>(matched.bottom(), grown.bottom(), kBoxClipTolerance)) {
+        grown.set_bottom(matched.bottom());
+      }
+    }
+    // Never let a box extend beyond the word it belongs to.
+    TBOX word_box = original_word->bounding_box();
+    if (block != nullptr) {
+      word_box.rotate(block->re_rotation());
+    }
+    boxes_[i] = grown.intersection(word_box);
+  }
+  ComputeBoundingBox();
+}
+
+// Merges the boxes from start to end, not including end, and deletes
+// the boxes between start and end.
+// Both indices are first clipped to [0, length_]; nothing happens unless the
+// clipped range holds at least two boxes. The merged box is stored at start.
+void BoxWord::MergeBoxes(int start, int end) {
+  const int first = ClipToRange(start, 0, length_);
+  const int last = ClipToRange(end, 0, length_);
+  if (last - first <= 1) {
+    return;
+  }
+  // Fold every later box of the range into the first one.
+  TBOX &merged = boxes_[first];
+  for (int i = first + 1; i < last; ++i) {
+    merged += boxes_[i];
+  }
+  // Drop the boxes that were folded in, closing the gap.
+  boxes_.erase(boxes_.begin() + first + 1, boxes_.begin() + last);
+  length_ -= last - first - 1;
+}
+
+// Inserts a new box before the given index.
+// An index at or beyond the current length appends the box at the end.
+// Recomputes the bounding box.
+void BoxWord::InsertBox(int index, const TBOX &box) {
+  const auto position = (index < length_) ? boxes_.begin() + index : boxes_.end();
+  boxes_.insert(position, box);
+  length_ = boxes_.size();
+  ComputeBoundingBox();
+}
+
+// Changes the box at the given index to the new box.
+// The index must satisfy 0 <= index < length_; this is asserted, as in
+// DeleteBox, instead of writing outside the vector.
+// Recomputes the bounding box.
+void BoxWord::ChangeBox(int index, const TBOX &box) {
+  ASSERT_HOST(0 <= index && index < length_);
+  boxes_[index] = box;
+  ComputeBoundingBox();
+}
+
+// Deletes the box with the given index, and shuffles up the rest.
+// The index is asserted to lie in [0, length_).
+// Recomputes the bounding box.
+void BoxWord::DeleteBox(int index) {
+  ASSERT_HOST(0 <= index && index < length_);
+  const auto victim = boxes_.begin() + index;
+  boxes_.erase(victim);
+  length_ -= 1;
+  ComputeBoundingBox();
+}
+
+// Deletes all the boxes stored in BoxWord and resets the word bounding box
+// to a default-constructed TBOX.
+void BoxWord::DeleteAllBoxes() {
+  boxes_.clear();
+  length_ = 0;
+  bbox_ = TBOX();
+}
+
+// Computes the bounding box of the word as the accumulation (operator+=) of
+// the first length_ boxes, starting from a default-constructed TBOX.
+void BoxWord::ComputeBoundingBox() {
+  TBOX total;
+  for (int i = 0; i < length_; ++i) {
+    total += boxes_[i];
+  }
+  bbox_ = total;
+}
+
+// This and other putatively are the same, so call the callback for each blob
+// index where the bounding boxes match.
+// Only indices below both length_ and other.NumBlobs() are examined. cb is a
+// std::function taken by value, so there is nothing for the caller to delete.
+void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
+  for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
+    const TBOX other_box = other.blobs[i]->bounding_box();
+    if (!(other_box == boxes_[i])) {
+      continue;
+    }
+    cb(i);
+  }
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxword.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/boxword.h
@ -0,0 +1,97 @@
+///////////////////////////////////////////////////////////////////////
+// File:        boxword.h
+// Description: Class to represent the bounding boxes of the output.
+// Author:      Ray Smith
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
+#define TESSERACT_CSTRUCT_BOXWORD_H_
+
+#include "rect.h" // for TBOX
+
+#include <functional> // for std::function
+
+namespace tesseract {
+
+class BLOCK;
+class WERD;
+struct TWERD;
+
+// Class to hold an array of bounding boxes for an output word and
+// the bounding box of the whole word.
+class BoxWord {
+public:
+  // Constructs a word with no boxes.
+  BoxWord();
+  // Copy constructor; explicit, so it is only used for direct initialization.
+  explicit BoxWord(const BoxWord &src);
+  ~BoxWord() = default;
+
+  BoxWord &operator=(const BoxWord &src);
+
+  // Replaces the contents of this word with a copy of src.
+  void CopyFrom(const BoxWord &src);
+
+  // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
+  // switch back to original image coordinates.
+  // The returned object is allocated with new.
+  static BoxWord *CopyFromNormalized(TWERD *tessword);
+
+  // Clean up the bounding boxes from the polygonal approximation by
+  // expanding slightly, then clipping to the blobs from the original_word
+  // that overlap. If not null, the block provides the inverse rotation.
+  void ClipToOriginalWord(const BLOCK *block, WERD *original_word);
+
+  // Merges the boxes from start to end, not including end, and deletes
+  // the boxes between start and end.
+  void MergeBoxes(int start, int end);
+
+  // Inserts a new box before the given index.
+  // Recomputes the bounding box.
+  void InsertBox(int index, const TBOX &box);
+
+  // Changes the box at the given index to the new box.
+  // Recomputes the bounding box.
+  void ChangeBox(int index, const TBOX &box);
+
+  // Deletes the box with the given index, and shuffles up the rest.
+  // Recomputes the bounding box.
+  void DeleteBox(int index);
+
+  // Deletes all the boxes stored in BoxWord.
+  void DeleteAllBoxes();
+
+  // This and other putatively are the same, so call the callback for each
+  // blob index where the bounding boxes match.
+  // The callback is a std::function passed by value.
+  void ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const;
+
+  // Bounding box of the whole word.
+  const TBOX &bounding_box() const {
+    return bbox_;
+  }
+  // Number of boxes in the word.
+  int length() const {
+    return length_;
+  }
+  // Box at the given index; the index is not range-checked here.
+  const TBOX &BlobBox(int index) const {
+    return boxes_[index];
+  }
+
+private:
+  // Recomputes bbox_ from the stored boxes.
+  void ComputeBoundingBox();
+
+  TBOX bbox_;               // Bounding box of the whole word.
+  int length_;              // Number of boxes; appears to be kept equal to boxes_.size().
+  std::vector<TBOX> boxes_; // One bounding box per blob.
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CSTRUCT_BOXWORD_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ccstruct.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ccstruct.cpp
@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////
+// File:        ccstruct.cpp
+// Description: ccstruct class.
+// Author:      Samuel Charron
+//
+// (C) Copyright 2006, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "ccstruct.h"
+
+namespace tesseract {
+
+// APPROXIMATIONS of the fractions of the character cell taken by
+// the descenders, ascenders, and x-height.
+const double CCStruct::kDescenderFraction = 0.25;
+const double CCStruct::kXHeightFraction = 0.5;
+const double CCStruct::kAscenderFraction = 0.25;
+// Derived value: the x-height as a fraction of the x-height plus the
+// ascender, computed from the two constants defined above.
+const double CCStruct::kXHeightCapRatio =
+    CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
+
+// Destructor (defaulted).
+// It is defined here, out of line, so the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+CCStruct::~CCStruct() = default;
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ccstruct.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ccstruct.h
@ -0,0 +1,41 @@
+///////////////////////////////////////////////////////////////////////
+// File:        ccstruct.h
+// Description: ccstruct class.
+// Author:      Samuel Charron
+//
+// (C) Copyright 2006, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
+#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
+
+#include "ccutil.h" // for CCUtil
+
+namespace tesseract {
+// Extends CCUtil with the character-cell proportion constants declared below.
+// The constant values are defined out of line (the trailing comments repeat
+// them for convenience), as is the destructor.
+class TESS_API CCStruct : public CCUtil {
+public:
+  CCStruct() = default;
+  // Declared here and defined out of line.
+  ~CCStruct() override;
+
+  // Globally accessible constants.
+  // APPROXIMATIONS of the fractions of the character cell taken by
+  // the descenders, ascenders, and x-height.
+  static const double kDescenderFraction; // = 0.25;
+  static const double kXHeightFraction;   // = 0.5;
+  static const double kAscenderFraction;  // = 0.25;
+  // Derived value giving the x-height as a fraction of cap-height.
+  static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
+};
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/coutln.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/coutln.cpp
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/coutln.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/coutln.h
@ -0,0 +1,297 @@
+/**********************************************************************
+ * File:        coutln.h
+ * Description: Code for the C_OUTLINE class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef COUTLN_H
+#define COUTLN_H
+
+#include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
+#include "mod128.h"     // for DIR128, DIRBITS
+#include "points.h"     // for ICOORD, FCOORD
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+
+#include <tesseract/export.h> // for DLLSYM
+
+#include <cstdint> // for int16_t, int32_t
+#include <bitset>  // for std::bitset<16>
+
+struct Pix;
+
+namespace tesseract {
+
+class CRACKEDGE;
+class DENORM;
+
+#define INTERSECTING INT16_MAX // no winding number
+
+// mask to get step
+#define STEP_MASK 3
+
+// Flags that can be set on a C_OUTLINE. Each enumerator is used as a bit
+// index into the outline's std::bitset<16> of flags (see C_OUTLINE::flag and
+// C_OUTLINE::set_flag), so values must stay below 16.
+enum C_OUTLINE_FLAGS {
+  COUT_INVERSE // White on black blob
+};
+
+// Simple struct to hold the 3 values needed to compute a more precise edge
+// position and direction. The offset_numerator is the difference between the
+// grey threshold and the mean pixel value. pixel_diff is the difference between
+// the pixels in the edge. Consider the following row of pixels: p1 p2 p3 p4 p5
+// Say the image was thresholded  at threshold t, making p1, p2, p3 black
+// and p4, p5 white (p1, p2, p3 < t, and p4, p5 >= t), but suppose that
+// max(p[i+1] - p[i]) is p3 - p2. Then the extrapolated position of the edge,
+// based on the maximum gradient, is at the crack between p2 and p3 plus the
+// offset (t - (p2+p3)/2)/(p3 - p2). We store the pixel difference p3-p2
+// denominator in pixel_diff and the offset numerator, relative to the original
+// binary edge (t - (p2+p3)/2) - (p3 -p2) in offset_numerator.
+// The sign of offset_numerator and pixel_diff are manipulated to ensure
+// that the pixel_diff, which will be used as a weight, is always positive.
+// The direction stores the quantized feature direction for the given step
+// computed from the edge gradient. (Using binary_angle_plus_pi.)
+// If the pixel_diff is zero, it means that the direction of the gradient
+// is in conflict with the step direction, so this step is to be ignored.
+struct EdgeOffset {
+  // Signed numerator of the sub-pixel offset; divided by pixel_diff to get the
+  // offset applied perpendicular to the step.
+  int8_t offset_numerator;
+  // Pixel difference across the edge, used as denominator and as a weight.
+  // Zero means the step is to be ignored.
+  uint8_t pixel_diff;
+  // Quantized feature direction for the step.
+  uint8_t direction;
+};
+
+class C_OUTLINE; // forward declaration
+
+ELISTIZEH(C_OUTLINE)
+// A closed outline stored as a start position plus a sequence of unit steps.
+// Each step is a 2-bit chain code (0-3); four steps are packed into each byte
+// of the steps array. Optional per-step EdgeOffsets hold sub-pixel edge
+// information, and child outlines are held in a nested list.
+class C_OUTLINE : public ELIST_LINK {
+public:
+  // Constructs an empty outline with no steps and no edge offsets.
+  C_OUTLINE() {
+    stepcount = 0;
+    offsets = nullptr;
+  }
+  C_OUTLINE(              // constructor
+      CRACKEDGE *startpt, // from edge detector
+      ICOORD bot_left,    // bounding box
+      ICOORD top_right,   // bounding box
+      int16_t length);    // length of loop
+  C_OUTLINE(ICOORD startpt,    // start of loop
+            DIR128 *new_steps, // steps in loop
+            int16_t length);   // length of loop
+  // Constructs a rotated copy of the given outline.
+  C_OUTLINE(C_OUTLINE *srcline, // outline to copy
+            FCOORD rotation);   // and rotate
+
+  // Build a fake outline, given just a bounding box and append to the list.
+  static void FakeOutline(const TBOX &box, C_OUTLINE_LIST *outlines);
+
+  ~C_OUTLINE() { // destructor
+    delete[] offsets;
+  }
+
+  bool flag(                        // test flag
+      C_OUTLINE_FLAGS mask) const { // flag to test
+    return flags[mask];
+  }
+  void set_flag(            // set flag value
+      C_OUTLINE_FLAGS mask, // flag to set
+      bool value) {         // value to set
+    flags.set(mask, value);
+  }
+
+  C_OUTLINE_LIST *child() { // get child list
+    return &children;
+  }
+
+  // access function
+  const TBOX &bounding_box() const {
+    return box;
+  }
+  // Stores the low 2 bits of stepdir as the chain code of step stepindex,
+  // leaving the other three steps packed in the same byte untouched.
+  void set_step(         // set a step
+      int16_t stepindex, // index of step
+      int8_t stepdir) {  // chain code
+    int shift = stepindex % 4 * 2;
+    uint8_t mask = 3 << shift;
+    steps[stepindex / 4] = ((stepdir << shift) & mask) | (steps[stepindex / 4] & ~mask);
+    // squeeze 4 into byte
+  }
+  // As above, but takes a DIR128 and reduces it to a 2-bit chain code first.
+  void set_step(         // set a step
+      int16_t stepindex, // index of step
+      DIR128 stepdir) {  // direction
+    // clean it
+    int8_t chaindir = stepdir.get_dir() >> (DIRBITS - 2);
+    // difference
+    set_step(stepindex, chaindir);
+    // squeeze 4 into byte
+  }
+
+  int32_t pathlength() const { // get path length
+    return stepcount;
+  }
+  // Return step at a given index as a DIR128.
+  DIR128 step_dir(int index) const {
+    return DIR128(
+        static_cast<int16_t>(((steps[index / 4] >> (index % 4 * 2)) & STEP_MASK) << (DIRBITS - 2)));
+  }
+  // Return the step vector for the given outline position.
+  ICOORD step(int index) const { // index of step
+    return step_coords[chain_code(index)];
+  }
+  // get start position
+  const ICOORD &start_pos() const {
+    return start;
+  }
+  // Returns the position at the given index on the outline.
+  // NOT to be used lightly, as it has to iterate the outline to find out.
+  ICOORD position_at_index(int index) const {
+    ICOORD pos = start;
+    for (int i = 0; i < index; ++i) {
+      pos += step(i);
+    }
+    return pos;
+  }
+  // Returns the sub-pixel accurate position given the integer position pos
+  // at the given index on the outline. pos may be a return value of
+  // position_at_index, or computed by repeatedly adding step to the
+  // start_pos() in the usual way.
+  FCOORD sub_pixel_pos_at_index(const ICOORD &pos, int index) const {
+    const ICOORD &step_to_next(step(index));
+    // Start from the mid-point of the step.
+    FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f, pos.y() + step_to_next.y() / 2.0f);
+    if (offsets != nullptr && offsets[index].pixel_diff > 0) {
+      float offset = offsets[index].offset_numerator;
+      offset /= offsets[index].pixel_diff;
+      // The offset is applied perpendicular to the step direction.
+      if (step_to_next.x() != 0) {
+        f_pos.set_y(f_pos.y() + offset);
+      } else {
+        f_pos.set_x(f_pos.x() + offset);
+      }
+    }
+    return f_pos;
+  }
+  // Returns the step direction for the given index or -1 if there is none.
+  int direction_at_index(int index) const {
+    if (offsets != nullptr && offsets[index].pixel_diff > 0) {
+      return offsets[index].direction;
+    }
+    return -1;
+  }
+  // Returns the edge strength for the given index.
+  // If there are no recorded edge strengths, returns 1 (assuming the image
+  // is binary). Returns 0 if the gradient direction conflicts with the
+  // step direction, indicating that this position could be skipped.
+  int edge_strength_at_index(int index) const {
+    if (offsets != nullptr) {
+      return offsets[index].pixel_diff;
+    }
+    return 1;
+  }
+  // Return the step as a chain code (0-3) related to the standard feature
+  // direction of binary_angle_plus_pi by:
+  // chain_code * 64 = feature direction.
+  int chain_code(int index) const { // index of step
+    return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK;
+  }
+
+  int32_t area() const;       // Returns area of self and 1st level children.
+  int32_t perimeter() const;  // Total perimeter of self and 1st level children.
+  int32_t outer_area() const; // Returns area of self only.
+  int32_t count_transitions(  // count maxima
+      int32_t threshold);     // size threshold
+
+  bool operator<( // containment test
+      const C_OUTLINE &other) const;
+  // Reverse containment test, implemented with operator<. Takes a const
+  // reference (it only calls the const operator<) so that const outlines can
+  // be compared too.
+  bool operator>( // containment test
+      const C_OUTLINE &other) const {
+    return other < *this; // use the < to do it
+  }
+  int16_t winding_number(   // get winding number
+      ICOORD testpt) const; // around this point
+                            // get direction
+  int16_t turn_direction() const;
+  void reverse(); // reverse direction
+
+  void move(             // reposition outline
+      const ICOORD vec); // by vector
+
+  // Returns true if *this and its children are legally nested.
+  // The outer area of a child should have the opposite sign to the
+  // parent. If not, it means we have discarded an outline in between
+  // (probably due to excessive length).
+  bool IsLegallyNested() const;
+
+  // If this outline is smaller than the given min_size, delete this and
+  // remove from its list, via *it, after checking that *it points to this.
+  // Otherwise, if any children of this are too small, delete them.
+  // On entry, *it must be an iterator pointing to this. If this gets deleted
+  // then this is extracted from *it, so an iteration can continue.
+  void RemoveSmallRecursive(int min_size, C_OUTLINE_IT *it);
+
+  // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
+  // pix is 8-bit. Does nothing otherwise.
+  void ComputeEdgeOffsets(int threshold, Image pix);
+  // Adds sub-pixel resolution EdgeOffsets for the outline using only
+  // a binary image source.
+  void ComputeBinaryOffsets();
+
+  // Renders the outline to the given pix, with left and top being
+  // the coords of the upper-left corner of the pix.
+  void render(int left, int top, Image pix) const;
+
+  // Renders just the outline to the given pix (no fill), with left and top
+  // being the coords of the upper-left corner of the pix.
+  void render_outline(int left, int top, Image pix) const;
+
+#ifndef GRAPHICS_DISABLED
+  void plot(                           // draw one
+      ScrollView *window,              // window to draw in
+      ScrollView::Color colour) const; // colour to draw it
+  // Draws the outline in the given colour, normalized using the given denorm,
+  // making use of sub-pixel accurate information if available.
+  void plot_normed(const DENORM &denorm, ScrollView::Color colour, ScrollView *window) const;
+#endif // !GRAPHICS_DISABLED
+
+  C_OUTLINE &operator=(const C_OUTLINE &source);
+
+  // Returns a newly allocated copy of *src made with operator=.
+  // The caller takes ownership of the returned outline.
+  static C_OUTLINE *deep_copy(const C_OUTLINE *src) {
+    auto *outline = new C_OUTLINE;
+    *outline = *src;
+    return outline;
+  }
+
+  static ICOORD chain_step(int chaindir);
+
+  // The maximum length of any outline. The stepcount is stored as 16 bits,
+  // but it is probably not a good idea to increase this constant by much
+  // and switch to 32 bits, as it plays an important role in keeping huge
+  // outlines invisible, which prevents bad speed behavior.
+  static const int kMaxOutlineLength = 16000;
+
+private:
+  // Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
+  // by the step, increment, and vertical step ? x : y position * increment
+  // at step s Mod stepcount respectively. Used to add or subtract the
+  // direction and position to/from accumulators of a small neighbourhood.
+  void increment_step(int s, int increment, ICOORD *pos, int *dir_counts, int *pos_totals) const;
+  // Number of bytes needed to hold stepcount 2-bit steps (4 per byte,
+  // rounded up).
+  int step_mem() const {
+    return (stepcount + 3) / 4;
+  }
+
+  TBOX box;                // bounding box
+  ICOORD start;            // start coord
+  int16_t stepcount;       // no of steps
+  std::bitset<16> flags;   // flags about outline
+  std::vector<uint8_t> steps; // step array
+  EdgeOffset *offsets;     // Higher precision edge.
+  C_OUTLINE_LIST children; // child elements
+  static ICOORD step_coords[4];
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/crakedge.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/crakedge.h
@ -0,0 +1,42 @@
+/**********************************************************************
+ * File:        crakedge.h      (Formerly: crkedge.h)
+ * Description: Structures for the Crack following edge detector.
+ * Author:      Ray Smith
+ * Created:     Fri Mar 22 16:06:38 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef CRAKEDGE_H
+#define CRAKEDGE_H
+
+#include "mod128.h"
+#include "points.h"
+
+namespace tesseract {
+
+// One point of a crack-following edge: its position, the step taken from it
+// and links to the previous and next points.
+// All scalar and pointer members have default initializers, so a
+// default-constructed CRACKEDGE never holds indeterminate values.
+class CRACKEDGE {
+public:
+  CRACKEDGE() = default;
+
+  ICOORD pos;                // position of crack
+  int8_t stepx = 0;          // edge step in x
+  int8_t stepy = 0;          // edge step in y
+  int8_t stepdir = 0;        // chaincode
+  CRACKEDGE *prev = nullptr; // previous point
+  CRACKEDGE *next = nullptr; // next point
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/debugpixa.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/debugpixa.h
@ -0,0 +1,58 @@
+#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+
+#include "image.h"
+
+#include <allheaders.h>
+
+namespace tesseract {
+
+// Class to hold a Pixa collection of debug images with captions and save them
+// to a PDF file.
+// Collects debug images with captions in a Pixa and can write them to a PDF
+// file. The object owns the Pixa and the caption font and releases both in
+// its destructor, so copying is disabled to avoid destroying them twice.
+class DebugPixa {
+public:
+  // TODO(rays) add another constructor with size control.
+  // Creates an empty image collection and, unless
+  // TESSERACT_DISABLE_DEBUG_FONTS is defined, the font used for captions.
+  DebugPixa() {
+    pixa_ = pixaCreate(0);
+#ifdef TESSERACT_DISABLE_DEBUG_FONTS
+    fonts_ = nullptr;
+#else
+    fonts_ = bmfCreate(nullptr, 14);
+#endif
+  }
+  // Releases the image collection and the caption font. Nothing is written
+  // here: call WritePDF() beforehand to save the collected images.
+  ~DebugPixa() {
+    pixaDestroy(&pixa_);
+    bmfDestroy(&fonts_);
+  }
+
+  // Not copyable: a copy would share pixa_ and fonts_ and both objects would
+  // destroy them.
+  DebugPixa(const DebugPixa &) = delete;
+  DebugPixa &operator=(const DebugPixa &) = delete;
+
+  // Adds the given pix to the set of pages in the PDF file, with the given
+  // caption added as a text block (L_ADD_BELOW).
+  // The caption colour depends on the depth of pix: 1 below 8 bits,
+  // 0x80 at exactly 8 bits and 0x00ff0000 above 8 bits.
+  void AddPix(const Image pix, const char *caption) {
+    int depth = pixGetDepth(pix);
+    int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
+    Image pix_debug =
+        pixAddSingleTextblock(pix, fonts_, caption, color, L_ADD_BELOW, nullptr);
+    // L_INSERT hands the captioned image over to pixa_.
+    pixaAddPix(pixa_, pix_debug, L_INSERT);
+  }
+
+  // If any images have been added, writes them all to a PDF with the given
+  // filename and then empties the collection. Does nothing otherwise.
+  void WritePDF(const char *filename) {
+    if (pixaGetCount(pixa_) > 0) {
+      pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
+      pixaClear(pixa_);
+    }
+  }
+
+private:
+  // The collection of images to put in the PDF.
+  Pixa *pixa_;
+  // The fonts used to draw text captions.
+  L_Bmf *fonts_;
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/detlinefit.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/detlinefit.cpp
@ -0,0 +1,302 @@
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.cpp
+// Description: Deterministic least median squares line fitting.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "detlinefit.h"
+#include "helpers.h"    // for IntCastRounded
+#include "statistc.h"
+#include "tprintf.h"
+
+#include <algorithm>
+#include <cfloat> // for FLT_MAX
+
+namespace tesseract {
+
+// The number of points to consider at each end.
+const int kNumEndPoints = 3;
+// The minimum number of points at which to switch to number of points
+// for badly fitted lines.
+// To ensure a sensible error metric, kMinPointsForErrorCount should be at
+// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
+// ComputeUpperQuartileError.
+const int kMinPointsForErrorCount = 16;
+// The maximum real distance to use before switching to number of
+// mis-fitted points, which will get square-rooted for true distance.
+// This is a real-valued distance that is only used in floating point
+// expressions, so it is declared as a double rather than an int initialized
+// from a floating point literal.
+const double kMaxRealDistance = 2.0;
+
+// Constructs a fitter with the squared line length initialized to zero.
+DetLineFit::DetLineFit() : square_length_(0.0) {}
+
+// Forgets every point that has been Added, together with any distances
+// computed from them by an earlier fit.
+void DetLineFit::Clear() {
+  distances_.clear();
+  pts_.clear();
+}
+
+// Adds a new point with a half-width of zero. The point is copied, so pt
+// does not need to stay in scope.
+void DetLineFit::Add(const ICOORD &pt) {
+  Add(pt, 0);
+}
+// Adds a new point and associates a half-width with it.
+// If a point overlaps the previous point by more than half the width, and
+// its distance is further than the previous point, then the more distant
+// point is ignored in the distance calculation (see ComputeDistances).
+// Useful for ignoring i dots and other diacritics.
+void DetLineFit::Add(const ICOORD &pt, int halfwidth) {
+  pts_.emplace_back(pt, halfwidth);
+}
+
+// Fits a line to the points, ignoring the skip_first initial points and the
+// skip_last final points, returning the fitted line as a pair of points,
+// and the upper quartile error.
+// skip_first and skip_last are clamped to [0, number of points - 1], so that
+// at least one candidate start point and one candidate end point remain.
+// With no points, *pt1 and *pt2 are set to (0, 0) and 0.0 is returned.
+// With 1 or 2 points, *pt1 and *pt2 are taken directly from the candidate
+// points and 0.0 is returned.
+// If no candidate start point differs from any candidate end point, no line
+// can be evaluated: *pt1 and *pt2 are both set to the first candidate start
+// point and the initial "no fit" value of -1.0 is returned.
+double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2) {
+  // Do something sensible with no points.
+  if (pts_.empty()) {
+    pt1->set_x(0);
+    pt1->set_y(0);
+    *pt2 = *pt1;
+    return 0.0;
+  }
+  // Count the points and find the first and last kNumEndPoints.
+  int pt_count = pts_.size();
+  // Clamp the skip counts so that the indexing below stays inside pts_ even
+  // for negative or over-large values.
+  skip_first = std::min(std::max(skip_first, 0), pt_count - 1);
+  skip_last = std::min(std::max(skip_last, 0), pt_count - 1);
+  ICOORD *starts[kNumEndPoints];
+  int start_count = 0;
+  int end_i = std::min(skip_first + kNumEndPoints, pt_count);
+  for (int i = skip_first; i < end_i; ++i) {
+    starts[start_count++] = &pts_[i].pt;
+  }
+  ICOORD *ends[kNumEndPoints];
+  int end_count = 0;
+  end_i = std::max(0, pt_count - kNumEndPoints - skip_last);
+  for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
+    ends[end_count++] = &pts_[i].pt;
+  }
+  // 1 or 2 points need special treatment.
+  if (pt_count <= 2) {
+    *pt1 = *starts[0];
+    if (pt_count > 1) {
+      *pt2 = *ends[0];
+    } else {
+      *pt2 = *pt1;
+    }
+    return 0.0;
+  }
+  // Although with between 2 and 2*kNumEndPoints-1 points, there will be
+  // overlap in the starts, ends sets, this is OK and taken care of by the
+  // if (*start != *end) test below, which also tests for equal input points.
+  double best_uq = -1.0;
+  // Iterate each pair of points and find the best fitting line.
+  for (int i = 0; i < start_count; ++i) {
+    ICOORD *start = starts[i];
+    for (int j = 0; j < end_count; ++j) {
+      ICOORD *end = ends[j];
+      if (*start != *end) {
+        ComputeDistances(*start, *end);
+        // Compute the upper quartile error from the line.
+        double dist = EvaluateLineFit();
+        if (dist < best_uq || best_uq < 0.0) {
+          best_uq = dist;
+          *pt1 = *start;
+          *pt2 = *end;
+        }
+      }
+    }
+  }
+  if (best_uq < 0.0) {
+    // Every candidate start coincided with every candidate end, so no line
+    // was evaluated. Still give the caller well-defined output points.
+    *pt1 = *starts[0];
+    *pt2 = *pt1;
+  }
+  // Finally compute the square root to return the true distance.
+  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
+}
+
+// Fit constrained to the supplied direction vector. Chooses as line_pt the
+// supplied point whose cross product with direction is the median, after
+// discarding points whose cross product lies outside [min_dist, max_dist].
+// Returns the error metric computed over the same reduced set of points.
+// With no points, or none inside the range, sets *line_pt to (0, 0) and
+// returns 0.0. If debug is true the distances are printed with tprintf.
+// *Makes use of floating point arithmetic*
+double DetLineFit::ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist,
+                                  bool debug, ICOORD *line_pt) {
+  ComputeConstrainedDistances(direction, min_dist, max_dist);
+  // Nothing sensible to fit without points or in-range distances.
+  if (pts_.empty() || distances_.empty()) {
+    line_pt->set_x(0);
+    line_pt->set_y(0);
+    return 0.0;
+  }
+  // Partition the distances around the median entry.
+  auto median_index = distances_.size() / 2;
+  auto median_it = distances_.begin() + median_index;
+  std::nth_element(distances_.begin(), median_it, distances_.end());
+  *line_pt = median_it->data();
+  if (debug) {
+    tprintf("Constrained fit to dir %g, %g = %d, %d :%zu distances:\n", direction.x(), direction.y(),
+            line_pt->x(), line_pt->y(), distances_.size());
+    const int num_distances = distances_.size();
+    for (int i = 0; i < num_distances; ++i) {
+      auto &entry = distances_[i];
+      tprintf("%d: %d, %d -> %g\n", i, entry.data().x(), entry.data().y(),
+              entry.key());
+    }
+    tprintf("Result = %zu\n", median_index);
+  }
+  // Make every distance relative to the chosen point.
+  double origin_offset = direction * *line_pt;
+  for (auto &entry : distances_) {
+    entry.key() -= origin_offset;
+  }
+  return sqrt(EvaluateLineFit());
+}
+
+// Reports whether the last call to Fit or ConstrainedFit left at least
+// kMinPointsForErrorCount distances, i.e. enough points for the fitted
+// points to be used on a badly fitted line.
+bool DetLineFit::SufficientPointsForIndependentFit() const {
+  const bool too_few = distances_.size() < kMinPointsForErrorCount;
+  return !too_few;
+}
+
+// Backwards compatible fit returning a gradient *m and constant *c.
+// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
+// function in preference to the LMS class.
+// If the fitted end points share the same x coordinate, the gradient cannot
+// be represented and both *m and *c are set to 0.
+double DetLineFit::Fit(float *m, float *c) {
+  ICOORD first, last;
+  const double fit_error = Fit(&first, &last);
+  if (last.x() == first.x()) {
+    *m = 0.0f;
+    *c = 0.0f;
+    return fit_error;
+  }
+  *m = static_cast<float>(last.y() - first.y()) / (last.x() - first.x());
+  *c = first.y() - *m * first.x();
+  return fit_error;
+}
+
+// Backwards compatible constrained fit with a supplied gradient m, returning
+// the fitted constant in *c.
+// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
+// to avoid potential difficulties with infinite gradients.
+double DetLineFit::ConstrainedFit(double m, float *c) {
+  // With no points there is nothing to fit.
+  if (pts_.empty()) {
+    *c = 0.0f;
+    return 0.0;
+  }
+  // Turn the gradient into the unit direction vector (1, m) / sqrt(1 + m^2).
+  double x_component = 1.0 / sqrt(1.0 + m * m);
+  FCOORD direction(x_component, m * x_component);
+  ICOORD fitted_pt;
+  double fit_error = ConstrainedFit(direction, -FLT_MAX, FLT_MAX, false, &fitted_pt);
+  *c = fitted_pt.y() - fitted_pt.x() * m;
+  return fit_error;
+}
+
+// Computes and returns the squared evaluation metric for a line fit.
+// This is normally the squared upper quartile error. When there are at least
+// kMinPointsForErrorCount distances and that error exceeds
+// kMaxRealDistance squared, the number of mis-fitted points is returned
+// instead.
+double DetLineFit::EvaluateLineFit() {
+  // Always compute the upper quartile error first.
+  double uq_error = ComputeUpperQuartileError();
+  const bool enough_points = distances_.size() >= kMinPointsForErrorCount;
+  const bool badly_fitted = uq_error > kMaxRealDistance * kMaxRealDistance;
+  if (!enough_points || !badly_fitted) {
+    return uq_error;
+  }
+  // Use the number of mis-fitted points as the error metric, as this
+  // gives a better measure of fit for badly fitted lines where more
+  // than a quarter are badly fitted.
+  double threshold = kMaxRealDistance * sqrt(square_length_);
+  return NumberOfMisfittedPoints(threshold);
+}
+
+// Replaces every stored distance by its absolute value, partially reorders
+// the distances to find the upper quartile one, and returns that distance
+// squared and divided by square_length_. Returns 0.0 if there are no
+// distances or square_length_ is not positive.
+double DetLineFit::ComputeUpperQuartileError() {
+  if (distances_.empty()) {
+    return 0.0;
+  }
+  // Make all the errors non-negative.
+  for (auto &entry : distances_) {
+    if (entry.key() < 0) {
+      entry.key() = -entry.key();
+    }
+  }
+  // Select the entry at three quarters of the way through the sorted order.
+  const int num_errors = distances_.size();
+  const int uq_index = 3 * num_errors / 4;
+  auto uq_it = distances_.begin() + uq_index;
+  std::nth_element(distances_.begin(), uq_it, distances_.end());
+  double uq_dist = uq_it->key();
+  // The true distance is the square root of the dist squared / square_length.
+  // Don't bother with the square root. Just return the square distance.
+  if (square_length_ > 0.0) {
+    return uq_dist * uq_dist / square_length_;
+  }
+  return 0.0;
+}
+
+// Counts the stored distances that are strictly greater than threshold.
+// The distances are compared as stored: no absolute value is taken here.
+int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
+  const auto misfits =
+      std::count_if(distances_.begin(), distances_.end(),
+                    [threshold](const auto &entry) { return entry.key() > threshold; });
+  return static_cast<int>(misfits);
+}
+
+// Rebuilds distances_ with the signed cross product of (end - start) with
+// each point relative to start, and records the squared length of the line
+// vector in square_length_.
+// A point is left out if its absolute distance is greater than that of the
+// last point kept and its separation from it along the line is less than the
+// rounded line length times the half-width of either the point itself or the
+// point before it.
+void DetLineFit::ComputeDistances(const ICOORD &start, const ICOORD &end) {
+  distances_.clear();
+  ICOORD line_vector = end;
+  line_vector -= start;
+  square_length_ = line_vector.sqlength();
+  const int line_length = IntCastRounded(sqrt(square_length_));
+  // State of the most recently kept point.
+  int last_abs_dist = 0;
+  int last_dot = 0;
+  const int num_pts = pts_.size();
+  for (int i = 0; i < num_pts; ++i) {
+    ICOORD rel_pt = pts_[i].pt;
+    rel_pt -= start;
+    // Projection onto the line, scaled by the line length.
+    const int dot = line_vector % rel_pt;
+    // Compute |line_vector||pt_vector|sin(angle between)
+    const int dist = line_vector * rel_pt;
+    const int abs_dist = dist < 0 ? -dist : dist;
+    if (i > 0 && abs_dist > last_abs_dist) {
+      // Ignore this point if it overlaps the previous one.
+      const int separation = abs(dot - last_dot);
+      const auto limit_here = line_length * pts_[i].halfwidth;
+      const auto limit_prev = line_length * pts_[i - 1].halfwidth;
+      if (separation < limit_here || separation < limit_prev) {
+        continue;
+      }
+    }
+    distances_.emplace_back(dist, pts_[i].pt);
+    last_abs_dist = abs_dist;
+    last_dot = dot;
+  }
+}
+
+// Rebuilds distances_ with the signed cross product of direction with each
+// point, keeping only the values that lie within [min_dist, max_dist], and
+// records the squared length of direction in square_length_.
+void DetLineFit::ComputeConstrainedDistances(const FCOORD &direction, double min_dist,
+                                             double max_dist) {
+  distances_.clear();
+  square_length_ = direction.sqlength();
+  for (auto &entry : pts_) {
+    FCOORD float_pt = entry.pt;
+    // Compute |line_vector||pt_vector|sin(angle between)
+    double cross = direction * float_pt;
+    const bool in_range = min_dist <= cross && cross <= max_dist;
+    if (!in_range) {
+      continue;
+    }
+    distances_.emplace_back(cross, entry.pt);
+  }
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/detlinefit.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/detlinefit.h
@ -0,0 +1,157 @@
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.h
+// Description: Deterministic least upper-quartile squares line fitting.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
+#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
+
+#include "kdpair.h"
+#include "points.h"
+
+namespace tesseract {
+
+// This class fits a line to a set of ICOORD points.
+// There is no restriction on the direction of the line, as it
+// uses a vector method, ie no concern over infinite gradients.
+// The fitted line has the least upper quartile of squares of perpendicular
+// distances of all source points from the line, subject to the constraint
+// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
+// i.e. the 9 combinations of one of the first 3 and last 3 points.
+// A fundamental assumption of this algorithm is that one of the first 3 and
+// one of the last 3 points are near the best line fit.
+// The points must be Added in line order for the algorithm to work properly.
+// No floating point calculations are needed* to make an accurate fit,
+// and no random numbers are needed** so the algorithm is deterministic,
+// architecture-stable, and compiler-stable as well as stable to minor
+// changes in the input.
+// *A single floating point division is used to compute each line's distance.
+// This is unlikely to result in choice of a different line, but if it does,
+// it would be easy to replace with a 64 bit integer calculation.
+// **Random numbers are used in the nth_item function, but the worst
+// non-determinism that can result is picking a different result among equals,
+// and that wouldn't make any difference to the end-result distance, so the
+// randomness does not affect the determinism of the algorithm. The random
+// numbers are only there to guarantee average linear time.
+// Fitting time is linear, but with a high constant, as it tries 9 different
+// lines and computes the distance of all points each time.
+// This class is aimed at replacing the LLSQ (linear least squares) and
+// LMS (least median of squares) classes that are currently used for most
+// of the line fitting in Tesseract.
+class DetLineFit {
+public:
+  DetLineFit();
+  ~DetLineFit() = default;
+
+  // Deletes all Added points.
+  void Clear();
+
+  // Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
+  // Add must be called on points in sequence along the line.
+  void Add(const ICOORD &pt);
+  // Adds a new point and associates a half-width with it. If a point overlaps
+  // the previous point by more than half the width, and its distance is
+  // further than the previous point, then the more distant point is ignored
+  // in the distance calculation. Useful for ignoring i dots and other
+  // diacritics.
+  void Add(const ICOORD &pt, int halfwidth);
+
+  // Fits a line to the points, returning the fitted line as a pair of
+  // points, and the upper quartile error.
+  // Convenience overload that skips no points at either end.
+  double Fit(ICOORD *pt1, ICOORD *pt2) {
+    return Fit(0, 0, pt1, pt2);
+  }
+  // Fits a line to the points, ignoring the skip_first initial points and the
+  // skip_last final points, returning the fitted line as a pair of points,
+  // and the upper quartile error.
+  double Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2);
+
+  // Constrained fit with a supplied direction vector. Finds the best line_pt,
+  // that is one of the supplied points having the median cross product with
+  // direction, ignoring points that have a cross product outside of the range
+  // [min_dist, max_dist]. Returns the resulting error metric using the same
+  // reduced set of points.
+  // *Makes use of floating point arithmetic*
+  double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug,
+                        ICOORD *line_pt);
+
+  // Returns true if there were enough points at the last call to Fit or
+  // ConstrainedFit for the fitted points to be used on a badly fitted line.
+  bool SufficientPointsForIndependentFit() const;
+
+  // Backwards compatible fit returning a gradient and constant.
+  // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
+  // function in preference to the LMS class.
+  double Fit(float *m, float *c);
+
+  // Backwards compatible constrained fit with a supplied gradient.
+  // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
+  // to avoid potential difficulties with infinite gradients.
+  double ConstrainedFit(double m, float *c);
+
+private:
+  // Simple struct to hold an ICOORD point and a halfwidth representing half
+  // the "width" (supposedly approximately parallel to the direction of the
+  // line) of each point, such that distant points can be discarded when they
+  // overlap nearer points. (Think i dot and other diacritics or noise.)
+  struct PointWidth {
+    // Default: the origin with zero halfwidth.
+    PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
+    PointWidth(const ICOORD &pt0, int halfwidth0) : pt(pt0), halfwidth(halfwidth0) {}
+
+    ICOORD pt;     // The source point.
+    int halfwidth; // Half the "width" of the point, as described above.
+  };
+  // Type holds the distance of each point from the fitted line and the point
+  // itself. Use of double allows integer distances from ICOORDs to be stored
+  // exactly, and also the floating point results from ConstrainedFit.
+  using DistPointPair = KDPairInc<double, ICOORD>;
+
+  // Computes and returns the squared evaluation metric for a line fit.
+  double EvaluateLineFit();
+
+  // Computes the absolute values of the precomputed distances_,
+  // and returns the squared upper-quartile error distance.
+  double ComputeUpperQuartileError();
+
+  // Returns the number of sample points that have an error more than threshold.
+  int NumberOfMisfittedPoints(double threshold) const;
+
+  // Computes all the cross product distances of the points from the line,
+  // storing the actual (signed) cross products in distances_.
+  // Ignores distances of points that are further away than the previous point,
+  // and overlap the previous point by at least half.
+  void ComputeDistances(const ICOORD &start, const ICOORD &end);
+
+  // Computes all the cross product distances of the points perpendicular to
+  // the given direction, ignoring distances outside of the given distance
+  // range, storing the actual (signed) cross products in distances_.
+  void ComputeConstrainedDistances(const FCOORD &direction, double min_dist, double max_dist);
+
+  // Stores all the source points in the order they were given and their
+  // halfwidths, if any.
+  std::vector<PointWidth> pts_;
+  // Stores the computed perpendicular distances of (some of) the pts_ from a
+  // given vector (assuming it goes through the origin, making it a line).
+  // Since the distances may be a subset of the input points, and get
+  // re-ordered by the nth_item function, the original point is stored
+  // alongside the distance.
+  std::vector<DistPointPair> distances_; // Distances of points.
+  // The squared length of the vector used to compute distances_.
+  double square_length_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/dppoint.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/dppoint.cpp
@ -0,0 +1,99 @@
+/**********************************************************************
+ * File:        dppoint.cpp
+ * Description: Simple generic dynamic programming class.
+ * Author:      Ray Smith
+ * Created:     Wed Mar 25 19:08:01 PDT 2009
+ *
+ * (C) Copyright 2009, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "dppoint.h"
+#include "errcode.h"
+#include "tprintf.h"
+
+namespace tesseract {
+
+// Runs the dynamic program over the array points[0..size), using cost_func to
+// evaluate each candidate predecessor.
+// A predecessor lies between min_step and max_step (inclusive) entries before
+// the point being evaluated.
+// Returns the cheapest point among the last min_step entries (the tail of the
+// best path), or nullptr if the arguments are degenerate.
+DPPoint *DPPoint::Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
+                        DPPoint *points) {
+  const bool degenerate = size <= 0 || max_step < min_step || min_step >= size;
+  if (degenerate) {
+    return nullptr; // Degenerate, but not necessarily an error.
+  }
+  ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true.
+  if (debug) {
+    tprintf("min = %d, max=%d\n", min_step, max_step);
+  }
+  // Forward pass: evaluate the total cost at each point.
+  for (int index = 0; index < size; ++index) {
+    DPPoint &current = points[index];
+    for (int step = min_step; step <= max_step; ++step) {
+      // A step that would reach before the array start is evaluated with a
+      // null predecessor.
+      DPPoint *candidate = nullptr;
+      if (step <= index) {
+        candidate = points + index - step;
+      }
+      int64_t candidate_cost = (current.*cost_func)(candidate);
+      if (current.best_prev_ != nullptr && step > min_step * 2 &&
+          candidate_cost > current.total_cost_) {
+        break; // Find only the first minimum if going over twice the min.
+      }
+    }
+    // total_cost_ excluded the local cost while predecessors were compared;
+    // fold it in now.
+    current.total_cost_ += current.local_cost_;
+    if (debug) {
+      tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", index, current.local_cost_,
+              current.total_cost_, current.total_steps_);
+    }
+  }
+  // Backward scan: pick the cheapest end among the last min_step points,
+  // preferring the later one on ties.
+  DPPoint *best = points + (size - 1);
+  for (int end = size - 2; end >= size - min_step; --end) {
+    if (points[end].total_cost_ < best->total_cost_) {
+      best = points + end;
+    }
+  }
+  return best;
+}
+
+// A CostFunc that takes the variance of step into account in the cost.
+// prev is the candidate predecessor; nullptr (or this) means "start a new
+// path here", which is offered to UpdateIfBetter with a cost of 0.
+// Returns the cost of reaching this point via prev, excluding this point's
+// local cost.
+int64_t DPPoint::CostWithVariance(const DPPoint *prev) {
+  if (prev == nullptr || prev == this) {
+    UpdateIfBetter(0, 1, nullptr, 0, 0, 0);
+    return 0;
+  }
+
+  // Step size in array elements between prev and this.
+  int delta = this - prev;
+  int32_t n = prev->n_ + 1;
+  int32_t sig_x = prev->sig_x_ + delta;
+  // Widen to 64 bits BEFORE multiplying: squaring in 32-bit arithmetic is
+  // signed overflow once the accumulated step sum exceeds about 46340.
+  int64_t sig_xsq = prev->sig_xsq_ + static_cast<int64_t>(delta) * delta;
+  // Variance of the step sizes: (sum(x^2) - sum(x)^2 / n) / n.
+  int64_t cost = (sig_xsq - static_cast<int64_t>(sig_x) * sig_x / n) / n;
+  cost += prev->total_cost_;
+  UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
+  return cost;
+}
+
+// Records a new best path to this point, but only when cost is strictly
+// lower than the current total_cost_. Otherwise leaves every member untouched.
+void DPPoint::UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint *prev, int32_t n,
+                             int32_t sig_x, int64_t sig_xsq) {
+  if (cost >= total_cost_) {
+    return; // Not an improvement.
+  }
+  total_cost_ = cost;
+  total_steps_ = steps;
+  best_prev_ = prev;
+  n_ = n;
+  sig_x_ = sig_x;
+  sig_xsq_ = sig_xsq;
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/dppoint.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/dppoint.h
@ -0,0 +1,105 @@
+/**********************************************************************
+ * File:        dppoint.h
+ * Description: Simple generic dynamic programming class.
+ * Author:      Ray Smith
+ * Created:     Wed Mar 25 18:57:01 PDT 2009
+ *
+ * (C) Copyright 2009, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TESSERACT_CCSTRUCT_DPPOINT_H_
+#define TESSERACT_CCSTRUCT_DPPOINT_H_
+
+#include <cstdint>
+
+namespace tesseract {
+
+// A simple class to provide a dynamic programming solution to a class of
+// 1st-order problems in which the cost is dependent only on the current
+// step and the best cost to that step, with a possible special case
+// of using the variance of the steps, and only the top choice is required.
+// Useful for problems such as finding the optimal cut points in a fixed-pitch
+// (vertical or horizontal) situation.
+// Skeletal Example:
+// DPPoint* array = new DPPoint[width];
+// for (int i = 0; i < width; i++) {
+//   array[i].AddLocalCost(cost_at_i)
+// }
+// DPPoint* best_end = DPPoint::Solve(..., array);
+// while (best_end != nullptr) {
+//   int cut_index = best_end - array;
+//   best_end = best_end->best_prev();
+// }
+// delete [] array;
+class DPPoint {
+public:
+  // Pointer-to-member type for cost functions. A cost function evaluates the
+  // total cost at this (excluding this's local_cost) for the given
+  // predecessor and, if it beats this's total_cost, replaces the appropriate
+  // values in this.
+  using CostFunc = int64_t (DPPoint::*)(const DPPoint *);
+
+  // All members start from the default initializers given on their
+  // declarations below.
+  DPPoint() = default;
+
+  // Solve the dynamic programming problem for the given array of points, with
+  // the given size and cost function.
+  // Steps backwards are limited to being between min_step and max_step
+  // inclusive.
+  // The return value is the tail of the best path.
+  static DPPoint *Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
+                        DPPoint *points);
+
+  // A CostFunc that takes the variance of step into account in the cost.
+  int64_t CostWithVariance(const DPPoint *prev);
+
+  // Accessors.
+  int total_cost() const {
+    return total_cost_;
+  }
+  int Pathlength() const {
+    return total_steps_;
+  }
+  const DPPoint *best_prev() const {
+    return best_prev_;
+  }
+  // Accumulates new_cost into this point's own (local) cost.
+  void AddLocalCost(int new_cost) {
+    local_cost_ += new_cost;
+  }
+
+private:
+  // Code common to different cost functions.
+
+  // Update the other members if the cost is lower.
+  void UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint *prev, int32_t n, int32_t sig_x,
+                      int64_t sig_xsq);
+
+  int32_t local_cost_ = 0;           // Cost of this point on its own.
+  int32_t total_cost_ = INT32_MAX;   // Sum of all costs in best path to here.
+                                     // During cost calculations local_cost is excluded.
+  int32_t total_steps_ = 1;          // Number of steps in best path to here.
+  const DPPoint *best_prev_ = nullptr; // Pointer to prev point in best path from here.
+  // Information for computing the variance part of the cost.
+  int32_t n_ = 0;       // Number of steps in best path to here for variance.
+  int32_t sig_x_ = 0;   // Sum of step sizes for computing variance.
+  int64_t sig_xsq_ = 0; // Sum of squares of steps for computing variance.
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CCSTRUCT_DPPOINT_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/fontinfo.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/fontinfo.cpp
@ -0,0 +1,227 @@
+///////////////////////////////////////////////////////////////////////
+// File:        fontinfo.cpp
+// Description: Font information classes abstracted from intproto.h/cpp.
+// Author:      rays@google.com (Ray Smith)
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "fontinfo.h"
+#include "bitvector.h"
+#include "unicity_table.h"
+
+namespace tesseract {
+
+// Writes this FontInfo to fp: first the basic info (write_info), then the
+// spacing info (write_spacing_info). Returns false as soon as either fails.
+bool FontInfo::Serialize(FILE *fp) const {
+  return write_info(fp, *this) && write_spacing_info(fp, *this);
+}
+// Reads this FontInfo from fp: first the basic info (read_info), then the
+// spacing info (read_spacing_info). Returns false as soon as either fails.
+bool FontInfo::DeSerialize(TFile *fp) {
+  return read_info(fp, this) && read_spacing_info(fp, this);
+}
+
+// Installs FontInfoDeleteCallback as the clear callback of the table.
+FontInfoTable::FontInfoTable() {
+  set_clear_callback(std::bind(FontInfoDeleteCallback, std::placeholders::_1));
+}
+
+// Defaulted out of line: no cleanup is done here beyond what the base class
+// destructor does.
+FontInfoTable::~FontInfoTable() = default;
+
+// Writes the whole table to fp by delegating to SerializeClasses.
+// Returns false in case of error.
+bool FontInfoTable::Serialize(FILE *fp) const {
+  const bool written = SerializeClasses(fp);
+  return written;
+}
+// Empties the table, then reads it back from fp by delegating to
+// DeSerializeClasses. Returns false in case of error.
+bool FontInfoTable::DeSerialize(TFile *fp) {
+  this->truncate(0);
+  const bool loaded = DeSerializeClasses(fp);
+  return loaded;
+}
+
+// Returns true if any font in font_set has exactly the same properties value
+// as the font with index font_id in this table.
+bool FontInfoTable::SetContainsFontProperties(int font_id,
+                                              const std::vector<ScoredFont> &font_set) const {
+  const uint32_t wanted = at(font_id).properties;
+  for (size_t k = 0; k < font_set.size(); ++k) {
+    if (at(font_set[k].fontinfo_id).properties == wanted) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if the fonts in font_set do not all share the same properties
+// value. An empty set returns false.
+bool FontInfoTable::SetContainsMultipleFontProperties(
+    const std::vector<ScoredFont> &font_set) const {
+  if (font_set.empty()) {
+    return false;
+  }
+  // Every later font is compared against the properties of the first one.
+  const uint32_t reference = at(font_set.front().fontinfo_id).properties;
+  for (auto it = font_set.begin() + 1; it != font_set.end(); ++it) {
+    if (at(it->fontinfo_id).properties != reference) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Moves any non-empty FontSpacingInfo entries from other to this.
+// For each font in other that has a spacing_vec:
+//  - if this table has no matching font (get_index < 0), the whole FontInfo
+//    is bit-copied in and its name and spacing_vec pointers are taken over;
+//  - otherwise the matching font's existing spacing data is destroyed and
+//    replaced by other's.
+// In both cases other no longer refers to the moved spacing_vec afterwards.
+void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable *other) {
+  using namespace std::placeholders; // for _1, _2
+  set_clear_callback(std::bind(FontInfoDeleteCallback, _1));
+  for (int i = 0; i < other->size(); ++i) {
+    std::vector<FontSpacingInfo *> *spacing_vec = other->at(i).spacing_vec;
+    if (spacing_vec == nullptr) {
+      continue; // Nothing to move for this font.
+    }
+    int target_index = get_index(other->at(i));
+    if (target_index < 0) {
+      // Bit copy the FontInfo and steal all the pointers.
+      push_back(other->at(i));
+      other->at(i).name = nullptr;
+    } else {
+      // The FontInfo owns the FontSpacingInfo objects as well as the vector
+      // (see FontInfo::add_spacing), so free the elements too. Deleting only
+      // the vector would leak them.
+      std::vector<FontSpacingInfo *> *stale = at(target_index).spacing_vec;
+      if (stale != nullptr) {
+        for (FontSpacingInfo *entry : *stale) {
+          delete entry;
+        }
+        delete stale;
+      }
+      at(target_index).spacing_vec = spacing_vec;
+    }
+    other->at(i).spacing_vec = nullptr;
+  }
+}
+
+// Transfers every FontInfo of this table into target. target is cleared
+// first and given FontInfoDeleteCallback as its clear callback. After the
+// call each entry left in this table has null name and spacing_vec pointers,
+// because those pointers are now held by the copies in target.
+void FontInfoTable::MoveTo(UnicityTable<FontInfo> *target) {
+  target->clear();
+  target->set_clear_callback(std::bind(FontInfoDeleteCallback, std::placeholders::_1));
+  for (int i = 0; i < size(); ++i) {
+    FontInfo &source = at(i);
+    // Bit copy the FontInfo, then drop our pointers so they are not held twice.
+    target->push_back(source);
+    source.name = nullptr;
+    source.spacing_vec = nullptr;
+  }
+}
+
+// Callbacks for GenericVector.
+// Frees everything the given FontInfo points to: each FontSpacingInfo in
+// spacing_vec, the spacing_vec itself, and the name array.
+// f is received by value, so the caller's FontInfo is not modified and still
+// holds the (now freed) pointers.
+void FontInfoDeleteCallback(FontInfo f) {
+  std::vector<FontSpacingInfo *> *spacing = f.spacing_vec;
+  if (spacing != nullptr) {
+    for (FontSpacingInfo *entry : *spacing) {
+      delete entry;
+    }
+    delete spacing;
+  }
+  delete[] f.name;
+}
+
+/*---------------------------------------------------------------------------*/
+// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
+// Reads the basic font info from f into fi: a 32-bit name length, the name
+// bytes, then the properties word.
+// On return fi->name points to a newly allocated, NUL-terminated buffer (also
+// when reading the name or properties fails). Any previous value of fi->name
+// is overwritten without being freed.
+// Returns false in case of error.
+bool read_info(TFile *f, FontInfo *fi) {
+  uint32_t size;
+  if (!f->DeSerialize(&size)) {
+    return false;
+  }
+  // size + 1 is evaluated in 32 bits, so UINT32_MAX would wrap to a
+  // zero-length allocation that is then written at index size. Treat such a
+  // length as corrupt input.
+  if (size == UINT32_MAX) {
+    return false;
+  }
+  // Value-initialized (all zero) so that fi->name is a valid C string even if
+  // the read below fails part-way.
+  char *font_name = new char[size + 1]();
+  fi->name = font_name;
+  if (!f->DeSerialize(font_name, size)) {
+    return false;
+  }
+  font_name[size] = '\0';
+  return f->DeSerialize(&fi->properties);
+}
+
+// Writes the basic font info of fi to f: the name length as a 32-bit value,
+// the name bytes (without a terminator), then the properties word.
+// fi.name must not be null, since it is passed to strlen.
+// Returns false in case of error.
+bool write_info(FILE *f, const FontInfo &fi) {
+  int32_t name_length = strlen(fi.name);
+  if (!tesseract::Serialize(f, &name_length)) {
+    return false;
+  }
+  if (!tesseract::Serialize(f, &fi.name[0], name_length)) {
+    return false;
+  }
+  return tesseract::Serialize(f, &fi.properties);
+}
+
+// Reads the spacing info of a font from f into fi.
+// The stream holds an entry count followed by, per entry, the two x gaps, a
+// kerning size and (only when that size is positive) the two kerning vectors.
+// A negative kerning size marks an entry that is left as nullptr in
+// fi->spacing_vec. A count of zero leaves fi untouched.
+// Returns false in case of a read error.
+bool read_spacing_info(TFile *f, FontInfo *fi) {
+  int32_t vec_size, kern_size;
+  if (!f->DeSerialize(&vec_size)) {
+    return false;
+  }
+  ASSERT_HOST(vec_size >= 0);
+  if (vec_size == 0) {
+    return true;
+  }
+  fi->init_spacing(vec_size);
+  for (int i = 0; i < vec_size; ++i) {
+    auto *fs = new FontSpacingInfo();
+    const bool header_ok = f->DeSerialize(&fs->x_gap_before) &&
+                           f->DeSerialize(&fs->x_gap_after) && f->DeSerialize(&kern_size);
+    if (!header_ok) {
+      delete fs;
+      return false;
+    }
+    if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec
+      delete fs;
+      continue;
+    }
+    if (kern_size > 0) {
+      const bool kerning_ok =
+          f->DeSerialize(fs->kerned_unichar_ids) && f->DeSerialize(fs->kerned_x_gaps);
+      if (!kerning_ok) {
+        delete fs;
+        return false;
+      }
+    }
+    // fi takes ownership of fs from here on.
+    fi->add_spacing(i, fs);
+  }
+  return true;
+}
+
+// Writes the spacing info of fi to f, in the layout read_spacing_info
+// expects: the entry count, then per entry the two x gaps, the kerning size
+// and (only when that size is positive) the two kerning vectors.
+// A null entry is written as two invalid (-1) x gaps and a kerning size of -1.
+// Returns false in case of error.
+bool write_spacing_info(FILE *f, const FontInfo &fi) {
+  int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
+  if (!tesseract::Serialize(f, &vec_size)) {
+    return false;
+  }
+  // Two elements are written from this buffer in one call, so it must really
+  // hold two values. Passing the address of a single int16_t with a count of
+  // 2 would read past the end of that object.
+  // NOTE(review): assumes the third Serialize argument is an element count,
+  // as its other uses in this file suggest - confirm against serialis.h.
+  int16_t x_gaps_invalid[2] = {-1, -1};
+  for (int i = 0; i < vec_size; ++i) {
+    FontSpacingInfo *fs = fi.spacing_vec->at(i);
+    int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
+    if (fs == nullptr) {
+      // Writing two invalid x-gaps.
+      if (!tesseract::Serialize(f, x_gaps_invalid, 2) || !tesseract::Serialize(f, &kern_size)) {
+        return false;
+      }
+    } else {
+      if (!tesseract::Serialize(f, &fs->x_gap_before) ||
+          !tesseract::Serialize(f, &fs->x_gap_after) || !tesseract::Serialize(f, &kern_size)) {
+        return false;
+      }
+    }
+    // kern_size > 0 implies fs is non-null (a null entry has kern_size -1).
+    if (kern_size > 0 &&
+        (!Serialize(f, fs->kerned_unichar_ids) || !Serialize(f, fs->kerned_x_gaps))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Writes a FontSet to f: the number of entries, then the entries themselves.
+// Returns false in case of error.
+bool write_set(FILE *f, const FontSet &fs) {
+  int size = fs.size();
+  // data() is valid on an empty vector, whereas &fs[0] is undefined behaviour
+  // when there is no element 0.
+  return tesseract::Serialize(f, &size) && tesseract::Serialize(f, fs.data(), size);
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/fontinfo.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/fontinfo.h
@ -0,0 +1,205 @@
+///////////////////////////////////////////////////////////////////////
+// File:        fontinfo.h
+// Description: Font information classes abstracted from intproto.h/cpp.
+// Author:      rays@google.com (Ray Smith)
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
+#define TESSERACT_CCSTRUCT_FONTINFO_H_
+
+#include "errcode.h"
+
+#include <tesseract/unichar.h>
+#include "genericvector.h"
+
+#include <cstdint> // for uint16_t, uint32_t
+#include <cstdio>  // for FILE
+#include <vector>
+
+namespace tesseract {
+
+template <typename T>
+class UnicityTable;
+
+// Pairs a font with a classifier score. The scores come from the low-level
+// integer matcher, so they are in the uint16_t range. Fonts are an index to
+// fontinfo_table.
+// These get copied around a lot, so best to keep them small.
+struct ScoredFont {
+  // Default: no font (-1) with a zero score, taken from the member
+  // initializers below.
+  ScoredFont() = default;
+  ScoredFont(int font_id, uint16_t classifier_score)
+      : fontinfo_id(font_id), score(classifier_score) {}
+
+  // Index into fontinfo table, but inside the classifier, may be a shapetable
+  // index.
+  int32_t fontinfo_id = -1;
+  // Raw score from the low-level classifier.
+  uint16_t score = 0;
+};
+
+// Struct for information about spacing between characters in a particular font.
+// Note that x_gap_before and x_gap_after have no default initializers; they
+// are only zeroed when the struct is value-initialized (e.g. by
+// new FontSpacingInfo()).
+struct FontSpacingInfo {
+  // Gap added to the preceding character's x_gap_after when no kerning entry
+  // applies (see FontInfo::get_spacing).
+  int16_t x_gap_before;
+  // Gap added to the following character's x_gap_before when no kerning entry
+  // applies (see FontInfo::get_spacing).
+  int16_t x_gap_after;
+  // Characters that have a specific gap after this one. Parallel to
+  // kerned_x_gaps: entry i of one goes with entry i of the other.
+  std::vector<UNICHAR_ID> kerned_unichar_ids;
+  // The specific gap for each character listed in kerned_unichar_ids.
+  std::vector<int16_t> kerned_x_gaps;
+};
+
+/*
+ * Describes a single font: its name, a bit field of properties (italic,
+ * bold, fixed pitch, serif, fraktur) and optional per-character spacing
+ * information.
+ * The destructor is defaulted, so it frees neither name nor spacing_vec.
+ */
+struct FontInfo {
+  FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {}
+  ~FontInfo() = default;
+
+  // Two FontInfos are equal when their names compare equal. Both names must
+  // be non-null, since they are passed to strcmp.
+  bool operator==(const FontInfo &rhs) const {
+    const int name_order = strcmp(name, rhs.name);
+    return name_order == 0;
+  }
+
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(FILE *fp) const;
+  // Reads from the given file. Returns false in case of error.
+  bool DeSerialize(TFile *fp);
+
+  // Allocates spacing_vec with unicharset_size (null) slots. Any previous
+  // spacing_vec pointer is overwritten without being freed.
+  void init_spacing(int unicharset_size) {
+    auto *fresh = new std::vector<FontSpacingInfo *>();
+    spacing_vec = fresh;
+    spacing_vec->resize(unicharset_size);
+  }
+  // Adds the given pointer to FontSpacingInfo to spacing_vec member
+  // (FontInfo class takes ownership of the pointer).
+  // Note: init_spacing should be called before calling this function.
+  void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
+    ASSERT_HOST(static_cast<size_t>(uch_id) < spacing_vec->size());
+    (*spacing_vec)[uch_id] = spacing_info;
+  }
+
+  // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID, or
+  // nullptr when there is no spacing_vec or uch_id is outside it.
+  const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
+    if (spacing_vec == nullptr) {
+      return nullptr;
+    }
+    if (spacing_vec->size() <= static_cast<size_t>(uch_id)) {
+      return nullptr;
+    }
+    return (*spacing_vec)[uch_id];
+  }
+
+  // Fills spacing with the value of the x gap expected between the two given
+  // UNICHAR_IDs. Returns true on success, false (leaving *spacing untouched)
+  // when either character has no spacing info.
+  bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const {
+    const FontSpacingInfo *prev_fsi = get_spacing(prev_uch_id);
+    const FontSpacingInfo *fsi = get_spacing(uch_id);
+    if (prev_fsi == nullptr || fsi == nullptr) {
+      return false;
+    }
+    // A kerning entry for this exact pair takes precedence; the first match
+    // wins.
+    const std::vector<UNICHAR_ID> &kerned_ids = prev_fsi->kerned_unichar_ids;
+    for (size_t k = 0; k < kerned_ids.size(); ++k) {
+      if (kerned_ids[k] == uch_id) {
+        *spacing = prev_fsi->kerned_x_gaps[k];
+        return true;
+      }
+    }
+    // Otherwise combine the generic gaps of the two characters.
+    *spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
+    return true;
+  }
+
+  // Property tests: one bit of properties each.
+  bool is_italic() const {
+    return (properties & 1) != 0;
+  }
+  bool is_bold() const {
+    return (properties & 2) != 0;
+  }
+  bool is_fixed_pitch() const {
+    return (properties & 4) != 0;
+  }
+  bool is_serif() const {
+    return (properties & 8) != 0;
+  }
+  bool is_fraktur() const {
+    return (properties & 16) != 0;
+  }
+
+  // Font name, compared by operator==.
+  char *name;
+  // Bit field tested by the is_*() functions above.
+  uint32_t properties;
+  // The universal_id is a field reserved for the initialization process
+  // to assign a unique id number to all fonts loaded for the current
+  // combination of languages. This id will then be returned by
+  // ResultIterator::WordFontAttributes.
+  int32_t universal_id;
+  // Horizontal spacing between characters (indexed by UNICHAR_ID).
+  std::vector<FontSpacingInfo *> *spacing_vec;
+};
+
+// Every class (character) owns a FontSet that represents all the fonts that can
+// render this character.
+// Since almost all the characters from the same script share the same set of
+// fonts, the sets are shared over multiple classes (see
+// Classify::fontset_table_). Thus, a class only store an id to a set.
+// Because some fonts cannot render just one character of a set, there are a
+// lot of FontSet that differ only by one font. Rather than storing directly
+// the FontInfo in the FontSet structure, it's better to share FontInfos among
+// FontSets (Classify::fontinfo_table_).
+using FontSet = std::vector<int>;
+
+// Class that adds a bit of functionality on top of GenericVector to
+// implement a table of FontInfo that replaces UnicityTable<FontInfo>.
+// TODO(rays) change all references once all existing traineddata files
+// are replaced.
+class FontInfoTable : public GenericVector<FontInfo> {
+public:
+  TESS_API // when you remove inheritance from GenericVector, move this on
+  // class level
+  FontInfoTable();
+  TESS_API
+  ~FontInfoTable();
+
+  // Writes to the given file. Returns false in case of error.
+  TESS_API
+  bool Serialize(FILE *fp) const;
+  // Reads from the given file, replacing the current contents of the table.
+  // Returns false in case of error.
+  TESS_API
+  bool DeSerialize(TFile *fp);
+
+  // Returns true if the given set of fonts includes one with the same
+  // properties as font_id.
+  TESS_API
+  bool SetContainsFontProperties(int font_id, const std::vector<ScoredFont> &font_set) const;
+  // Returns true if the given set of fonts includes multiple properties.
+  TESS_API
+  bool SetContainsMultipleFontProperties(const std::vector<ScoredFont> &font_set) const;
+
+  // Moves any non-empty FontSpacingInfo entries from other to this.
+  TESS_API
+  void MoveSpacingInfoFrom(FontInfoTable *other);
+  // Moves this to the target unicity table.
+  TESS_API
+  void MoveTo(UnicityTable<FontInfo> *target);
+};
+
+// Deletion callbacks for GenericVector.
+// Frees the name and the spacing data that f points to. f is taken by value.
+void FontInfoDeleteCallback(FontInfo f);
+
+// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
+// Each returns false in case of error.
+// Reads the name and properties of a font from f into fi.
+bool read_info(TFile *f, FontInfo *fi);
+// Writes the name and properties of fi to f.
+bool write_info(FILE *f, const FontInfo &fi);
+// Reads the per-character spacing info of a font from f into fi.
+bool read_spacing_info(TFile *f, FontInfo *fi);
+// Writes the per-character spacing info of fi to f.
+bool write_spacing_info(FILE *f, const FontInfo &fi);
+// Writes the size and contents of the font set fs to f.
+bool write_set(FILE *f, const FontSet &fs);
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CCSTRUCT_FONTINFO_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/image.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/image.cpp
@ -0,0 +1,60 @@
+///////////////////////////////////////////////////////////////////////
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "image.h"
+
+#include <allheaders.h>
+
+namespace tesseract {
+
+// Returns a new handle produced by pixClone() on the wrapped Pix.
+Image Image::clone() const {
+  Image shared(pixClone(pix_));
+  return shared;
+}
+
+// Returns the result of pixCopy() into a freshly allocated destination
+// (nullptr is passed as the destination Pix).
+Image Image::copy() const {
+  Image duplicate(pixCopy(nullptr, pix_));
+  return duplicate;
+}
+
+// Hands the address of the wrapped pointer to pixDestroy().
+void Image::destroy() {
+  Pix **slot = &pix_;
+  pixDestroy(slot);
+}
+
+// Returns true only if pixZero() reports 1 for the wrapped Pix. The flag
+// starts at 0, so a pixZero() call that leaves it untouched yields false.
+bool Image::isZero() const {
+  l_int32 empty_flag = 0;
+  pixZero(pix_, &empty_flag);
+  const bool all_clear = (empty_flag == 1);
+  return all_clear;
+}
+
+// Returns pixOr() of this image and i, written to a new destination.
+Image Image::operator|(Image i) const {
+  Image merged(pixOr(nullptr, pix_, i.pix_));
+  return merged;
+}
+
+// ORs i into this image, using the wrapped Pix as both source and
+// destination of pixOr().
+Image &Image::operator|=(Image i) {
+  Pix *const target = pix_;
+  pixOr(target, target, i.pix_);
+  return *this;
+}
+
+// Returns pixAnd() of this image and i, written to a new destination.
+Image Image::operator&(Image i) const {
+  Image masked(pixAnd(nullptr, pix_, i.pix_));
+  return masked;
+}
+
+// ANDs i into this image, using the wrapped Pix as both source and
+// destination of pixAnd().
+Image &Image::operator&=(Image i) {
+  Pix *const target = pix_;
+  pixAnd(target, target, i.pix_);
+  return *this;
+}
+
+}
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/image.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/image.h
@ -0,0 +1,53 @@
+///////////////////////////////////////////////////////////////////////
+// File:        image.h
+// Description: Image wrapper.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_IMAGE_H_
+#define TESSERACT_CCSTRUCT_IMAGE_H_
+
+#include <tesseract/export.h>
+
+struct Pix;
+
+namespace tesseract {
+
+// Thin wrapper around a Pix pointer. No destructor is declared, so the
+// wrapped Pix is not released automatically; call destroy() explicitly.
+class TESS_API Image {
+public:
+  Pix *pix_ = nullptr; // Wrapped image pointer; nullptr when empty.
+
+public:
+  Image() = default;
+  // Implicit conversion from a raw pointer; only stores the pointer.
+  Image(Pix *pix) : pix_(pix) {}
+
+  // service
+  // Implicit access to the raw pointer.
+  operator Pix *() const { return pix_; }
+  // Explicit access to the address of the stored pointer.
+  explicit operator Pix **() { return &pix_; }
+  Pix *operator->() const { return pix_; }
+
+  // api
+  Image clone() const; // increases refcount
+  Image copy() const;  // does full copy
+  void destroy();
+  bool isZero() const;
+
+  // ops
+  // The binary forms return a new Image; the compound forms modify *this.
+  Image operator|(Image) const;
+  Image &operator|=(Image);
+  Image operator&(Image) const;
+  Image &operator&=(Image);
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_IMAGE_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/imagedata.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/imagedata.cpp
@ -0,0 +1,742 @@
+///////////////////////////////////////////////////////////////////////
+// File:        imagedata.cpp
+// Description: Class to hold information about a single multi-page tiff
+//              training file and its corresponding boxes or text file.
+// Author:      Ray Smith
+//
+// (C) Copyright 2013, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "imagedata.h"
+
+#include "boxread.h"    // for ReadMemBoxes
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::CYAN, ScrollView::NONE
+#include "tprintf.h"    // for tprintf
+
+#include "helpers.h"  // for IntCastRounded, TRand, ClipToRange, Modulo
+#include "serialis.h" // for TFile
+
+#include <allheaders.h> // for pixDestroy, pixGetHeight, pixGetWidth, lept_...
+
+#include <cinttypes> // for PRId64
+
+namespace tesseract {
+
+// Number of documents to read ahead while training. Doesn't need to be very
+// large.
+const int kMaxReadAhead = 8;
+
+// Default state: page number -1 and horizontal text.
+ImageData::ImageData()
+    : page_number_(-1)
+    , vertical_text_(false) {}
+// Constructs page 0 from an in-memory image. The pix is handed to SetPix(),
+// so the caller gives up ownership of it.
+ImageData::ImageData(bool vertical, Image pix)
+    : page_number_(0)
+    , vertical_text_(vertical) {
+  SetPix(pix);
+}
+// Releases the stored pix when the image is kept as a Pix rather than as
+// encoded bytes.
+ImageData::~ImageData() {
+#if defined(TESSERACT_IMAGEDATA_AS_PIX)
+  internal_pix_.destroy();
+#endif
+}
+
+// Builds and returns an ImageData from the basic data. Note that imagedata,
+// truth_text, and box_text are all the actual file data, NOT filenames.
+// Returns nullptr (after printing an error) if neither boxes nor a non-empty
+// truth_text are available; otherwise the caller owns the returned object.
+// An imagedatasize of 0 is accepted and leaves the stored image data empty.
+ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
+                            const char *imagedata, int imagedatasize, const char *truth_text,
+                            const char *box_text) {
+  auto *image_data = new ImageData();
+  image_data->imagefilename_ = name;
+  image_data->page_number_ = page_number;
+  image_data->language_ = lang;
+  // Save the imagedata.
+  // TODO: optimize resize (no init).
+  image_data->image_data_.resize(imagedatasize);
+  // Indexing element 0 of an empty vector is undefined, so only copy when
+  // there is something to copy.
+  if (!image_data->image_data_.empty()) {
+    memcpy(image_data->image_data_.data(), imagedata, imagedatasize);
+  }
+  if (!image_data->AddBoxes(box_text)) {
+    if (truth_text == nullptr || truth_text[0] == '\0') {
+      tprintf("Error: No text corresponding to page %d from image %s!\n", page_number, name);
+      delete image_data;
+      return nullptr;
+    }
+    image_data->transcription_ = truth_text;
+    // If we have no boxes, the transcription is in the 0th box_texts_.
+    image_data->box_texts_.emplace_back(truth_text);
+    // We will create a box for the whole image on PreScale, to save unpacking
+    // the image now.
+  } else if (truth_text != nullptr && truth_text[0] != '\0' &&
+             image_data->transcription_ != truth_text) {
+    // Save the truth text as it is present and disagrees with the box text.
+    image_data->transcription_ = truth_text;
+  }
+  return image_data;
+}
+
+// Writes every field to fp in a fixed order, stopping at the first failure.
+// Returns false in case of error.
+bool ImageData::Serialize(TFile *fp) const {
+  // && short-circuits, so nothing is written after the first failed field.
+  const bool fields_written = fp->Serialize(imagefilename_) && fp->Serialize(&page_number_) &&
+                              fp->Serialize(image_data_) && fp->Serialize(language_) &&
+                              fp->Serialize(transcription_) && fp->Serialize(boxes_) &&
+                              fp->Serialize(box_texts_);
+  if (!fields_written) {
+    return false;
+  }
+  // The vertical flag is stored as a single signed byte.
+  int8_t vertical_flag = vertical_text_;
+  return fp->Serialize(&vertical_flag);
+}
+
+// Reads every field from fp in the order Serialize() wrote them, stopping at
+// the first failure. Returns false in case of error.
+bool ImageData::DeSerialize(TFile *fp) {
+  // && short-circuits, so nothing is read after the first failed field.
+  const bool fields_read = fp->DeSerialize(imagefilename_) && fp->DeSerialize(&page_number_) &&
+                           fp->DeSerialize(image_data_) && fp->DeSerialize(language_) &&
+                           fp->DeSerialize(transcription_) && fp->DeSerialize(boxes_) &&
+                           fp->DeSerialize(box_texts_);
+  if (!fields_read) {
+    return false;
+  }
+  // The vertical flag is stored as a single signed byte.
+  int8_t vertical_flag = 0;
+  if (fp->DeSerialize(&vertical_flag)) {
+    vertical_text_ = vertical_flag != 0;
+    return true;
+  }
+  return false;
+}
+
+// As DeSerialize, but only seeks past the data - hence a static method.
+// The fields are consumed in the same order that DeSerialize reads them.
+bool ImageData::SkipDeSerialize(TFile *fp) {
+  // Image filename, then the page number.
+  int32_t page_number;
+  if (!fp->DeSerializeSkip() || !fp->DeSerialize(&page_number)) {
+    return false;
+  }
+  // Image data, language and transcription.
+  for (int field = 0; field < 3; ++field) {
+    if (!fp->DeSerializeSkip()) {
+      return false;
+    }
+  }
+  // The boxes, skipped with an element size of sizeof(TBOX).
+  if (!fp->DeSerializeSkip(sizeof(TBOX))) {
+    return false;
+  }
+  // The box texts: a count followed by that many skippable entries.
+  int32_t text_count;
+  if (!fp->DeSerialize(&text_count)) {
+    return false;
+  }
+  for (int32_t remaining = text_count; remaining > 0; --remaining) {
+    if (!fp->DeSerializeSkip()) {
+      return false;
+    }
+  }
+  // Finally the vertical flag byte.
+  int8_t vertical_flag = 0;
+  return fp->DeSerialize(&vertical_flag);
+}
+
+// Stores the given Pix. By default it is encoded into image_data_ by
+// SetPixInternal() (PNG, or PNM if PNG fails, which requires more memory) and
+// destroyed; with TESSERACT_IMAGEDATA_AS_PIX it is kept as internal_pix_.
+void ImageData::SetPix(Image pix) {
+#ifndef TESSERACT_IMAGEDATA_AS_PIX
+  SetPixInternal(pix, &image_data_);
+#else
+  internal_pix_ = pix;
+#endif
+}
+
+// Returns the Pix image for *this. Must be pixDestroyed after use.
+Image ImageData::GetPix() const {
+#if !defined(TESSERACT_IMAGEDATA_AS_PIX)
+  // Default build: decode the stored bytes.
+  return GetPixInternal(image_data_);
+#elif defined(GRAPHICS_DISABLED)
+  /* The only caller of this is the scaling functions to prescale the
+   * source. Thus we can just return a new pointer to the same data. */
+  return internal_pix_.clone();
+#else
+  /* pixCopy always does an actual copy, so the caller can modify the
+   * changed data. */
+  return internal_pix_.copy();
+#endif
+}
+
+// Scales the image to target_height and reports the results through whichever
+// output pointers are non-nullptr. A target_height of 0 means "use the
+// original height, capped at max_height". scaled_width/scaled_height receive
+// the size of the scaled image, scale_factor the factor that was applied, and
+// boxes the stored boxes scaled by that factor (or a single box covering the
+// whole image if there are none). Returns the scaled Pix, which must be
+// pixDestroyed after use, or nullptr if scaling failed.
+Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
+                         int *scaled_height, std::vector<TBOX> *boxes) const {
+  Image source = GetPix();
+  ASSERT_HOST(source != nullptr);
+  const int input_width = pixGetWidth(source);
+  const int input_height = pixGetHeight(source);
+  if (target_height == 0) {
+    target_height = std::min(input_height, max_height);
+  }
+  const float im_factor = static_cast<float>(target_height) / input_height;
+  // Provisional outputs; they stay in place if scaling fails below.
+  if (scaled_width != nullptr) {
+    *scaled_width = IntCastRounded(im_factor * input_width);
+  }
+  if (scaled_height != nullptr) {
+    *scaled_height = target_height;
+  }
+  // Get the scaled image.
+  Image scaled = pixScale(source, im_factor, im_factor);
+  if (scaled == nullptr) {
+    tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
+            im_factor);
+    source.destroy();
+    return nullptr;
+  }
+  // Replace the provisional outputs with the actual scaled dimensions.
+  if (scaled_width != nullptr) {
+    *scaled_width = pixGetWidth(scaled);
+  }
+  if (scaled_height != nullptr) {
+    *scaled_height = pixGetHeight(scaled);
+  }
+  source.destroy();
+  if (boxes != nullptr) {
+    // Get the boxes.
+    boxes->clear();
+    for (auto scaled_box : boxes_) {
+      scaled_box.scale(im_factor);
+      boxes->push_back(scaled_box);
+    }
+    if (boxes->empty()) {
+      // Make a single box for the whole image.
+      TBOX whole_image(0, 0, im_factor * input_width, target_height);
+      boxes->push_back(whole_image);
+    }
+  }
+  if (scale_factor != nullptr) {
+    *scale_factor = im_factor;
+  }
+  return scaled;
+}
+
+// Returns the number of bytes held in image_data_.
+int ImageData::MemoryUsed() const {
+  const auto stored_bytes = image_data_.size();
+  return stored_bytes;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the image in a new ScrollView window, then either each box with its
+// text or, when there are no boxes, the full transcription. Waits on the
+// window before returning. Does nothing if the image cannot be obtained.
+void ImageData::Display() const {
+  const int kTextSize = 64;
+  // Draw the image.
+  Image pix = GetPix();
+  if (pix == nullptr) {
+    return;
+  }
+  const int width = pixGetWidth(pix);
+  const int height = pixGetHeight(pix);
+  auto *win =
+      new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
+                     2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
+  win->Draw(pix, 0, height - 1);
+  pix.destroy();
+  // Draw the boxes.
+  win->Pen(ScrollView::RED);
+  win->Brush(ScrollView::NONE);
+  const bool have_boxes = !boxes_.empty();
+  // Shrink the text if the first box is less than half the default size.
+  int text_size = kTextSize;
+  if (have_boxes && boxes_[0].height() * 2 < text_size) {
+    text_size = boxes_[0].height() * 2;
+  }
+  win->TextAttributes("Arial", text_size, false, false, false);
+  if (have_boxes) {
+    for (size_t b = 0; b < boxes_.size(); ++b) {
+      boxes_[b].plot(win);
+      win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].c_str());
+    }
+  } else {
+    // The full transcription.
+    win->Pen(ScrollView::CYAN);
+    win->Text(0, height + kTextSize * 2, transcription_.c_str());
+  }
+  win->Update();
+  win->Wait();
+}
+
+#endif
+
+// Appends the boxes and texts whose page matches page_number_ and extends
+// the transcription with each accepted text. A negative page_number_ accepts
+// every entry.
+void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
+                         const std::vector<int> &box_pages) {
+  // Copy the boxes and make the transcription.
+  for (size_t i = 0; i < box_pages.size(); ++i) {
+    const bool on_this_page = page_number_ < 0 || box_pages[i] == page_number_;
+    if (on_this_page) {
+      transcription_ += texts[i];
+      boxes_.push_back(boxes[i]);
+      box_texts_.push_back(texts[i]);
+    }
+  }
+}
+
+#ifndef TESSERACT_IMAGEDATA_AS_PIX
+// Saves the given Pix as a PNG-encoded string and destroys it.
+// In case of missing PNG support in Leptonica use PNM format,
+// which requires more memory.
+// If both encoders fail, an error is printed and *image_data is left empty
+// instead of copying from an uninitialized buffer.
+void ImageData::SetPixInternal(Image pix, std::vector<char> *image_data) {
+  l_uint8 *data = nullptr;
+  size_t size = 0;
+  l_int32 ret = pixWriteMem(&data, &size, pix, IFF_PNG);
+  if (ret) {
+    ret = pixWriteMem(&data, &size, pix, IFF_PNM);
+  }
+  pix.destroy();
+  if (ret || data == nullptr) {
+    // Neither format could be written; store nothing.
+    tprintf("Error: Failed to encode pix as PNG or PNM!\n");
+    size = 0;
+  }
+  // TODO: optimize resize (no init).
+  image_data->resize(size);
+  // Indexing element 0 of an empty vector is undefined, so only copy when
+  // there are bytes to copy.
+  if (size > 0) {
+    memcpy(image_data->data(), data, size);
+  }
+  if (data != nullptr) {
+    lept_free(data);
+  }
+}
+
+// Decodes image_data with pixReadMem() and returns the result, which must be
+// pixDestroyed after use. Returns nullptr when image_data is empty.
+Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
+  if (image_data.empty()) {
+    return nullptr;
+  }
+  // Convert the array to an image.
+  const auto *encoded = reinterpret_cast<const unsigned char *>(image_data.data());
+  return pixReadMem(encoded, image_data.size());
+}
+#endif
+
+// Parses box_text as box-file content and adds the boxes that match the page
+// number. Returns false if box_text is null or empty, or if parsing fails
+// (in which case an error is printed).
+bool ImageData::AddBoxes(const char *box_text) {
+  if (box_text == nullptr || box_text[0] == '\0') {
+    return false;
+  }
+  std::vector<TBOX> parsed_boxes;
+  std::vector<std::string> parsed_texts;
+  std::vector<int> parsed_pages;
+  const bool parsed = ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
+                                   /*continue_on_failure*/ true, &parsed_boxes, &parsed_texts,
+                                   nullptr, &parsed_pages);
+  if (!parsed) {
+    tprintf("Error: No boxes for page %d from image %s!\n", page_number_, imagefilename_.c_str());
+    return false;
+  }
+  AddBoxes(parsed_boxes, parsed_texts, parsed_pages);
+  return true;
+}
+
+// Records the document name only; nothing is loaded. The page offset and
+// total page count start at -1, memory counters at 0, and no reader is set.
+DocumentData::DocumentData(const std::string &name)
+    : document_name_(name),
+      pages_offset_(-1),
+      total_pages_(-1),
+      memory_used_(0),
+      max_memory_(0),
+      reader_(nullptr) {}
+
+// Waits for any running loader thread, then deletes all cached pages while
+// holding both mutexes.
+DocumentData::~DocumentData() {
+  if (thread.joinable()) {
+    thread.join();
+  }
+  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
+  std::lock_guard<std::mutex> general_guard(general_mutex_);
+  for (auto *cached_page : pages_) {
+    delete cached_page;
+  }
+}
+
+// Sets up the document via SetDocument(), then caches pages starting at
+// start_page using the given reader. Returns the result of ReCachePages().
+bool DocumentData::LoadDocument(const char *filename, int start_page, int64_t max_memory,
+                                FileReader reader) {
+  SetDocument(filename, max_memory, reader);
+  // SetDocument resets the offset to -1, so it has to be assigned afterwards.
+  pages_offset_ = start_page;
+  const bool any_pages_loaded = ReCachePages();
+  return any_pages_loaded;
+}
+
+// Records the file name, memory limit and reader under both mutexes and
+// resets the page offset to -1. No data is read here.
+void DocumentData::SetDocument(const char *filename, int64_t max_memory, FileReader reader) {
+  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
+  std::lock_guard<std::mutex> general_guard(general_mutex_);
+  reader_ = reader;
+  max_memory_ = max_memory;
+  pages_offset_ = -1;
+  document_name_ = filename;
+}
+
+// Serializes all cached pages into an in-memory TFile and writes it to
+// filename using writer. Prints a message and returns false on error.
+bool DocumentData::SaveDocument(const char *filename, FileWriter writer) {
+  std::lock_guard<std::mutex> guard(pages_mutex_);
+  TFile out;
+  out.OpenWrite(nullptr);
+  // CloseWrite is attempted only if serialization succeeded.
+  const bool saved = out.Serialize(pages_) && out.CloseWrite(filename, writer);
+  if (saved) {
+    return true;
+  }
+  tprintf("Serialize failed: %s\n", filename);
+  return false;
+}
+
+// Appends page to the cached pages under pages_mutex_ and adds its
+// MemoryUsed() to the running memory total.
+void DocumentData::AddPageToDocument(ImageData *page) {
+  std::lock_guard<std::mutex> guard(pages_mutex_);
+  pages_.push_back(page);
+  const auto updated_total = memory_used() + page->MemoryUsed();
+  set_memory_used(updated_total);
+}
+
+// If the given index is not currently loaded, loads it using a separate
+// thread.
+// pages_mutex_ is held only while the cached pages are reset. It is released
+// before joining a previous loader thread, because that thread runs
+// ReCachePages(), which locks pages_mutex_ itself: joining while still
+// holding the mutex can deadlock if the loader has not acquired it yet.
+void DocumentData::LoadPageInBackground(int index) {
+  ImageData *available = nullptr;
+  if (IsPageAvailable(index, &available)) {
+    return;
+  }
+  {
+    std::lock_guard<std::mutex> lock(pages_mutex_);
+    if (pages_offset_ == index) {
+      return; // A load starting at this index has already been requested.
+    }
+    pages_offset_ = index;
+    for (auto *stale_page : pages_) {
+      delete stale_page;
+    }
+    pages_.clear();
+  }
+  // The mutex is free here, so a pending loader can finish and be joined.
+  if (thread.joinable()) {
+    thread.join();
+  }
+  thread = std::thread(&tesseract::DocumentData::ReCachePages, this);
+}
+
+// Returns a pointer to the page with the given index, modulo the total
+// number of pages. Blocks (yielding between polls) until the page becomes
+// available, scheduling a background load if none is pending for index.
+const ImageData *DocumentData::GetPage(int index) {
+  ImageData *page = nullptr;
+  for (;;) {
+    if (IsPageAvailable(index, &page)) {
+      break;
+    }
+    // If there is no background load scheduled, schedule one now.
+    bool needs_loading;
+    {
+      std::lock_guard<std::mutex> guard(pages_mutex_);
+      needs_loading = pages_offset_ != index;
+    }
+    if (needs_loading) {
+      LoadPageInBackground(index);
+    }
+    // We can't directly load the page, or the background load will delete it
+    // while the caller is using it, so give it a chance to work.
+    std::this_thread::yield();
+  }
+  return page;
+}
+
+// Returns true if the requested page is available, and provides a pointer,
+// which is nullptr if the document is empty or index is negative. May block
+// on pages_mutex_, even though it doesn't guarantee to return true.
+bool DocumentData::IsPageAvailable(int index, ImageData **page) {
+  std::lock_guard<std::mutex> guard(pages_mutex_);
+  const int num_pages = NumPages();
+  if (num_pages == 0 || index < 0) {
+    *page = nullptr; // Empty Document.
+    return true;
+  }
+  if (num_pages < 0) {
+    return false; // Page count not known yet.
+  }
+  index = Modulo(index, num_pages);
+  // The page must lie inside the currently cached window.
+  if (index < pages_offset_ || !(index < pages_offset_ + pages_.size())) {
+    return false;
+  }
+  *page = pages_[index - pages_offset_]; // Page is available already.
+  return true;
+}
+
+// Deletes all cached pages and resets the offset, page count and memory
+// counter, keeping the document metadata. Returns the memory counter value
+// from before the reset.
+int64_t DocumentData::UnCache() {
+  std::lock_guard<std::mutex> guard(pages_mutex_);
+  const int64_t freed_bytes = memory_used();
+  for (auto *cached_page : pages_) {
+    delete cached_page;
+  }
+  pages_.clear();
+  pages_offset_ = -1;
+  set_total_pages(-1);
+  set_memory_used(0);
+  tprintf("Unloaded document %s, saving %" PRId64 " memory\n", document_name_.c_str(),
+          freed_bytes);
+  return freed_bytes;
+}
+
+// Shuffles the cached pages with a generator seeded from the document name,
+// so different documents get shuffled differently, but the same for the same
+// name.
+void DocumentData::Shuffle() {
+  TRand rng;
+  rng.set_seed(document_name_.c_str());
+  const int page_count = pages_.size();
+  // Execute one random swap for each page in the document.
+  for (int swaps_left = page_count; swaps_left > 0; --swaps_left) {
+    const int first = rng.IntRand() % page_count;
+    const int second = rng.IntRand() % page_count;
+    std::swap(pages_[first], pages_[second]);
+  }
+}
+
+// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_
+// starting at index pages_offset_.
+// Any previously cached pages are deleted first. A page stored as null in
+// the file is kept as a nullptr entry so that indices stay aligned; it does
+// not contribute to the memory total. Returns true if at least one entry was
+// cached.
+bool DocumentData::ReCachePages() {
+  std::lock_guard<std::mutex> lock(pages_mutex_);
+  // Read the file.
+  set_total_pages(0);
+  set_memory_used(0);
+  int loaded_pages = 0;
+  for (auto *stale_page : pages_) {
+    delete stale_page;
+  }
+  pages_.clear();
+  TFile fp;
+  if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) ||
+      loaded_pages <= 0) {
+    tprintf("Deserialize header failed: %s\n", document_name_.c_str());
+    return false;
+  }
+  pages_offset_ %= loaded_pages;
+  // Skip pages before the first one we want, and load the rest until max
+  // memory and skip the rest after that.
+  int page;
+  for (page = 0; page < loaded_pages; ++page) {
+    uint8_t non_null;
+    if (!fp.DeSerialize(&non_null)) {
+      break;
+    }
+    if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) {
+      if (non_null && !ImageData::SkipDeSerialize(&fp)) {
+        break;
+      }
+    } else {
+      ImageData *image_data = nullptr;
+      if (non_null) {
+        image_data = new ImageData;
+        if (!image_data->DeSerialize(&fp)) {
+          delete image_data;
+          break;
+        }
+      }
+      pages_.push_back(image_data);
+      // A null entry has no filename or memory to account for; dereferencing
+      // it would crash.
+      if (image_data != nullptr) {
+        if (image_data->imagefilename().empty()) {
+          image_data->set_imagefilename(document_name_);
+          image_data->set_page_number(page);
+        }
+        set_memory_used(memory_used() + image_data->MemoryUsed());
+      }
+    }
+  }
+  if (page < loaded_pages) {
+    // The loop stopped early, so the file is truncated or corrupt.
+    tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(), page,
+            loaded_pages);
+    for (auto *partial_page : pages_) {
+      delete partial_page;
+    }
+    pages_.clear();
+  } else if (loaded_pages > 1) {
+    // Avoid lots of messages for training with single line images.
+    tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), loaded_pages,
+            pages_offset_ + 1, pages_offset_ + pages_.size(), document_name_.c_str());
+  }
+  set_total_pages(loaded_pages);
+  return !pages_.empty();
+}
+
+// A collection of DocumentData that knows roughly how much memory it is using.
+// max_memory is stored as the limit for the whole cache.
+DocumentCache::DocumentCache(int64_t max_memory)
+    : max_memory_(max_memory) {}
+
+// Deletes every document held by the cache.
+DocumentCache::~DocumentCache() {
+  for (auto *owned_document : documents_) {
+    delete owned_document;
+  }
+}
+
+// Adds all the documents in the list of filenames, counting memory.
+// The reader is used to read the files.
+// Returns true if the cache holds at least one document and page 0 can be
+// fetched; an empty cache returns false.
+bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
+                                  CachingStrategy cache_strategy, FileReader reader) {
+  cache_strategy_ = cache_strategy;
+  int64_t fair_share_memory = 0;
+  // In the round-robin case, each DocumentData handles restricting its content
+  // to its fair share of memory. In the sequential case, DocumentCache
+  // determines which DocumentDatas are held entirely in memory.
+  // The empty check avoids dividing by zero when no filenames are given.
+  if (cache_strategy_ == CS_ROUND_ROBIN && !filenames.empty()) {
+    fair_share_memory = max_memory_ / filenames.size();
+  }
+  for (const auto &filename : filenames) {
+    auto *document = new DocumentData(filename);
+    document->SetDocument(filename.c_str(), fair_share_memory, reader);
+    AddToCache(document);
+  }
+  if (!documents_.empty()) {
+    // Try to get the first page now to verify the list of filenames.
+    if (GetPageBySerial(0) != nullptr) {
+      return true;
+    }
+    tprintf("Load of page 0 failed!\n");
+  }
+  return false;
+}
+
+// Adds document to the cache by appending the pointer to documents_.
+// Always returns true. The cache destructor deletes every entry in
+// documents_, so the pointer is owned by the cache from here on.
+bool DocumentCache::AddToCache(DocumentData *data) {
+  documents_.push_back(data);
+  return true;
+}
+
+// Returns the first cached document whose name equals document_name, or
+// nullptr if there is none.
+DocumentData *DocumentCache::FindDocument(const std::string &document_name) const {
+  DocumentData *match = nullptr;
+  for (auto *candidate : documents_) {
+    if (candidate->document_name() == document_name) {
+      match = candidate;
+      break;
+    }
+  }
+  return match;
+}
+
+// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
+// strategy, could take a long time, because a page is fetched from every
+// document.
+int DocumentCache::TotalPages() {
+  if (cache_strategy_ != CS_SEQUENTIAL) {
+    int page_sum = 0;
+    for (auto *doc : documents_) {
+      // We have to load a page to make NumPages() valid.
+      doc->GetPage(0);
+      page_sum += doc->NumPages();
+    }
+    return page_sum;
+  }
+  // In sequential mode, we assume each doc has the same number of pages
+  // whether it is true or not.
+  if (num_pages_per_doc_ == 0) {
+    GetPageSequential(0);
+  }
+  return num_pages_per_doc_ * documents_.size();
+}
+
+// Returns a page by serial number, selecting them in a round-robin fashion
+// from all the documents. Highly disk-intensive, but doesn't need samples
+// to be shuffled between files to begin with.
+// Also requests background loads for up to kMaxReadAhead following serials.
+// The cache must contain at least one document.
+const ImageData *DocumentCache::GetPageRoundRobin(int serial) {
+  int num_docs = documents_.size();
+  // Same precondition as GetPageSequential; fails cleanly instead of
+  // dividing by zero below.
+  ASSERT_HOST(num_docs > 0);
+  int doc_index = serial % num_docs;
+  const ImageData *doc = documents_[doc_index]->GetPage(serial / num_docs);
+  for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
+    doc_index = (serial + offset) % num_docs;
+    int page = (serial + offset) / num_docs;
+    documents_[doc_index]->LoadPageInBackground(page);
+  }
+  return doc;
+}
+
+// Returns a page by serial number, selecting them in sequence from each file.
+// Requires the samples to be shuffled between the files to give a random or
+// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
+// The page count of the first document is used as the page count of every
+// document. After fetching the page, documents are un-cached if the summed
+// memory_used() reaches max_memory_, and a background load of the next
+// document is requested if it is not cached and memory allows.
+const ImageData *DocumentCache::GetPageSequential(int serial) {
+  int num_docs = documents_.size();
+  ASSERT_HOST(num_docs > 0);
+  if (num_pages_per_doc_ == 0) {
+    // Use the pages in the first doc as the number of pages in each doc.
+    documents_[0]->GetPage(0);
+    num_pages_per_doc_ = documents_[0]->NumPages();
+    if (num_pages_per_doc_ == 0) {
+      tprintf("First document cannot be empty!!\n");
+      ASSERT_HOST(num_pages_per_doc_ > 0);
+    }
+    // Get rid of zero now if we don't need it.
+    if (serial / num_pages_per_doc_ % num_docs > 0) {
+      documents_[0]->UnCache();
+    }
+  }
+  // Map the serial number to a document and to a page within that document.
+  int doc_index = serial / num_pages_per_doc_ % num_docs;
+  const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
+  // Count up total memory. Background loading makes it more complicated to
+  // keep a running count.
+  int64_t total_memory = 0;
+  for (auto *document : documents_) {
+    total_memory += document->memory_used();
+  }
+  if (total_memory >= max_memory_) {
+    // Find something to un-cache.
+    // If there are more than 3 in front, then serial is from the back reader
+    // of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
+    // we create a hole between them and then un-caching the backmost occupied
+    // will work for both.
+    int num_in_front = CountNeighbourDocs(doc_index, 1);
+    for (int offset = num_in_front - 2; offset > 1 && total_memory >= max_memory_; --offset) {
+      int next_index = (doc_index + offset) % num_docs;
+      total_memory -= documents_[next_index]->UnCache();
+    }
+    // If that didn't work, the best solution is to un-cache from the back. If
+    // we take away the document that a 2nd reader is using, it will put it
+    // back and make a hole between.
+    int num_behind = CountNeighbourDocs(doc_index, -1);
+    // This loop only runs when num_behind is negative.
+    for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; ++offset) {
+      int next_index = (doc_index + offset + num_docs) % num_docs;
+      total_memory -= documents_[next_index]->UnCache();
+    }
+  }
+  // Start loading the next document if it is not cached and memory allows.
+  int next_index = (doc_index + 1) % num_docs;
+  if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
+    documents_[next_index]->LoadPageInBackground(0);
+  }
+  return doc;
+}
+
+// Helper counts the number of adjacent cached neighbours of index looking in
+// direction dir, ie index+dir, index+2*dir etc. The result carries the sign
+// of dir; if every probed neighbour is cached, num_docs is returned.
+int DocumentCache::CountNeighbourDocs(int index, int dir) {
+  const int num_docs = documents_.size();
+  int offset = dir;
+  while (abs(offset) < num_docs) {
+    // Adding num_docs keeps the operand non-negative for backward steps.
+    const int probe = (index + offset + num_docs) % num_docs;
+    if (!documents_[probe]->IsCached()) {
+      return offset - dir;
+    }
+    offset += dir;
+  }
+  return num_docs;
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/imagedata.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/imagedata.h
@ -0,0 +1,362 @@
+///////////////////////////////////////////////////////////////////////
+// File:        imagedata.h
+// Description: Class to hold information about a single image and its
+//              corresponding boxes or text file.
+// Author:      Ray Smith
+//
+// (C) Copyright 2013, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
+#define TESSERACT_IMAGE_IMAGEDATA_H_
+
+#include "image.h"
+#include "points.h" // for FCOORD
+
+#include <mutex>  // for std::mutex
+#include <thread> // for std::thread
+
+struct Pix;
+
+namespace tesseract {
+
+class TFile;
+class ScrollView;
+class TBOX;
+
+// Padding, in output pixels, applied in feature mode.
+const int kFeaturePadding = 2;
+// Padding, in pixels, added around text boxes.
+const int kImagePadding = 4;
+
+// Enum to determine the caching and data sequencing strategy.
+enum CachingStrategy {
+  // Reads all of one file before moving on to the next. Requires samples to be
+  // shuffled across files. The sample count of the first file is assumed for
+  // every file, which gives high-speed random access. As a consequence, a
+  // later file that is smaller has some entries used more than once, and a
+  // later file that is larger has some entries that are never used.
+  // Best for larger data sets that don't fit in memory.
+  CS_SEQUENTIAL,
+  // Reads one sample from each file in rotation. Does not require shuffled
+  // samples, but is extremely disk-intensive. Samples in smaller files are
+  // also used more often than samples in larger files.
+  // Best for smaller data sets that mostly fit in memory.
+  CS_ROUND_ROBIN,
+};
+
+// Class to hold information on a single image:
+// Filename, cached image as a Pix*, character boxes, text transcription.
+// The text transcription is the ground truth UTF-8 text for the image.
+// Character boxes are optional and indicate the desired segmentation of
+// the text into recognition units.
+class TESS_API ImageData {
+public:
+  ImageData();
+  // Takes ownership of the pix.
+  ImageData(bool vertical, Image pix);
+  ~ImageData();
+
+  // Builds and returns an ImageData from the basic data. Note that imagedata,
+  // truth_text, and box_text are all the actual file data, NOT filenames.
+  static ImageData *Build(const char *name, int page_number, const char *lang,
+                          const char *imagedata, int imagedatasize, const char *truth_text,
+                          const char *box_text);
+
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(TFile *fp) const;
+  // Reads from the given file. Returns false in case of error.
+  bool DeSerialize(TFile *fp);
+  // As DeSerialize, but only seeks past the data - hence a static method.
+  static bool SkipDeSerialize(TFile *fp);
+
+  // Simple accessors and setters for the data members declared below.
+  const std::string &imagefilename() const {
+    return imagefilename_;
+  }
+  void set_imagefilename(const std::string &name) {
+    imagefilename_ = name;
+  }
+  int page_number() const {
+    return page_number_;
+  }
+  void set_page_number(int num) {
+    page_number_ = num;
+  }
+  const std::vector<char> &image_data() const {
+    return image_data_;
+  }
+  const std::string &language() const {
+    return language_;
+  }
+  void set_language(const std::string &lang) {
+    language_ = lang;
+  }
+  const std::string &transcription() const {
+    return transcription_;
+  }
+  const std::vector<TBOX> &boxes() const {
+    return boxes_;
+  }
+  const std::vector<std::string> &box_texts() const {
+    return box_texts_;
+  }
+  const std::string &box_text(int index) const { // index is not range-checked.
+    return box_texts_[index];
+  }
+  // Saves the given Pix as a PNG-encoded string and destroys it.
+  // In case of missing PNG support in Leptonica use PNM format,
+  // which requires more memory.
+  void SetPix(Image pix);
+  // Returns the Pix image for *this. Must be pixDestroyed after use.
+  Image GetPix() const;
+  // Gets anything and everything with a non-nullptr pointer, prescaled to a
+  // given target_height (if 0, then the original image height), and aligned.
+  // Also returns (if not nullptr) the width and height of the scaled image.
+  // The return value is the scaled Pix, which must be pixDestroyed after use,
+  // and scale_factor (if not nullptr) is set to the scale factor applied to
+  // reach target_height. NOTE(review): max_height is not documented here.
+  Image PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
+                int *scaled_height, std::vector<TBOX> *boxes) const;
+
+  int MemoryUsed() const; // NOTE(review): presumably bytes; confirm in imagedata.cpp.
+
+  // Draws the data in a new window.
+  void Display() const;
+
+  // Adds the supplied boxes and transcriptions that correspond to the correct
+  // page number.
+  void AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
+                const std::vector<int> &box_pages);
+
+private:
+  // Static helper: saves the given Pix as a PNG-encoded string into
+  // *image_data and destroys it. In case of missing PNG support in Leptonica
+  // use PNM format, which requires more memory.
+  static void SetPixInternal(Image pix, std::vector<char> *image_data);
+  // Returns the Pix image for the image_data. Must be pixDestroyed after use.
+  static Image GetPixInternal(const std::vector<char> &image_data);
+  // Parses the text string as a box file and adds any discovered boxes that
+  // match the page number. Returns false on error.
+  bool AddBoxes(const char *box_text);
+
+private:
+  std::string imagefilename_; // File to read image from.
+  int32_t page_number_;  // Page number if multi-page tif or -1.
+  // Extra Image member, present only if TESSERACT_IMAGEDATA_AS_PIX is defined.
+  // See https://github.com/tesseract-ocr/tesseract/pull/2965 (EP: reconsider for tess6.0/opencv).
+#ifdef TESSERACT_IMAGEDATA_AS_PIX
+  Image internal_pix_;
+#endif
+  std::vector<char> image_data_;  // PNG/PNM file data.
+  std::string language_;          // Language code for image.
+  std::string transcription_;     // UTF-8 ground truth of image.
+  std::vector<TBOX> boxes_;       // Boxes of the image; may be empty.
+  std::vector<std::string> box_texts_; // String for text in each box.
+  bool vertical_text_;            // Image has been rotated from vertical.
+};
+
+// A collection of ImageData that knows roughly how much memory it is using.
+class DocumentData {
+public:
+  TESS_API
+  explicit DocumentData(const std::string &name);
+  TESS_API
+  ~DocumentData();
+
+  // Reads all the pages in the given lstmf filename to the cache. The reader
+  // is used to read the file.
+  TESS_API
+  bool LoadDocument(const char *filename, int start_page, int64_t max_memory, FileReader reader);
+  // Sets up the document, without actually loading it.
+  void SetDocument(const char *filename, int64_t max_memory, FileReader reader);
+  // Writes all the pages to the given filename. Returns false on error.
+  TESS_API
+  bool SaveDocument(const char *filename, FileWriter writer);
+
+  // Adds the given page data to this document, counting up memory.
+  TESS_API
+  void AddPageToDocument(ImageData *page);
+
+  const std::string &document_name() const { // NOTE(review): lock is released on return.
+    std::lock_guard<std::mutex> lock(general_mutex_);
+    return document_name_;
+  }
+  int NumPages() const { // Value of total_pages_, read under general_mutex_.
+    std::lock_guard<std::mutex> lock(general_mutex_);
+    return total_pages_;
+  }
+  size_t PagesSize() const { // Reads pages_ without taking pages_mutex_.
+    return pages_.size();
+  }
+  int64_t memory_used() const { // Value of memory_used_, read under general_mutex_.
+    std::lock_guard<std::mutex> lock(general_mutex_);
+    return memory_used_;
+  }
+  // If the given index is not currently loaded, loads it using a separate
+  // thread. Note: there are 4 cases:
+  // 1. Document uncached: IsCached() returns false, total_pages_ < 0.
+  // 2. Required page is available: IsPageAvailable returns true. In this case,
+  //    total_pages_ > 0 and
+  //    pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
+  // 3. Pages are loaded, but the required one is not.
+  // 4. The requested page is being loaded by LoadPageInBackground. In this case,
+  //    index == pages_offset_. Once the loading starts, the pages lock is held
+  //    until it completes, at which point IsPageAvailable will unblock and
+  //    return true.
+  void LoadPageInBackground(int index);
+  // Returns a pointer to the page with the given index, modulo the total
+  // number of pages. Blocks until the background load is completed.
+  TESS_API
+  const ImageData *GetPage(int index);
+  // Returns true if the requested page is available, and provides a pointer,
+  // which may be nullptr if the document is empty. May block, even though it
+  // doesn't guarantee to return true.
+  bool IsPageAvailable(int index, ImageData **page);
+  // Takes ownership of the given page index. The page is made nullptr in *this.
+  ImageData *TakePage(int index) {
+    std::lock_guard<std::mutex> lock(pages_mutex_);
+    ImageData *page = pages_[index]; // index is used as-is: no modulo, no range check.
+    pages_[index] = nullptr;
+    return page;
+  }
+  // Returns true if the document is currently loaded or in the process of
+  // loading, which is signalled by NumPages() being non-negative.
+  bool IsCached() const {
+    return NumPages() >= 0;
+  }
+  // Removes all pages from memory and frees the memory, but does not forget
+  // the document metadata. Returns the memory saved.
+  int64_t UnCache();
+  // Shuffles all the pages in the document.
+  void Shuffle();
+
+private:
+  // Sets the value of total_pages_ behind a mutex.
+  void set_total_pages(int total) {
+    std::lock_guard<std::mutex> lock(general_mutex_);
+    total_pages_ = total;
+  }
+  void set_memory_used(int64_t memory_used) { // Sets memory_used_ behind a mutex.
+    std::lock_guard<std::mutex> lock(general_mutex_);
+    memory_used_ = memory_used;
+  }
+  // Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
+  // starting at index pages_offset_.
+  bool ReCachePages();
+
+private:
+  // A name for this document.
+  std::string document_name_;
+  // A group of pages that corresponds in some loose way to a document.
+  std::vector<ImageData *> pages_;
+  // Page number of the first index in pages_.
+  int pages_offset_;
+  // Total number of pages in document (may exceed size of pages_.)
+  int total_pages_;
+  // Total of all pix sizes in the document.
+  int64_t memory_used_;
+  // Max memory to use at any time.
+  int64_t max_memory_;
+  // Saved reader from LoadDocument to allow re-caching.
+  FileReader reader_;
+  // Mutex that protects pages_ and pages_offset_ against multiple parallel
+  // loads, and provides a wait for page.
+  std::mutex pages_mutex_;
+  // Mutex that protects other data members that callers want to access without
+  // waiting for a load operation.
+  mutable std::mutex general_mutex_;
+
+  // Thread which loads document.
+  std::thread thread;
+};
+
+// A collection of DocumentData that knows roughly how much memory it is using.
+// Note that while it supports background read-ahead, it assumes that a single
+// thread is accessing documents, ie it is not safe for multiple threads to
+// access different documents in parallel, as one may de-cache the other's
+// content.
+class DocumentCache {
+public:
+  TESS_API
+  explicit DocumentCache(int64_t max_memory);
+  TESS_API
+  ~DocumentCache();
+
+  // Frees every document held by the cache, empties the list and resets the
+  // per-document page count.
+  void Clear() {
+    for (DocumentData *doc : documents_) {
+      delete doc;
+    }
+    num_pages_per_doc_ = 0;
+    documents_.clear();
+  }
+  // Adds all the documents in the list of filenames, counting memory.
+  // The reader is used to read the files.
+  TESS_API
+  bool LoadDocuments(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
+                     FileReader reader);
+
+  // Adds document to the cache.
+  bool AddToCache(DocumentData *data);
+
+  // Finds and returns a document by name.
+  DocumentData *FindDocument(const std::string &document_name) const;
+
+  // Returns the page for the given serial number. The current cache_strategy_
+  // decides whether the sequential or the round-robin mapping is used.
+  const ImageData *GetPageBySerial(int serial) {
+    return cache_strategy_ == CS_SEQUENTIAL ? GetPageSequential(serial)
+                                            : GetPageRoundRobin(serial);
+  }
+
+  // Read-only access to the list of documents.
+  const std::vector<DocumentData *> &documents() const {
+    return documents_;
+  }
+  // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
+  // strategy, could take a long time.
+  TESS_API
+  int TotalPages();
+
+private:
+  // Returns a page by serial number, selecting them in a round-robin fashion
+  // from all the documents. Highly disk-intensive, but doesn't need samples
+  // to be shuffled between files to begin with.
+  TESS_API
+  const ImageData *GetPageRoundRobin(int serial);
+  // Returns a page by serial number, selecting them in sequence from each file.
+  // Requires the samples to be shuffled between the files to give a random or
+  // uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
+  TESS_API
+  const ImageData *GetPageSequential(int serial);
+
+  // Helper counts the number of adjacent cached neighbour documents_ of index
+  // looking in direction dir, ie index+dir, index+2*dir etc.
+  int CountNeighbourDocs(int index, int dir);
+
+  // The documents held by this cache.
+  std::vector<DocumentData *> documents_;
+  // Strategy to use for caching and serializing data samples.
+  CachingStrategy cache_strategy_ = CS_SEQUENTIAL;
+  // Number of pages in the first document, used as a divisor in
+  // GetPageSequential to determine the document index.
+  int num_pages_per_doc_ = 0;
+  // Max memory allowed in this cache.
+  int64_t max_memory_ = 0;
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_IMAGE_IMAGEDATA_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/linlsq.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/linlsq.cpp
@ -0,0 +1,259 @@
+/**********************************************************************
+ * File:        linlsq.cpp  (Formerly llsq.c)
+ * Description: Linear Least squares fitting code.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "linlsq.h"
+#include <cmath> // for std::sqrt
+#include <cstdio>
+#include "errcode.h"
+
+namespace tesseract {
+
+constexpr ERRCODE EMPTY_LLSQ("Can't delete from an empty LLSQ"); // Raised by LLSQ::remove.
+
+/**********************************************************************
+ * LLSQ::clear
+ *
+ * Function to initialize a LLSQ.
+ **********************************************************************/
+
+void LLSQ::clear() {
+  // Back to the empty state: no weight and every running sum at zero.
+  total_weight = 0.0;
+  sigx = sigy = 0.0;
+  sigxx = sigxy = sigyy = 0.0;
+}
+
+/**********************************************************************
+ * LLSQ::add
+ *
+ * Add an element to the accumulator.
+ **********************************************************************/
+
+void LLSQ::add(double x, double y) {
+  // A plain sample carries unit weight.
+  total_weight += 1.0;
+  // First-order sums.
+  sigx += x;
+  sigy += y;
+  // Second-order sums.
+  sigxx += x * x;
+  sigyy += y * y;
+  sigxy += x * y;
+}
+// Accumulates the point (x, y), scaling its contribution to every sum by
+// weight.
+void LLSQ::add(double x, double y, double weight) {
+  // Form the products first so each is scaled by weight exactly once.
+  const double xx = x * x;
+  const double xy = x * y;
+  const double yy = y * y;
+  total_weight += weight;
+  sigx += x * weight;
+  sigy += y * weight;
+  sigxx += xx * weight;
+  sigxy += xy * weight;
+  sigyy += yy * weight;
+}
+// Merges another accumulator into this one by adding its weight and sums.
+void LLSQ::add(const LLSQ &other) {
+  // First-order sums.
+  sigx += other.sigx;
+  sigy += other.sigy;
+  // Second-order sums.
+  sigxx += other.sigxx;
+  sigxy += other.sigxy;
+  sigyy += other.sigyy;
+  // Combined weight of both accumulators.
+  total_weight += other.total_weight;
+}
+
+/**********************************************************************
+ * LLSQ::remove
+ *
+ * Delete an element from the accumulator.
+ **********************************************************************/
+
+void LLSQ::remove(double x, double y) {
+  // Removing from an accumulator with no weight left is reported through
+  // EMPTY_LLSQ with the ABORT action.
+  if (total_weight <= 0.0) {
+    EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
+  }
+  // Undo what add(x, y) contributed: one unit of weight and each sum.
+  total_weight -= 1.0;
+  sigx -= x;
+  sigy -= y;
+  sigxx -= x * x;
+  sigyy -= y * y;
+  sigxy -= x * y;
+}
+
+/**********************************************************************
+ * LLSQ::m
+ *
+ * Return the gradient of the line fit.
+ **********************************************************************/
+
+double LLSQ::m() const {
+  // Gradient = covariance / x variance; zero x variance yields a gradient
+  // of 0 instead of a division by zero.
+  const double x_spread = x_variance();
+  if (x_spread == 0.0) {
+    return 0.0;
+  }
+  return covariance() / x_spread;
+}
+
+/**********************************************************************
+ * LLSQ::c
+ *
+ * Return the constant of the line fit.
+ **********************************************************************/
+
+double LLSQ::c(double m) const {
+  // Intercept of the line with gradient m through the weighted mean point;
+  // 0 when no weight has been accumulated.
+  return total_weight > 0.0 ? (sigy - m * sigx) / total_weight : 0.0;
+}
+
+/**********************************************************************
+ * LLSQ::rms
+ *
+ * Return the rms error of the fit.
+ **********************************************************************/
+
+double LLSQ::rms(double m, double c) const {
+  // With no accumulated weight there is nothing to measure.
+  if (!(total_weight > 0)) {
+    return 0;
+  }
+  // Sum of squared residuals (y - (m * x + c))^2, expanded in terms of the
+  // running sums.
+  const double sum_sq =
+      sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (total_weight * c - 2 * sigy);
+  // A negative sum (presumably from rounding) is reported as zero error.
+  if (sum_sq >= 0) {
+    return std::sqrt(sum_sq / total_weight); // sqrt of the mean
+  }
+  return 0;
+}
+
+/**********************************************************************
+ * LLSQ::pearson
+ *
+ * Return the pearson product moment correlation coefficient.
+ **********************************************************************/
+
+double LLSQ::pearson() const {
+  // The correlation is reported as 0 when the covariance is zero or the
+  // product of the variances is not positive.
+  const double covar = covariance();
+  if (covar == 0.0) {
+    return 0.0;
+  }
+  const double var_product = x_variance() * y_variance();
+  if (var_product > 0.0) {
+    return covar / std::sqrt(var_product);
+  }
+  return 0.0;
+}
+
+// Returns the weighted mean of the accumulated points as an FCOORD, or the
+// origin when no weight has been accumulated.
+FCOORD LLSQ::mean_point() const {
+  if (!(total_weight > 0.0)) {
+    return FCOORD(0.0f, 0.0f);
+  }
+  return FCOORD(sigx / total_weight, sigy / total_weight);
+}
+
+// Returns the sqrt of the mean squared error measured perpendicular from the
+// line through mean_point() in the direction dir.
+//
+// Derivation:
+//   Lemma:  Let v and x_i (i=1..N) be k-dimensional vectors (1xk matrices).
+//     Let % be dot product and ' be transpose.  Note that:
+//      Sum[i=1..N] (v % x_i)^2
+//         = v * [x_1' x_2' ... x_N'] * [x_1' x_2' .. x_N']' * v'
+//     If x_i have average 0 we have:
+//       = v * (N * COVARIANCE_MATRIX(X)) * v'
+//     Expanded for the case that k = 2, where we treat the dimensions
+//     as x_i and y_i, this is:
+//       = v * (N * [VAR(X), COV(X,Y); COV(X,Y) VAR(Y)]) * v'
+//  Now, we are trying to calculate the mean squared error, where v is
+//  perpendicular to our line of interest:
+//    Mean squared error
+//      = E [ (v % (x_i - x_avg))^2 ]
+//      = Sum (v % (x_i - x_avg))^2 / N
+//      = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v'
+//      = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v'
+//      = code below
+double LLSQ::rms_orth(const FCOORD &dir) const {
+  // v is derived from dir with FCOORD::operator! (presumably the
+  // perpendicular of dir, as the derivation above requires) and normalised.
+  FCOORD normal = !dir;
+  normal.normalise();
+  const double nx = normal.x();
+  const double ny = normal.y();
+  // v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v'
+  const double mean_sq_error =
+      x_variance() * nx * nx + 2 * covariance() * nx * ny + y_variance() * ny * ny;
+  return std::sqrt(mean_sq_error);
+}
+
+// Returns the direction of the fitted line as a unit vector, using the
+// least mean squared perpendicular distance. The line runs through the
+// mean_point, i.e. a point p on the line is given by:
+// p = mean_point() + lambda * vector_fit() for some real number lambda.
+// Note that the result (0<=x<=1, -1<=y<=1) is directionally ambiguous
+// and may be negated without changing its meaning.
+// Fitting a line m + 𝜆v to a set of N points Pi = (xi, yi), where
+// m is the mean point (𝝁, 𝝂) and
+// v is the direction vector (cos𝜃, sin𝜃)
+// The perpendicular distance of each Pi from the line is:
+// (Pi - m) x v, where x is the scalar cross product.
+// Total squared error is thus:
+// E = ∑((xi - 𝝁)sin𝜃 - (yi - 𝝂)cos𝜃)²
+//   = ∑(xi - 𝝁)²sin²𝜃  - 2∑(xi - 𝝁)(yi - 𝝂)sin𝜃 cos𝜃 + ∑(yi - 𝝂)²cos²𝜃
+//   = NVar(xi)sin²𝜃  - 2NCovar(xi, yi)sin𝜃 cos𝜃  + NVar(yi)cos²𝜃   (Eq 1)
+// where Var(xi) is the variance of xi,
+// and Covar(xi, yi) is the covariance of xi, yi.
+// Taking the derivative wrt 𝜃 and setting to 0 to obtain the min/max:
+// 0 = 2NVar(xi)sin𝜃 cos𝜃 -2NCovar(xi, yi)(cos²𝜃 - sin²𝜃) -2NVar(yi)sin𝜃 cos𝜃
+// => Covar(xi, yi)(cos²𝜃 - sin²𝜃) = (Var(xi) - Var(yi))sin𝜃 cos𝜃
+// Using double angles:
+// 2Covar(xi, yi)cos2𝜃 = (Var(xi) - Var(yi))sin2𝜃   (Eq 2)
+// So 𝜃 = 0.5 atan2(2Covar(xi, yi), Var(xi) - Var(yi)) (Eq 3)
+
+// Because it involves 2𝜃 , Eq 2 has 2 solutions 90 degrees apart, but which
+// is the min and which is the max? From Eq1:
+// E/N = Var(xi)sin²𝜃  - 2Covar(xi, yi)sin𝜃 cos𝜃  + Var(yi)cos²𝜃
+// and 90 degrees away, using sin/cos equivalences:
+// E'/N = Var(xi)cos²𝜃  + 2Covar(xi, yi)sin𝜃 cos𝜃  + Var(yi)sin²𝜃
+// The second error is smaller (making it the minimum) iff
+// E'/N < E/N ie:
+// (Var(xi) - Var(yi))(cos²𝜃 - sin²𝜃) < -4Covar(xi, yi)sin𝜃 cos𝜃
+// Using double angles:
+// (Var(xi) - Var(yi))cos2𝜃  < -2Covar(xi, yi)sin2𝜃  (InEq 1)
+// But atan2(2Covar(xi, yi), Var(xi) - Var(yi)) picks 2𝜃  such that:
+// sgn(cos2𝜃) = sgn(Var(xi) - Var(yi)) and sgn(sin2𝜃) = sgn(Covar(xi, yi))
+// so InEq1 can *never* be true, making the atan2 result *always* the min!
+// In the degenerate case, where Covar(xi, yi) = 0 AND Var(xi) = Var(yi),
+// the 2 solutions have equal error and the inequality is still false.
+// Therefore the solution really is as trivial as Eq 3.
+
+// This is equivalent to returning the Principal Component in PCA, or the
+// eigenvector corresponding to the largest eigenvalue in the covariance
+// matrix.  However, atan2 is much simpler! The one reference I found that
+// uses this formula is http://web.mit.edu/18.06/www/Essays/tlsfit.pdf but
+// that is still a much more complex derivation. It seems Pearson had already
+// found this simple solution in 1901.
+// http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559
+FCOORD LLSQ::vector_fit() const {
+  // Direction of the total-least-squares line as a unit vector
+  // (cos(theta), sin(theta)), with
+  // theta = 0.5 * atan2(2 * Covar(x, y), Var(x) - Var(y)).
+  const double x_var = x_variance();
+  const double y_var = y_variance();
+  const double covar = covariance();
+  // The math functions are std-qualified: <cmath> only guarantees the std::
+  // names, and the rest of this file already uses std::sqrt.
+  const double theta = 0.5 * std::atan2(2.0 * covar, x_var - y_var);
+  return FCOORD(std::cos(theta), std::sin(theta));
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/linlsq.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/linlsq.h
@ -0,0 +1,142 @@
+/**********************************************************************
+ * File:        linlsq.h  (Formerly llsq.h)
+ * Description: Linear Least squares fitting code.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_
+#define TESSERACT_CCSTRUCT_LINLSQ_H_
+
+#include "points.h" // for FCOORD
+
+#include <algorithm> // for std::nth_element
+#include <cstdint> // for int32_t
+
+namespace tesseract {
+
+class TESS_API LLSQ {
+public:
+  // Constructs an empty accumulator with all sums at zero.
+  LLSQ() {
+    clear();
+  }
+  // Resets the total weight and every running sum to zero.
+  void clear();
+
+  // Accumulates the point (x, y) with a weight of 1.
+  void add(double x, double y);
+  // Accumulates the point (x, y) with the given weight.
+  void add(double x, double y, double weight);
+  // Merges the weight and sums of another LLSQ into this one.
+  void add(const LLSQ &other);
+  // Removes a point (x, y) that was added with a weight of 1.
+  void remove(double x, double y);
+  // Returns the total weight plus 0.5, truncated to an integer, i.e. the
+  // number of elements when only unit weights were added.
+  int32_t count() const {
+    const double shifted = total_weight + 0.5;
+    return static_cast<int>(shifted);
+  }
+
+  // Gradient of the fitted line.
+  double m() const;
+  // Constant (intercept) of the fitted line for the given gradient m.
+  double c(double m) const;
+  // Rms error of the fit y = m * x + c.
+  double rms(double m, double c) const;
+  // Pearson product moment correlation coefficient.
+  double pearson() const;
+
+  // Returns the x,y means as an FCOORD.
+  FCOORD mean_point() const;
+
+  // Returns the square root of the mean squared error measured perpendicular
+  // from the line through mean_point() in the direction dir.
+  double rms_orth(const FCOORD &dir) const;
+
+  // Returns the direction of the fitted line as a unit vector, using the
+  // least mean squared perpendicular distance. The line runs through the
+  // mean_point, i.e. a point p on the line is given by:
+  // p = mean_point() + lambda * vector_fit() for some real number lambda.
+  // Note that the result (0<=x<=1, -1<=y<=1) is directionally ambiguous
+  // and may be negated without changing its meaning, since a line is only
+  // unique to a range of pi radians.
+  // Modernists prefer to think of this as an Eigenvalue problem, but
+  // Pearson had the simple solution in 1901.
+  //
+  // Note that this is equivalent to returning the Principal Component in PCA,
+  // or the eigenvector corresponding to the largest eigenvalue in the
+  // covariance matrix.
+  FCOORD vector_fit() const;
+
+  // Returns the weighted covariance of x and y, or 0 when no weight has been
+  // accumulated.
+  double covariance() const {
+    return total_weight > 0.0 ? (sigxy - sigx * sigy / total_weight) / total_weight : 0.0;
+  }
+  // Returns the weighted variance of x, or 0 when no weight has been
+  // accumulated.
+  double x_variance() const {
+    return total_weight > 0.0 ? (sigxx - sigx * sigx / total_weight) / total_weight : 0.0;
+  }
+  // Returns the weighted variance of y, or 0 when no weight has been
+  // accumulated.
+  double y_variance() const {
+    return total_weight > 0.0 ? (sigyy - sigy * sigy / total_weight) / total_weight : 0.0;
+  }
+
+private:
+  double total_weight; // no of elements or sum of weights.
+  double sigx;         // sum of x
+  double sigy;         // sum of y
+  double sigxx;        // sum x squared
+  double sigxy;        // sum of xy
+  double sigyy;        // sum y squared
+};
+
+// Returns the median value of the vector, given that the values are
+// circular, with the given modulus. Values may be signed or unsigned,
+// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi).
+// NOTE that the array is shuffled, but the time taken is linear.
+// An assumption is made that most of the values are spread over no more than
+// half the range, but wrap-around is accounted for if the median is near
+// the wrap-around point.
+// Cannot be a member of vector, as it makes heavy use of LLSQ.
+// T must be an integer or float/double type.
+// Returns T() if the vector is empty.
+template <typename T>
+T MedianOfCircularValues(T modulus, std::vector<T> &v) {
+  if (v.empty()) {
+    // There is no median to select, and indexing v below would be undefined.
+    return T();
+  }
+  LLSQ stats;
+  const T halfrange = static_cast<T>(modulus / 2);
+  for (const auto &value : v) {
+    stats.add(value, value + halfrange);
+  }
+  const bool offset_needed = stats.y_variance() < stats.x_variance();
+  if (offset_needed) {
+    // Iterate by reference so the shift is applied to v itself; iterating by
+    // value only modifies a copy of each element.
+    // NOTE(review): the shift is not reduced modulo `modulus`, so it keeps
+    // the relative order of the values - confirm that this is intended.
+    for (auto &value : v) {
+      value += halfrange;
+    }
+  }
+  const auto median_index = v.size() / 2;
+  std::nth_element(v.begin(), v.begin() + median_index, v.end());
+  if (offset_needed) {
+    // Undo the shift so the caller gets values in the original range.
+    for (auto &value : v) {
+      value -= halfrange;
+    }
+  }
+  return v[median_index];
+}
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_LINLSQ_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/matrix.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/matrix.cpp
@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * File:         matrix.cpp  (Formerly matrix.c)
+ * Description:  Ratings matrix code. (Used by associator)
+ * Author:       Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1990, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+/*----------------------------------------------------------------------
+              I n c l u d e s
+----------------------------------------------------------------------*/
+#include "matrix.h"
+
+#include "ratngs.h"
+#include "tprintf.h"
+#include "unicharset.h"
+
+namespace tesseract {
+
+// Destructor.
+// It is defined out of line here, so the compiler can create a single vtable
+// in this translation unit instead of weak vtables in every compilation unit
+// that includes the header.
+MATRIX::~MATRIX() = default;
+
+// Returns true if the cell at (col, row) holds at least one BLOB_CHOICE for
+// which IsClassified() is true. A NOT_CLASSIFIED cell yields false.
+// The wildcard_id argument is not consulted by this implementation.
+bool MATRIX::Classified(int col, int row, int wildcard_id) const {
+  BLOB_CHOICE_LIST *cell = get(col, row);
+  if (cell == NOT_CLASSIFIED) {
+    return false;
+  }
+  BLOB_CHOICE_IT choice_it(cell);
+  choice_it.mark_cycle_pt();
+  while (!choice_it.cycled_list()) {
+    if (choice_it.data()->IsClassified()) {
+      return true;
+    }
+    choice_it.forward();
+  }
+  return false;
+}
+
+// Widens the band of this matrix in place to the given bandwidth. The
+// dimension is left unchanged and existing entries are carried over by
+// ResizeWithCopy.
+void MATRIX::IncreaseBandSize(int bandwidth) {
+  const int unchanged_dim = dimension();
+  ResizeWithCopy(unchanged_dim, bandwidth);
+}
+
+// Builds and returns a MATRIX one larger in dimension, with a new column and
+// row inserted so the blob at the (ind,ind) diagonal location can be split.
+// Each existing entry is moved to the coordinates given by
+// MATRIX_COORD::MapForSplit, and every BLOB_CHOICE on it is told its new cell.
+// The list pointers are transferred to the new MATRIX and *this is deleted,
+// so the caller must not use the old object after the call.
+MATRIX *MATRIX::ConsumeAndMakeBigger(int ind) {
+  const int old_dim = dimension();
+  const int old_band = bandwidth();
+  // The band must grow by one if any column in (ind - old_band, ind] already
+  // uses its outermost band slot.
+  int new_band = old_band;
+  int probe_col = ind;
+  while (probe_col >= 0 && probe_col > ind - old_band) {
+    if (array_[probe_col * old_band + old_band - 1] != empty_) {
+      new_band = old_band + 1;
+      break;
+    }
+    --probe_col;
+  }
+  auto *bigger = new MATRIX(old_dim + 1, new_band);
+  for (int col = 0; col < old_dim; ++col) {
+    for (int row = col; row < old_dim && row < col + old_band; ++row) {
+      MATRIX_COORD target(col, row);
+      target.MapForSplit(ind);
+      BLOB_CHOICE_LIST *choices = get(col, row);
+      if (choices == nullptr) {
+        continue;
+      }
+      // Correct the matrix location stored on each choice.
+      BLOB_CHOICE_IT choice_it(choices);
+      for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
+        choice_it.data()->set_matrix_cell(target.col, target.row);
+      }
+      ASSERT_HOST(target.Valid(*bigger));
+      bigger->put(target.col, target.row, choices);
+    }
+  }
+  delete this;
+  return bigger;
+}
+
+// Allocates and returns a MATRIX of the same dimension and bandwidth in
+// which every non-null cell holds a new BLOB_CHOICE_LIST deep-copied from the
+// corresponding cell of *this via BLOB_CHOICE::deep_copy. Any
+// LanguageModelState attached to the BLOB_CHOICEs is not copied.
+MATRIX *MATRIX::DeepCopy() const {
+  const int dim = dimension();
+  const int band = bandwidth();
+  auto *clone = new MATRIX(dim, band);
+  for (int col = 0; col < dim; ++col) {
+    for (int row = col; row < dim && row < col + band; ++row) {
+      BLOB_CHOICE_LIST *source = get(col, row);
+      if (source == nullptr) {
+        continue;
+      }
+      auto *duplicate = new BLOB_CHOICE_LIST;
+      duplicate->deep_copy(source, &BLOB_CHOICE::deep_copy);
+      clone->put(col, row, duplicate);
+    }
+  }
+  return clone;
+}
+
+// Prints the ratings matrix with tprintf in two passes: first every
+// classified cell with all of its choices (unichar, rating, certainty), then
+// a tab-separated lower-triangular grid showing up to 3 choices per cell.
+void MATRIX::print(const UNICHARSET &unicharset) const {
+  tprintf("Ratings Matrix (top 3 choices)\n");
+  const int dim = dimension();
+  const int band = bandwidth();
+  // Pass 1: detailed listing of each classified cell inside the band.
+  for (int col = 0; col < dim; ++col) {
+    for (int row = col; row < dim && row < col + band; ++row) {
+      BLOB_CHOICE_LIST *rating = get(col, row);
+      if (rating != NOT_CLASSIFIED) {
+        BLOB_CHOICE_IT b_it(rating);
+        tprintf("col=%d row=%d ", col, row);
+        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+          BLOB_CHOICE *choice = b_it.data();
+          tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(choice->unichar_id()),
+                  choice->rating(), choice->certainty());
+        }
+        tprintf("\n");
+      }
+    }
+    tprintf("\n");
+  }
+  tprintf("\n");
+  // Column header for the grid.
+  for (int col = 0; col < dim; ++col) {
+    tprintf("\t%d", col);
+  }
+  tprintf("\n");
+  // Pass 2: one grid line per row, covering columns 0..row.
+  for (int row = 0; row < dim; ++row) {
+    // The inner loop always runs at least once (col == 0), so the row label
+    // can be emitted up front.
+    tprintf("%d\t", row);
+    for (int col = 0; col <= row; ++col) {
+      BLOB_CHOICE_LIST *rating = nullptr;
+      const bool inside_band = row < col + band;
+      if (inside_band) {
+        rating = get(col, row);
+      }
+      if (!inside_band || rating == NOT_CLASSIFIED) {
+        tprintf(" \t");
+        continue;
+      }
+      BLOB_CHOICE_IT b_it(rating);
+      int shown = 0;
+      for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+        tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
+        if (++shown == 3) {
+          break;
+        }
+      }
+      tprintf("\t");
+    }
+    tprintf("\n");
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/matrix.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/matrix.h
@ -0,0 +1,728 @@
+/******************************************************************************
+ * File:         matrix.h
+ * Description:  Generic 2-d array/matrix and banded triangular matrix class.
+ * Author:       Ray Smith
+ * TODO(rays) Separate from ratings matrix, which it also contains:
+ *
+ * Description:  Ratings matrix class (specialization of banded matrix).
+ *               Segmentation search matrix of lists of BLOB_CHOICE.
+ * Author:       Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1990, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+
+#ifndef TESSERACT_CCSTRUCT_MATRIX_H_
+#define TESSERACT_CCSTRUCT_MATRIX_H_
+
+#include "errcode.h" // for ASSERT_HOST
+#include "helpers.h" // for ReverseN, ClipToRange
+#include "kdpair.h"  // for KDPairInc
+#include "points.h"  // for ICOORD
+
+#include "serialis.h" // for TFile
+
+#include <algorithm> // for max, min
+#include <cmath>     // for sqrt, fabs, isfinite
+#include <cstdint>   // for int32_t
+#include <cstdio>    // for FILE
+#include <cstring>   // for memcpy
+
+namespace tesseract {
+
+class BLOB_CHOICE_LIST;
+class UNICHARSET;
+
+#define NOT_CLASSIFIED static_cast<BLOB_CHOICE_LIST *>(nullptr)
+
+// A generic class to hold a 2-D matrix with entries of type T, but can also
+// act as a base class for other implementations, such as a triangular or
+// banded matrix.
+template <class T>
+class GENERIC_2D_ARRAY {
+public:
+  // Records the array size, the empty element and the caller-supplied
+  // storage, but neither allocates nor initializes memory: calls to the
+  // num_elements member from a constructor are routed to the base class
+  // implementation. Subclasses can either pass the memory in here, or
+  // allocate afterwards by calling Resize(). The storage passed in is
+  // released with delete[] by the destructor.
+  GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty, T *array)
+      : array_(array), empty_(empty), dim1_(dim1), dim2_(dim2), size_allocated_(dim1 * dim2) {}
+  // Constructor for a full rectangular matrix: allocates dim1 * dim2
+  // elements and sets every one of them to empty.
+  GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty)
+      : empty_(empty), dim1_(dim1), dim2_(dim2), size_allocated_(dim1 * dim2) {
+    array_ = new T[size_allocated_];
+    std::fill_n(array_, size_allocated_, empty_);
+  }
+  // Default constructor for array allocation. Creates an array with no
+  // storage, zero dimensions and an empty element of static_cast<T>(0).
+  // Use Resize to set the size.
+  GENERIC_2D_ARRAY()
+      : array_(nullptr), empty_(static_cast<T>(0)), dim1_(0), dim2_(0), size_allocated_(0) {}
+  // Copy constructor: starts from the default (storage-less) state and then
+  // assigns from src. NOTE: the empty element of the copy is
+  // static_cast<T>(0); it is not taken from src.
+  GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T> &src) : GENERIC_2D_ARRAY() {
+    *this = src;
+  }
+  // Releases the element storage. Objects pointed to by the elements (when T
+  // is a pointer type) are not deleted here; see delete_matrix_pointers().
+  virtual ~GENERIC_2D_ARRAY() {
+    delete[] array_;
+  }
+
+  // Makes *this a copy of src: takes on src's dimensions and copies its
+  // elements bitwise with memcpy, so T must be safe to copy that way.
+  // The empty element of *this is left unchanged.
+  void operator=(const GENERIC_2D_ARRAY<T> &src) {
+    if (this == &src) {
+      // Self-assignment: nothing to do, and memcpy must not be handed
+      // overlapping source and destination ranges.
+      return;
+    }
+    ResizeNoInit(src.dim1(), src.dim2());
+    int size = num_elements();
+    if (size > 0) {
+      memcpy(array_, src.array_, size * sizeof(array_[0]));
+    }
+  }
+
+  // Sets the dimensions to size1 x size2, reallocating only when the current
+  // allocation is too small. Old data is not kept across a reallocation and
+  // the size1 * size2 payload is not initialized.
+  // The allocation is extended at the end by pad elements, which are set to
+  // the empty value, allowing deliberate access beyond the bounds of the
+  // array.
+  void ResizeNoInit(int size1, int size2, int pad = 0) {
+    const int payload = size1 * size2;
+    const int required = payload + pad;
+    if (size_allocated_ < required) {
+      delete[] array_;
+      array_ = new T[required];
+      size_allocated_ = required;
+    }
+    dim1_ = size1;
+    dim2_ = size2;
+    // Fill the padding so it isn't left uninitialized.
+    int pad_index = payload;
+    while (pad_index < required) {
+      array_[pad_index] = empty_;
+      ++pad_index;
+    }
+  }
+
+  // Reallocate the array to the given size. Does not keep old data.
+  // The empty element is replaced by the given value and every element of
+  // the resized array is then set to it.
+  void Resize(int size1, int size2, const T &empty) {
+    empty_ = empty;
+    ResizeNoInit(size1, size2);
+    Clear();
+  }
+
+  // Changes the dimensions to size1 x size2 while preserving the elements
+  // whose (col, row) position exists in both the old and the new shape.
+  // Positions that are new are set to the empty value. Does nothing when the
+  // dimensions are already as requested.
+  void ResizeWithCopy(int size1, int size2) {
+    if (size1 == dim1_ && size2 == dim2_) {
+      return;
+    }
+    const int new_size = size1 * size2;
+    T *resized = new T[new_size];
+    for (int col = 0; col < size1; ++col) {
+      for (int row = 0; row < size2; ++row) {
+        T &target = resized[col * size2 + row];
+        if (col < dim1_ && row < dim2_) {
+          target = array_[col * dim2() + row];
+        } else {
+          target = empty_;
+        }
+      }
+    }
+    delete[] array_;
+    array_ = resized;
+    dim1_ = size1;
+    dim2_ = size2;
+    size_allocated_ = new_size;
+  }
+
+  // Overwrites every element (num_elements() of them) with the empty value.
+  void Clear() {
+    std::fill_n(array_, num_elements(), empty_);
+  }
+
+  // Writes the dimensions, the empty element and then all the elements to
+  // the given file. Returns false as soon as any write fails.
+  // Only works with bitwise-serializeable types!
+  bool Serialize(FILE *fp) const {
+    return SerializeSize(fp) && tesseract::Serialize(fp, &empty_) &&
+           tesseract::Serialize(fp, &array_[0], num_elements());
+  }
+
+  // TFile flavour of Serialize: writes the dimensions, the empty element and
+  // then all the elements. Returns false as soon as any write fails.
+  bool Serialize(TFile *fp) const {
+    return SerializeSize(fp) && fp->Serialize(&empty_) &&
+           fp->Serialize(&array_[0], num_elements());
+  }
+
+  // Reads the dimensions (resizing the array), the empty element and then
+  // all the elements from the given file. Returns false on the first failed
+  // read.
+  // Only works with bitwise-serializeable types!
+  // If swap is true, the bytes of the empty element and of each array
+  // element are reversed after reading (big/little-endian swap).
+  bool DeSerialize(bool swap, FILE *fp) {
+    if (!DeSerializeSize(swap, fp) || !tesseract::DeSerialize(fp, &empty_)) {
+      return false;
+    }
+    if (swap) {
+      ReverseN(&empty_, sizeof(empty_));
+    }
+    int count = num_elements();
+    if (!tesseract::DeSerialize(fp, &array_[0], count)) {
+      return false;
+    }
+    for (int i = 0; swap && i < count; ++i) {
+      ReverseN(&array_[i], sizeof(array_[i]));
+    }
+    return true;
+  }
+
+  // TFile flavour of DeSerialize: reads the dimensions (resizing the array),
+  // the empty element and then all the elements. Returns false on the first
+  // failed read.
+  bool DeSerialize(TFile *fp) {
+    if (!DeSerializeSize(fp)) {
+      return false;
+    }
+    if (!fp->DeSerialize(&empty_)) {
+      return false;
+    }
+    return fp->DeSerialize(&array_[0], num_elements());
+  }
+
+  // Writes the dimensions, then the empty element and every array element
+  // through their own Serialize member. Returns false on the first failure.
+  // Assumes a T::Serialize(FILE*) const function.
+  bool SerializeClasses(FILE *fp) const {
+    if (!SerializeSize(fp) || !empty_.Serialize(fp)) {
+      return false;
+    }
+    const int count = num_elements();
+    int written = 0;
+    while (written < count && array_[written].Serialize(fp)) {
+      ++written;
+    }
+    // The loop stops early only when an element failed to serialize.
+    return written >= count;
+  }
+
+  // Reads the dimensions (resizing the array), then the empty element and
+  // every array element through their own DeSerialize member. Returns false
+  // on the first failure.
+  // Assumes a T::DeSerialize(bool swap, FILE*) function.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  bool DeSerializeClasses(bool swap, FILE *fp) {
+    if (!DeSerializeSize(swap, fp) || !empty_.DeSerialize(swap, fp)) {
+      return false;
+    }
+    const int count = num_elements();
+    int loaded = 0;
+    while (loaded < count && array_[loaded].DeSerialize(swap, fp)) {
+      ++loaded;
+    }
+    // The loop stops early only when an element failed to deserialize.
+    return loaded >= count;
+  }
+
+  // Provide the dimensions of this rectangular matrix.
+  // dim1 is the size of the first (most significant) index.
+  int dim1() const {
+    return dim1_;
+  }
+  // dim2 is the size of the second (least significant) index.
+  int dim2() const {
+    return dim2_;
+  }
+  // Returns the number of elements in the array, here dim1 * dim2.
+  // Banded/triangular matrices may override.
+  virtual int num_elements() const {
+    return dim1_ * dim2_;
+  }
+
+  // Expression to select a specific location in the matrix. The matrix is
+  // stored COLUMN-major, so the left-most index is the most significant.
+  // This allows [][] access to use indices in the same order as (,).
+  // No bounds checking is done here; subclasses may override.
+  virtual int index(int column, int row) const {
+    return (column * dim2_ + row);
+  }
+
+  // Put a list element into the matrix at a specific location.
+  // pos.x() is used as the column and pos.y() as the row.
+  void put(ICOORD pos, const T &thing) {
+    array_[this->index(pos.x(), pos.y())] = thing;
+  }
+  void put(int column, int row, const T &thing) {
+    array_[this->index(column, row)] = thing;
+  }
+
+  // Get the item at a specified location from the matrix (returned by value).
+  // pos.x() is used as the column and pos.y() as the row.
+  T get(ICOORD pos) const {
+    return array_[this->index(pos.x(), pos.y())];
+  }
+  T get(int column, int row) const {
+    return array_[this->index(column, row)];
+  }
+  // Return a reference to the element at the specified location.
+  const T &operator()(int column, int row) const {
+    return array_[this->index(column, row)];
+  }
+  T &operator()(int column, int row) {
+    return array_[this->index(column, row)];
+  }
+  // Allow access using array[column][row]. NOTE that the indices are
+  // in the same left-to-right order as the () indexing.
+  // Returns a pointer to the element at (column, 0).
+  T *operator[](int column) {
+    return &array_[this->index(column, 0)];
+  }
+  const T *operator[](int column) const {
+    return &array_[this->index(column, 0)];
+  }
+
+  // Adds addend to *this, element-by-element.
+  // When the two arrays differ in size, only the region common to both is
+  // updated; elements of *this outside it are left unchanged.
+  void operator+=(const GENERIC_2D_ARRAY<T> &addend) {
+    if (dim2_ == addend.dim2_) {
+      // Faster if equal size in the major dimension: the storage layouts
+      // match, so the flat arrays can be added directly.
+      int size = std::min(num_elements(), addend.num_elements());
+      for (int i = 0; i < size; ++i) {
+        array_[i] += addend.array_[i];
+      }
+    } else {
+      // Clamp to the overlapping region so that a smaller addend is never
+      // read out of bounds.
+      const int common_dim1 = std::min(dim1_, addend.dim1_);
+      const int common_dim2 = std::min(dim2_, addend.dim2_);
+      for (int x = 0; x < common_dim1; x++) {
+        for (int y = 0; y < common_dim2; y++) {
+          (*this)(x, y) += addend(x, y);
+        }
+      }
+    }
+  }
+  // Subtracts the given array (named minuend here) from *this,
+  // element-by-element.
+  // When the two arrays differ in size, only the region common to both is
+  // updated; elements of *this outside it are left unchanged.
+  void operator-=(const GENERIC_2D_ARRAY<T> &minuend) {
+    if (dim2_ == minuend.dim2_) {
+      // Faster if equal size in the major dimension: the storage layouts
+      // match, so the flat arrays can be subtracted directly.
+      int size = std::min(num_elements(), minuend.num_elements());
+      for (int i = 0; i < size; ++i) {
+        array_[i] -= minuend.array_[i];
+      }
+    } else {
+      // Clamp to the overlapping region so that a smaller operand is never
+      // read out of bounds.
+      const int common_dim1 = std::min(dim1_, minuend.dim1_);
+      const int common_dim2 = std::min(dim2_, minuend.dim2_);
+      for (int x = 0; x < common_dim1; x++) {
+        for (int y = 0; y < common_dim2; y++) {
+          (*this)(x, y) -= minuend(x, y);
+        }
+      }
+    }
+  }
+  // Adds the scalar addend to every element of the array.
+  void operator+=(const T &addend) {
+    const int count = num_elements();
+    int i = 0;
+    while (i < count) {
+      T &cell = array_[i];
+      cell += addend;
+      ++i;
+    }
+  }
+  // Multiplies every element of the array by the scalar factor.
+  void operator*=(const T &factor) {
+    const int count = num_elements();
+    int i = 0;
+    while (i < count) {
+      T &cell = array_[i];
+      cell *= factor;
+      ++i;
+    }
+  }
+  // Replaces every element by ClipToRange(element, rangemin, rangemax).
+  void Clip(const T &rangemin, const T &rangemax) {
+    const int count = num_elements();
+    for (int i = 0; i < count; ++i) {
+      T &cell = array_[i];
+      cell = ClipToRange(cell, rangemin, rangemax);
+    }
+  }
+  // Returns true if no element is below rangemin or above rangemax (an empty
+  // array is therefore within bounds).
+  // Only uses operator<
+  bool WithinBounds(const T &rangemin, const T &rangemax) const {
+    const int count = num_elements();
+    for (int i = 0; i < count; ++i) {
+      const T &candidate = array_[i];
+      const bool in_range = !(candidate < rangemin) && !(rangemax < candidate);
+      if (!in_range) {
+        return false;
+      }
+    }
+    return true;
+  }
+  // Normalizes the whole array in place: the mean is subtracted from every
+  // element and, if the resulting standard deviation is positive, every
+  // element is divided by it. Returns that standard deviation (0.0 for an
+  // empty array).
+  double Normalize() {
+    const int count = num_elements();
+    if (count <= 0) {
+      return 0.0;
+    }
+    // First pass: mean of all elements.
+    double total = 0.0;
+    for (int i = 0; i < count; ++i) {
+      total += array_[i];
+    }
+    const double mean = total / count;
+    // Second pass: centre each element and accumulate squared deviations.
+    double sum_sq = 0.0;
+    for (int i = 0; i < count; ++i) {
+      const double centred = array_[i] - mean;
+      array_[i] = centred;
+      sum_sq += centred * centred;
+    }
+    const double sd = sqrt(sum_sq / count);
+    if (sd > 0.0) {
+      // Third pass: scale by the standard deviation.
+      for (int i = 0; i < count; ++i) {
+        array_[i] /= sd;
+      }
+    }
+    return sd;
+  }
+
+  // Returns the largest element of the array (compared with operator>), or
+  // the empty value when the array has no elements.
+  T Max() const {
+    const int count = num_elements();
+    if (count <= 0) {
+      return empty_;
+    }
+    T best = array_[0];
+    for (int i = 1; i < count; ++i) {
+      if (array_[i] > best) {
+        best = array_[i];
+      }
+    }
+    return best;
+  }
+
+  // Returns the largest absolute value (via fabs, converted back to T) found
+  // in the array, or the empty value when the array has no elements.
+  T MaxAbs() const {
+    const int count = num_elements();
+    if (count <= 0) {
+      return empty_;
+    }
+    T largest = static_cast<T>(0);
+    for (int i = 0; i < count; ++i) {
+      const T magnitude = static_cast<T>(fabs(array_[i]));
+      if (magnitude > largest) {
+        largest = magnitude;
+      }
+    }
+    return largest;
+  }
+
+  // Updates each element to a decayed running sum of squares:
+  //   array_[i] = array_[i] * decay_factor + (1 - decay_factor) * src[i]^2.
+  // src is read at the same flat indices as *this.
+  void SumSquares(const GENERIC_2D_ARRAY<T> &src, const T &decay_factor) {
+    const T update_factor = 1.0 - decay_factor;
+    const int count = num_elements();
+    for (int i = 0; i < count; ++i) {
+      const T &sample = src.array_[i];
+      T &accum = array_[i];
+      accum = accum * decay_factor + update_factor * sample * sample;
+    }
+  }
+
+  // Applies an adam-style update to each element:
+  //   array_[i] += sum[i] / (sqrt(sqsum[i]) + epsilon).
+  // sum and sqsum are read at the same flat indices as *this.
+  void AdamUpdate(const GENERIC_2D_ARRAY<T> &sum, const GENERIC_2D_ARRAY<T> &sqsum,
+                  const T &epsilon) {
+    const int count = num_elements();
+    int i = 0;
+    while (i < count) {
+      array_[i] += sum.array_[i] / (sqrt(sqsum.array_[i]) + epsilon);
+      ++i;
+    }
+  }
+
+  // Asserts (via ASSERT_HOST) that every element is a finite number, i.e.
+  // neither infinite nor NaN.
+  void AssertFinite() const {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      // <cmath> only guarantees the std:: qualified name.
+      ASSERT_HOST(std::isfinite(array_[i]));
+    }
+  }
+
+  // REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
+  // num_dims-dimensional array/tensor with dimensions given by dims, (ordered
+  // from most significant to least significant, the same as standard C arrays)
+  // and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
+  // in between shifted towards the hole left by src_dim. Example:
+  // Current data content: array_=[0, 1, 2, ....119]
+  //   perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
+  //   but the current dimensions are irrelevant.
+  // num_dims = 4, dims=[5, 4, 3, 2]
+  // src_dim=3, dest_dim=1
+  // tensor=[[[[0, 1][2, 3][4, 5]]
+  //          [[6, 7][8, 9][10, 11]]
+  //          [[12, 13][14, 15][16, 17]]
+  //          [[18, 19][20, 21][22, 23]]]
+  //         [[[24, 25]...
+  // output dims =[5, 2, 4, 3]
+  // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
+  //                 [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
+  //                [[[24, 26, 28]...
+  // which is stored in the array_ as:
+  //   [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
+  // NOTE: the 2 stored matrix dimensions are simply copied from *this. To
+  // change the dimensions after the transpose, use ResizeNoInit.
+  // Higher dimensions above 2 are strictly the responsibility of the caller.
+  // The transposed data is written to *result, which is resized to the
+  // dimensions of *this and given the same empty element; *this is not
+  // modified. Elements are moved with memcpy.
+  void RotatingTranspose(const int *dims, int num_dims, int src_dim, int dest_dim,
+                         GENERIC_2D_ARRAY<T> *result) const {
+    int max_d = std::max(src_dim, dest_dim);
+    int min_d = std::min(src_dim, dest_dim);
+    // In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
+    // ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
+    // being contiguous blocks of data that will move together, and
+    // [d0, min_d -1] being replicas of the transpose operation.
+    // num_replicas represents the large dimensions unchanged by the operation.
+    // move_size represents the small dimensions unchanged by the operation.
+    // src_step represents the stride in the src between each adjacent group
+    // in the destination.
+    int num_replicas = 1, move_size = 1, src_step = 1;
+    for (int d = 0; d < min_d; ++d) {
+      num_replicas *= dims[d];
+    }
+    for (int d = max_d + 1; d < num_dims; ++d) {
+      move_size *= dims[d];
+    }
+    for (int d = src_dim + 1; d < num_dims; ++d) {
+      src_step *= dims[d];
+    }
+    if (src_dim > dest_dim) {
+      src_step *= dims[src_dim];
+    }
+    // wrap_size is the size of a single replica, being the amount that is
+    // handled num_replicas times.
+    int wrap_size = move_size;
+    for (int d = min_d; d <= max_d; ++d) {
+      wrap_size *= dims[d];
+    }
+    result->ResizeNoInit(dim1_, dim2_);
+    result->empty_ = empty_;
+    const T *src = array_;
+    T *dest = result->array_;
+    // dest advances sequentially through the output; for each replica the
+    // source is read in blocks of move_size elements, src_step apart, for
+    // each starting offset in turn.
+    for (int replica = 0; replica < num_replicas; ++replica) {
+      for (int start = 0; start < src_step; start += move_size) {
+        for (int pos = start; pos < wrap_size; pos += src_step) {
+          memcpy(dest, src + pos, sizeof(*dest) * move_size);
+          dest += move_size;
+        }
+      }
+      src += wrap_size;
+    }
+  }
+
+  // Delete objects pointed to by array_[i], for every element that is not the
+  // empty value. Each deleted cell is reset to the empty value so that no
+  // dangling pointer is left behind and a repeated call is harmless.
+  // Only meaningful when T is a pointer type.
+  void delete_matrix_pointers() {
+    int size = num_elements();
+    for (int i = 0; i < size; ++i) {
+      T matrix_cell = array_[i];
+      if (matrix_cell != empty_) {
+        delete matrix_cell;
+        array_[i] = empty_;
+      }
+    }
+  }
+
+protected:
+  // Factored helper: writes dim1_ and then dim2_ to the file as 32-bit
+  // unsigned values. Returns false if either write fails.
+  bool SerializeSize(FILE *fp) const {
+    const uint32_t first = dim1_;
+    const uint32_t second = dim2_;
+    return tesseract::Serialize(fp, &first) && tesseract::Serialize(fp, &second);
+  }
+  // TFile flavour of SerializeSize: writes dim1_ and then dim2_ as 32-bit
+  // unsigned values. Returns false if either write fails.
+  bool SerializeSize(TFile *fp) const {
+    const uint32_t first = dim1_;
+    const uint32_t second = dim2_;
+    return fp->Serialize(&first) && fp->Serialize(&second);
+  }
+  // Factored helper to deserialize the size: reads the two dimensions and
+  // resizes (and clears) the array accordingly.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  // Returns false on a read error or when the sizes are rejected as bad data.
+  bool DeSerializeSize(bool swap, FILE *fp) {
+    uint32_t size1, size2;
+    if (!tesseract::DeSerialize(fp, &size1)) {
+      return false;
+    }
+    if (!tesseract::DeSerialize(fp, &size2)) {
+      return false;
+    }
+    if (swap) {
+      ReverseN(&size1, sizeof(size1));
+      ReverseN(&size2, sizeof(size2));
+    }
+    // Arbitrarily limit the number of elements to protect against bad data.
+    if (size1 > UINT16_MAX) {
+      return false;
+    }
+    if (size2 > UINT16_MAX) {
+      return false;
+    }
+    // The element count is held in an int, so the product must fit as well.
+    if (static_cast<uint64_t>(size1) * size2 > static_cast<uint64_t>(INT32_MAX)) {
+      return false;
+    }
+    Resize(size1, size2, empty_);
+    return true;
+  }
+  // TFile flavour of DeSerializeSize: reads the two dimensions and resizes
+  // (and clears) the array accordingly.
+  // Returns false on a read error or when the sizes are rejected as bad data.
+  bool DeSerializeSize(TFile *fp) {
+    int32_t size1, size2;
+    if (!fp->DeSerialize(&size1)) {
+      return false;
+    }
+    if (!fp->DeSerialize(&size2)) {
+      return false;
+    }
+    // The sizes are read as signed values, so negative sizes must be rejected
+    // explicitly; they would otherwise pass the upper-limit checks below.
+    if (size1 < 0 || size2 < 0) {
+      return false;
+    }
+    // Arbitrarily limit the number of elements to protect against bad data.
+    if (size1 > UINT16_MAX) {
+      return false;
+    }
+    if (size2 > UINT16_MAX) {
+      return false;
+    }
+    // The element count is held in an int, so the product must fit as well.
+    if (static_cast<int64_t>(size1) * size2 > static_cast<int64_t>(INT32_MAX)) {
+      return false;
+    }
+    Resize(size1, size2, empty_);
+    return true;
+  }
+
+  T *array_;
+  T empty_;  // The unused cell.
+  int dim1_; // Size of the 1st dimension in indexing functions.
+  int dim2_; // Size of the 2nd dimension in indexing functions.
+  // The total size to which the array can be expanded before a realloc is
+  // needed. If Resize is used, memory is retained so it can be re-expanded
+  // without a further alloc, and this stores the allocated size.
+  int size_allocated_;
+};
+
+// A generic class to store a banded triangular matrix with entries of type T.
+// In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is
+// the number of bands, INCLUDING the diagonal. The storage is thus of size
+// dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an
+// assert will fail if row < col or row - col >= dim2.
+template <class T>
+class BandTriMatrix : public GENERIC_2D_ARRAY<T> {
+public:
+  // Allocate a piece of memory to hold a 2d-array of the given dimension.
+  // Initialize all the elements of the array to empty instead of assuming
+  // that a default constructor can be used.
+  BandTriMatrix(int dim1, int dim2, const T &empty) : GENERIC_2D_ARRAY<T>(dim1, dim2, empty) {}
+  // The default destructor will do.
+
+  // Provide the dimensions of this matrix.
+  // dimension is the size of the nominally square matrix.
+  int dimension() const {
+    return this->dim1_;
+  }
+  // bandwidth is the number of bands in the matrix, INCLUDING the diagonal.
+  int bandwidth() const {
+    return this->dim2_;
+  }
+
+  // Expression to select a specific location in the matrix. The matrix is
+  // stored COLUMN-major, so the left-most index is the most significant.
+  // This allows [][] access to use indices in the same order as (,).
+  // Asserts that (column, row) lies on or below the diagonal and within the
+  // band.
+  int index(int column, int row) const override {
+    ASSERT_HOST(row >= column);
+    ASSERT_HOST(row - column < this->dim2_);
+    return column * this->dim2_ + row - column;
+  }
+
+  // Appends array2 corner-to-corner to *this, making an array of dimension
+  // equal to the sum of the individual dimensions, with a bandwidth equal to
+  // the larger of the two bandwidths.
+  // array2 is not destroyed, but is left empty, as all elements are moved
+  // to *this (each moved cell of array2 is overwritten with nullptr, so T
+  // must be a pointer type).
+  void AttachOnCorner(BandTriMatrix<T> *array2) {
+    int new_dim1 = this->dim1_ + array2->dim1_;
+    int new_dim2 = std::max(this->dim2_, array2->dim2_);
+    int new_size = new_dim1 * new_dim2;
+    T *new_array = new T[new_size];
+    for (int col = 0; col < new_dim1; ++col) {
+      for (int j = 0; j < new_dim2; ++j) {
+        int new_index = col * new_dim2 + j;
+        if (col < this->dim1_ && j < this->dim2_) {
+          // Cell comes from *this: j is the band offset from the diagonal.
+          new_array[new_index] = this->get(col, col + j);
+        } else if (col >= this->dim1_ && j < array2->dim2_) {
+          // Cell comes from array2, whose columns start at this->dim1_.
+          new_array[new_index] = array2->get(col - this->dim1_, col - this->dim1_ + j);
+          array2->put(col - this->dim1_, col - this->dim1_ + j, nullptr);
+        } else {
+          new_array[new_index] = this->empty_;
+        }
+      }
+    }
+    delete[] this->array_;
+    this->array_ = new_array;
+    this->dim1_ = new_dim1;
+    this->dim2_ = new_dim2;
+    // Keep the recorded capacity in step with the storage just installed.
+    this->size_allocated_ = new_size;
+  }
+};
+
+// Ratings matrix: a banded triangular matrix whose cells are pointers to
+// BLOB_CHOICE_LIST, with NOT_CLASSIFIED (a null pointer) as the empty value.
+class MATRIX : public BandTriMatrix<BLOB_CHOICE_LIST *> {
+public:
+  // Creates a matrix of the given dimension and bandwidth with every cell
+  // set to NOT_CLASSIFIED.
+  MATRIX(int dimension, int bandwidth)
+      : BandTriMatrix<BLOB_CHOICE_LIST *>(dimension, bandwidth, NOT_CLASSIFIED) {}
+
+  // Declared here and defined out of line in the .cpp file.
+  ~MATRIX() override;
+
+  // Returns true if there are any real classification results.
+  bool Classified(int col, int row, int wildcard_id) const;
+
+  // Expands the existing matrix in-place to make the band wider, without
+  // losing any existing data.
+  void IncreaseBandSize(int bandwidth);
+
+  // Returns a bigger MATRIX with a new column and row in the matrix in order
+  // to split the blob at the given (ind,ind) diagonal location.
+  // Entries are relocated to the new MATRIX using the transformation defined
+  // by MATRIX_COORD::MapForSplit.
+  // Transfers the pointer data to the new MATRIX and deletes *this.
+  MATRIX *ConsumeAndMakeBigger(int ind);
+
+  // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
+  // on the lists, but not any LanguageModelState that may be attached to the
+  // BLOB_CHOICEs.
+  MATRIX *DeepCopy() const;
+
+  // Print a shortened version of the contents of the matrix.
+  void print(const UNICHARSET &unicharset) const;
+};
+
+// A (col, row) position in a MATRIX.
+struct MATRIX_COORD {
+  // Deleter callback: treats arg as a MATRIX_COORD pointer and deletes it.
+  static void Delete(void *arg) {
+    delete static_cast<MATRIX_COORD *>(arg);
+  }
+  // Default constructor required by GenericHeap.
+  MATRIX_COORD() : col(0), row(0) {}
+  MATRIX_COORD(int c, int r) : col(c), row(r) {}
+  ~MATRIX_COORD() = default;
+
+  // Returns true if this coordinate addresses a cell of m: both indices are
+  // inside the dimension, row is not above the diagonal, and row lies within
+  // the band of col.
+  bool Valid(const MATRIX &m) const {
+    const int dim = m.dimension();
+    if (col < 0 || col >= dim) {
+      return false;
+    }
+    if (row < col || row >= dim) {
+      return false;
+    }
+    return row < col + m.bandwidth();
+  }
+
+  // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal
+  // location.
+  // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1),
+  // making a new row at ind.
+  // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+1,j+1),
+  // making a new column at ind+1.
+  void MapForSplit(int ind) {
+    ASSERT_HOST(row >= col);
+    col += (col > ind) ? 1 : 0;
+    row += (row >= ind) ? 1 : 0;
+    ASSERT_HOST(row >= col);
+  }
+
+  int col;
+  int row;
+};
+
+// The MatrixCoordPair contains a MATRIX_COORD and its priority.
+using MatrixCoordPair = KDPairInc<float, MATRIX_COORD>;
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_MATRIX_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/mod128.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/mod128.cpp
@ -0,0 +1,77 @@
+/**********************************************************************
+ * File:        mod128.cpp  (Formerly dir128.c)
+ * Description: Code to convert a DIR128 to an ICOORD.
+ * Author:          Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mod128.h"
+
+namespace tesseract {
+
+// Flat table of 256 int16_t values, read as 128 consecutive (x, y) pairs:
+// one pair per direction index in [0, MODULUS).
+// NOTE(review): the values look like DIRSCALE * (cos, sin) of evenly spaced
+// angles around a full turn, starting at (1000, 0) -- inferred from the
+// numbers only; confirm against whatever generated the table.
+static const int16_t idirtab[] = {
+    1000, 0,    998,  49,   995,  98,   989,  146,  980,   195,  970,  242,  956,  290,   941,
+    336,  923,  382,  903,  427,  881,  471,  857,  514,   831,  555,  803,  595,  773,   634,
+    740,  671,  707,  707,  671,  740,  634,  773,  595,   803,  555,  831,  514,  857,   471,
+    881,  427,  903,  382,  923,  336,  941,  290,  956,   242,  970,  195,  980,  146,   989,
+    98,   995,  49,   998,  0,    1000, -49,  998,  -98,   995,  -146, 989,  -195, 980,   -242,
+    970,  -290, 956,  -336, 941,  -382, 923,  -427, 903,   -471, 881,  -514, 857,  -555,  831,
+    -595, 803,  -634, 773,  -671, 740,  -707, 707,  -740,  671,  -773, 634,  -803, 595,   -831,
+    555,  -857, 514,  -881, 471,  -903, 427,  -923, 382,   -941, 336,  -956, 290,  -970,  242,
+    -980, 195,  -989, 146,  -995, 98,   -998, 49,   -1000, 0,    -998, -49,  -995, -98,   -989,
+    -146, -980, -195, -970, -242, -956, -290, -941, -336,  -923, -382, -903, -427, -881,  -471,
+    -857, -514, -831, -555, -803, -595, -773, -634, -740,  -671, -707, -707, -671, -740,  -634,
+    -773, -595, -803, -555, -831, -514, -857, -471, -881,  -427, -903, -382, -923, -336,  -941,
+    -290, -956, -242, -970, -195, -980, -146, -989, -98,   -995, -49,  -998, 0,    -1000, 49,
+    -998, 98,   -995, 146,  -989, 195,  -980, 242,  -970,  290,  -956, 336,  -941, 382,   -923,
+    427,  -903, 471,  -881, 514,  -857, 555,  -831, 595,   -803, 634,  -773, 671,  -740,  707,
+    -707, 740,  -671, 773,  -634, 803,  -595, 831,  -555,  857,  -514, 881,  -471, 903,   -427,
+    923,  -382, 941,  -336, 956,  -290, 970,  -242, 980,   -195, 989,  -146, 995,  -98,   998,
+    -49};
+
+// The same storage viewed as an array of ICOORD, indexed by direction.
+// NOTE(review): this reinterpret_cast presumes ICOORD is laid out as exactly
+// two consecutive int16_t (x then y) -- confirm against points.h.
+static const ICOORD *dirtab = reinterpret_cast<const ICOORD *>(idirtab);
+
+/**********************************************************************
+ * DIR128::DIR128
+ *
+ * Quantize the direction of an FCOORD to make a DIR128.
+ **********************************************************************/
+
+DIR128::DIR128(const FCOORD fc) { // fc: vector to quantize
+  // A vector with zero y is resolved directly from the sign of x, with no
+  // table search.
+  if (fc.y() == 0) {
+    dir = (fc.x() >= 0) ? 0 : MODULUS / 2;
+    return;
+  }
+  // Bisect the direction table: keep the largest index whose entry gives a
+  // non-negative product with fc.
+  int lower = 0;
+  int upper = MODULUS;
+  do {
+    const int middle = (upper + lower) / 2;
+    if (dirtab[middle] * fc >= 0) {
+      lower = middle;
+    } else {
+      upper = middle;
+    }
+  } while (upper - lower > 1);
+  dir = lower;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/mod128.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/mod128.h
@ -0,0 +1,90 @@
+/**********************************************************************
+ * File:        mod128.h  (Formerly dir128.h)
+ * Description: Header for class which implements modulo arithmetic.
+ * Author:          Ray Smith
+ * Created:         Tue Mar 26 17:48:13 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef MOD128_H
+#define MOD128_H
+
+#include "points.h"
+
+namespace tesseract {
+
+#define MODULUS 128   /*range of directions */
+#define DIRBITS 7     // no of bits used
+#define DIRSCALE 1000 // length of vector
+
+// A direction stored in an int8_t; values assigned from an integer are
+// wrapped into [0, MODULUS).
+class DIR128 {
+public:
+  // Leaves dir uninitialized.
+  DIR128() = default;
+
+  // Constructs from an integer, wrapping it into [0, MODULUS).
+  DIR128(int16_t value) {
+    int16_t wrapped = value % MODULUS;
+    if (wrapped < 0) {
+      wrapped += MODULUS; // C++ % keeps the sign of the dividend
+    }
+    dir = static_cast<int8_t>(wrapped);
+  }
+  DIR128(const FCOORD fc); // quantize vector
+
+  // Assigns from an integer, wrapping it into [0, MODULUS).
+  DIR128 &operator=(int16_t value) {
+    int16_t wrapped = value % MODULUS;
+    if (wrapped < 0) {
+      wrapped += MODULUS; // C++ % keeps the sign of the dividend
+    }
+    dir = static_cast<int8_t>(wrapped);
+    return *this;
+  }
+
+  // Signed difference between two directions, folded back by MODULUS when it
+  // falls outside [-MODULUS / 2, MODULUS / 2].
+  int8_t operator-(const DIR128 &minus) const {
+    int16_t diff = dir - minus.dir;
+    if (diff > MODULUS / 2) {
+      diff -= MODULUS;
+    } else if (diff < -MODULUS / 2) {
+      diff += MODULUS;
+    }
+    return static_cast<int8_t>(diff);
+  }
+
+  // Sum of two directions; the int16_t constructor performs the wrap.
+  DIR128 operator+(const DIR128 &add) const {
+    return DIR128(static_cast<int16_t>(dir + add.dir));
+  }
+
+  // In-place sum; the int16_t assignment performs the wrap.
+  DIR128 &operator+=(const DIR128 &add) {
+    return *this = static_cast<int16_t>(dir + add.dir);
+  }
+
+  // Returns the stored direction value.
+  int8_t get_dir() const {
+    return dir;
+  }
+
+private:
+  int8_t dir; // a direction
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/normalis.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/normalis.cpp
@ -0,0 +1,575 @@
+/**********************************************************************
+ * File:        normalis.cpp  (Formerly denorm.c)
+ * Description: Code for the DENORM class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "normalis.h"
+
+#include <allheaders.h>
+#include "blobs.h"
+#include "helpers.h"
+#include "matrix.h"
+#include "ocrblock.h"
+#include "unicharset.h"
+#include "werd.h"
+
+#include <cfloat> // for FLT_MAX
+#include <cstdlib>
+
+namespace tesseract {
+
+// Tolerance in pixels used for baseline and xheight on non-upper/lower scripts.
+const int kSloppyTolerance = 4;
+// Final tolerance in pixels added to the computed xheight range.
+const float kFinalPixelTolerance = 0.125f;
+
+// Default constructor: delegates to Init() to set the default field values.
+DENORM::DENORM() {
+  Init();
+}
+
+// Copy constructor, implemented in terms of operator=.
+// operator= starts by calling Clear(), which deletes x_map_, y_map_ and
+// rotation_, so all three owned pointers must hold nullptr before the
+// assignment runs; otherwise Clear() would delete an indeterminate pointer.
+DENORM::DENORM(const DENORM &src) {
+  rotation_ = nullptr;
+  x_map_ = nullptr;
+  y_map_ = nullptr;
+  *this = src;
+}
+
+// Copy assignment. Non-owned pointers (predecessor_, pix_, block_) are copied
+// as-is; rotation_ is owned and therefore deep copied.
+// NOTE(review): x_map_ and y_map_ are released by Clear() and are not copied
+// from src, so a non-linear mapping does not survive a copy -- confirm that
+// this is intended.
+DENORM &DENORM::operator=(const DENORM &src) {
+  // Self-assignment guard: Clear() below would otherwise delete rotation_
+  // before it could be copied, silently dropping the rotation.
+  if (this == &src) {
+    return *this;
+  }
+  Clear();
+  inverse_ = src.inverse_;
+  predecessor_ = src.predecessor_;
+  pix_ = src.pix_;
+  block_ = src.block_;
+  if (src.rotation_ == nullptr) {
+    rotation_ = nullptr;
+  } else {
+    rotation_ = new FCOORD(*src.rotation_);
+  }
+  x_origin_ = src.x_origin_;
+  y_origin_ = src.y_origin_;
+  x_scale_ = src.x_scale_;
+  y_scale_ = src.y_scale_;
+  final_xshift_ = src.final_xshift_;
+  final_yshift_ = src.final_yshift_;
+  return *this;
+}
+
+// Destructor: Clear() deletes the owned x_map_, y_map_ and rotation_.
+DENORM::~DENORM() {
+  Clear();
+}
+
+// Initializes the denorm for a transformation. For details see the large
+// comment in normalis.h.
+// Arguments:
+// block: if not nullptr, then this is the first transformation, and
+//        block->re_rotation() needs to be used after the Denorm
+//        transformation to get back to the image coords.
+// rotation: if not nullptr, apply this rotation after translation to the
+//           origin and scaling. (Usually a classify rotation.)
+// predecessor: if not nullptr, then predecessor has been applied to the
+//              input space and needs to be undone to complete the inverse.
+// The above pointers are not owned by this DENORM and are assumed to live
+// longer than this denorm, except rotation, which is deep copied on input.
+//
+// x_origin: The x origin which will be mapped to final_xshift in the result.
+// y_origin: The y origin which will be mapped to final_yshift in the result.
+//           Added to result of row->baseline(x) if not nullptr.
+//
+// x_scale: scale factor for the x-coordinate.
+// y_scale: scale factor for the y-coordinate. Ignored if segs is given.
+// Note that these scale factors apply to the same x and y system as the
+// x-origin and y-origin apply, ie after any block rotation, but before
+// the rotation argument is applied.
+//
+// final_xshift: The x component of the final translation.
+// final_yshift: The y component of the final translation.
+void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
+                                const DENORM *predecessor, float x_origin, float y_origin,
+                                float x_scale, float y_scale, float final_xshift,
+                                float final_yshift) {
+  // Drop any previously owned maps/rotation before installing the new setup.
+  Clear();
+  // Borrowed pointers: stored, never owned.
+  block_ = block;
+  predecessor_ = predecessor;
+  // The rotation is the only pointer argument that is deep copied.
+  rotation_ = (rotation != nullptr) ? new FCOORD(*rotation) : nullptr;
+  // Plain linear-transform parameters.
+  x_origin_ = x_origin;
+  y_origin_ = y_origin;
+  x_scale_ = x_scale;
+  y_scale_ = y_scale;
+  final_xshift_ = final_xshift;
+  final_yshift_ = final_yshift;
+}
+
+// Helper for SetupNonLinear computes an image of shortest run-lengths from
+// the x/y edges provided.
+// Based on "A nonlinear normalization method for handprinted Kanji character
+// recognition -- line density equalization" by Hiromitsu Yamada et al.
+// Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding
+//  ______________     input x_coords and y_coords.
+// |  _________  |     <empty>
+// | |    _    | |     1, 6
+// | |   | |   | |     1, 3, 4, 6
+// | |   | |   | |     1, 3, 4, 6
+// | |   | |   | |     1, 3, 4, 6
+// | |   |_|   | |     1, 3, 4, 6
+// | |_________| |     1, 6
+// |_____________|     <empty>
+//  E 1 1 1 1 1 E
+//  m 7 7 2 7 7 m
+//  p     6     p
+//  t     7     t
+//  y           y
+// The output image contains the min of the x and y run-length (distance
+// between edges) at each coordinate in the image thus:
+//  ______________
+// |7 1_1_1_1_1 7|
+// |1|5 5 1 5 5|1|
+// |1|2 2|1|2 2|1|
+// |1|2 2|1|2 2|1|
+// |1|2 2|1|2 2|1|
+// |1|2 2|1|2 2|1|
+// |1|5_5_1_5_5|1|
+// |7_1_1_1_1_1_7|
+// Note that the input coords are all integer, so all partial pixels are dealt
+// with elsewhere. Although it is nice for outlines to be properly connected
+// and continuous, there is no requirement that they be as such, so they could
+// have been derived from a flaky source, such as greyscale.
+// This function works only within the provided box, and it is assumed that the
+// input x_coords and y_coords have already been translated to have the bottom-
+// left of box as the origin. Although an output, the minruns should have been
+// pre-initialized to be the same size as box. Each element will contain the
+// minimum of x and y run-length as shown above.
+static void ComputeRunlengthImage(const TBOX &box,
+                                  const std::vector<std::vector<int>> &x_coords,
+                                  const std::vector<std::vector<int>> &y_coords,
+                                  GENERIC_2D_ARRAY<int> *minruns) {
+  const int width = box.width();
+  const int height = box.height();
+  ASSERT_HOST(minruns->dim1() == width);
+  ASSERT_HOST(minruns->dim2() == height);
+  // Pass 1: walk each column and store the vertical run length at every pixel.
+  for (int ix = 0; ix < width; ++ix) {
+    int y = 0;
+    for (auto y_coord : y_coords[ix]) {
+      const int y_edge = ClipToRange(y_coord, 0, height);
+      const int run = y_edge - y;
+      // All pixels from the previous edge up to this one share the same run.
+      for (; y < y_edge; ++y) {
+        (*minruns)(ix, y) = run;
+      }
+    }
+    // Treat the top of the box as a final edge.
+    const int last_run = height - y;
+    for (; y < height; ++y) {
+      (*minruns)(ix, y) = last_run;
+    }
+  }
+  // Pass 2: walk each row and lower every pixel to the MIN of the x and y
+  // run lengths.
+  for (int iy = 0; iy < height; ++iy) {
+    int x = 0;
+    for (auto x_coord : x_coords[iy]) {
+      const int x_edge = ClipToRange(x_coord, 0, width);
+      const int run = x_edge - x;
+      for (; x < x_edge; ++x) {
+        if (run < (*minruns)(x, iy)) {
+          (*minruns)(x, iy) = run;
+        }
+      }
+    }
+    // Treat the right side of the box as a final edge.
+    const int last_run = width - x;
+    for (; x < width; ++x) {
+      if (last_run < (*minruns)(x, iy)) {
+        (*minruns)(x, iy) = last_run;
+      }
+    }
+  }
+}
+// Converts the run-length image (see above) to the edge density profiles used
+// for scaling, thus:
+//  ______________
+// |7 1_1_1_1_1 7|  = 5.28
+// |1|5 5 1 5 5|1|  = 3.8
+// |1|2 2|1|2 2|1|  = 5
+// |1|2 2|1|2 2|1|  = 5
+// |1|2 2|1|2 2|1|  = 5
+// |1|2 2|1|2 2|1|  = 5
+// |1|5_5_1_5_5|1|  = 3.8
+// |7_1_1_1_1_1_7|  = 5.28
+//  6 4 4 8 4 4 6
+//  . . . . . . .
+//  2 4 4 0 4 4 2
+//  8           8
+// Each profile is the sum of the reciprocals of the pixels in the image in
+// the appropriate row or column, and these are then normalized to sum to 1.
+// On output hx, hy contain an extra element, which will eventually be used
+// to guarantee that the top/right edge of the box (and anything beyond) always
+// gets mapped to the maximum target coordinate.
+static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<int> &minruns,
+                                       std::vector<float> &hx, std::vector<float> &hy) {
+  int width = box.width();
+  int height = box.height();
+  // Use assign rather than resize: the loops below accumulate with +=, so
+  // every element must start from zero even if the caller passes in vectors
+  // that already hold data (resize would keep the old values).
+  hx.assign(width + 1, 0.0f);
+  hy.assign(height + 1, 0.0f);
+  double total = 0.0;
+  for (int iy = 0; iy < height; ++iy) {
+    for (int ix = 0; ix < width; ++ix) {
+      int run = minruns(ix, iy);
+      // Guard against division by zero for zero-length runs.
+      if (run == 0) {
+        run = 1;
+      }
+      float density = 1.0f / run;
+      hx[ix] += density;
+      hy[iy] += density;
+    }
+    // Row iy is complete at this point, so hy[iy] is its final sum.
+    total += hy[iy];
+  }
+  // Normalize each profile to sum to 1.
+  if (total > 0.0) {
+    for (int ix = 0; ix < width; ++ix) {
+      hx[ix] /= total;
+    }
+    for (int iy = 0; iy < height; ++iy) {
+      hy[iy] /= total;
+    }
+  }
+  // There is an extra element in each array, so initialize to 1.
+  hx[width] = 1.0f;
+  hy[height] = 1.0f;
+}
+
+// Sets up the DENORM to execute a non-linear transformation based on
+// preserving an even distribution of stroke edges. The transformation
+// operates only within the given box.
+// x_coords is a collection of the x-coords of vertical edges for each
+// y-coord starting at box.bottom().
+// y_coords is a collection of the y-coords of horizontal edges for each
+// x-coord starting at box.left().
+// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
+// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
+// The second-level vectors must all be sorted in ascending order.
+// See comments on the helper functions above for more details.
+void DENORM::SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
+                            float target_height, float final_xshift, float final_yshift,
+                            const std::vector<std::vector<int>> &x_coords,
+                            const std::vector<std::vector<int>> &y_coords) {
+  Clear();
+  predecessor_ = predecessor;
+  // x_map_ / y_map_ will map each input x / y coordinate (relative to the box
+  // origin) to an output coordinate scaled to target_width / target_height.
+  x_map_ = new std::vector<float>;
+  y_map_ = new std::vector<float>;
+  std::vector<float> &xs = *x_map_;
+  std::vector<float> &ys = *y_map_;
+  // Build the per-pixel minimum run-length image for the box.
+  const int width = box.width();
+  const int height = box.height();
+  GENERIC_2D_ARRAY<int> minruns(width, height, 0);
+  ComputeRunlengthImage(box, x_coords, y_coords, &minruns);
+  // Edge density is the sum of the inverses of the run lengths; project it
+  // onto each axis.
+  ComputeEdgeDensityProfiles(box, minruns, xs, ys);
+  // Turn the density profiles into coordinates: pin the last entry to the
+  // target size, then accumulate backwards, each step removing that entry's
+  // share of the target size.
+  xs[width] = target_width;
+  for (int x = width; x > 0; --x) {
+    xs[x - 1] = xs[x] - xs[x - 1] * target_width;
+  }
+  ys[height] = target_height;
+  for (int y = height; y > 0; --y) {
+    ys[y - 1] = ys[y] - ys[y - 1] * target_height;
+  }
+  x_origin_ = box.left();
+  y_origin_ = box.bottom();
+  final_xshift_ = final_xshift;
+  final_yshift_ = final_yshift;
+}
+
+// Transforms the given coords one step forward to normalized space, without
+// using any block rotation or predecessor.
+void DENORM::LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const {
+  // Promote to floating point, run the FCOORD overload, then round back.
+  FCOORD normalized;
+  LocalNormTransform(FCOORD(pt.x, pt.y), &normalized);
+  transformed->x = IntCastRounded(normalized.x());
+  transformed->y = IntCastRounded(normalized.y());
+}
+void DENORM::LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const {
+  // Step 1: translate so that the configured origin becomes (0,0).
+  FCOORD shifted(pt.x() - x_origin_, pt.y() - y_origin_);
+  const bool non_linear = x_map_ != nullptr && y_map_ != nullptr;
+  if (!non_linear) {
+    // Step 2 (linear): scale each axis, then apply the optional rotation.
+    shifted.set_x(shifted.x() * x_scale_);
+    shifted.set_y(shifted.y() * y_scale_);
+    if (rotation_ != nullptr) {
+      shifted.rotate(*rotation_);
+    }
+  } else {
+    // Step 2 (non-linear): look each coordinate up in its map, clamping the
+    // rounded index to the valid range of the map.
+    const int x_index =
+        ClipToRange(IntCastRounded(shifted.x()), 0, static_cast<int>(x_map_->size() - 1));
+    shifted.set_x((*x_map_)[x_index]);
+    const int y_index =
+        ClipToRange(IntCastRounded(shifted.y()), 0, static_cast<int>(y_map_->size() - 1));
+    shifted.set_y((*y_map_)[y_index]);
+  }
+  // Step 3: apply the final translation.
+  transformed->set_x(shifted.x() + final_xshift_);
+  transformed->set_y(shifted.y() + final_yshift_);
+}
+
+// Transforms the given coords forward to normalized space using the
+// full transformation sequence defined by the block rotation, the
+// predecessors, deepest first, and finally this. If first_norm is not nullptr,
+// then the first and deepest transformation used is first_norm, ending
+// with this, and the block rotation will not be applied.
+void DENORM::NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const {
+  // Promote to floating point, run the FCOORD overload, then round back.
+  FCOORD normalized;
+  NormTransform(first_norm, FCOORD(pt.x, pt.y), &normalized);
+  transformed->x = IntCastRounded(normalized.x());
+  transformed->y = IntCastRounded(normalized.y());
+}
+void DENORM::NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const {
+  FCOORD input(pt);
+  // When this is the requested first transformation, nothing precedes it.
+  const bool is_first = (first_norm == this);
+  if (!is_first && predecessor_ != nullptr) {
+    // Let the predecessor chain transform the point first.
+    predecessor_->NormTransform(first_norm, pt, &input);
+  } else if (!is_first && block_ != nullptr) {
+    // No predecessor: rotate by re_rotation with its y component negated.
+    FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y());
+    input.rotate(fwd_rotation);
+  }
+  LocalNormTransform(input, transformed);
+}
+
+// Transforms the given coords one step back to source space, without
+// using any block rotation or predecessor.
+void DENORM::LocalDenormTransform(const TPOINT &pt, TPOINT *original) const {
+  // Promote to floating point, run the FCOORD overload, then round back.
+  FCOORD denormalized;
+  LocalDenormTransform(FCOORD(pt.x, pt.y), &denormalized);
+  original->x = IntCastRounded(denormalized.x());
+  original->y = IntCastRounded(denormalized.y());
+}
+
+void DENORM::LocalDenormTransform(const FCOORD &pt, FCOORD *original) const {
+  // Undo the final translation first.
+  FCOORD unshifted(pt.x() - final_xshift_, pt.y() - final_yshift_);
+  if (x_map_ == nullptr || y_map_ == nullptr) {
+    // Linear case: undo the rotation (same x, negated y), then the scaling
+    // and the origin translation.
+    if (rotation_ != nullptr) {
+      FCOORD inverse_rotation(rotation_->x(), -rotation_->y());
+      unshifted.rotate(inverse_rotation);
+    }
+    original->set_x(unshifted.x() / x_scale_ + x_origin_);
+    const float y_divisor = y_scale_;
+    original->set_y(unshifted.y() / y_divisor + y_origin_);
+    return;
+  }
+  // Non-linear case: invert each map by search. The result is the index just
+  // before the first entry greater than the value (index 0 if there is none
+  // before it).
+  auto x_pos = std::upper_bound(x_map_->begin(), x_map_->end(), unshifted.x());
+  if (x_pos > x_map_->begin()) {
+    --x_pos;
+  }
+  original->set_x((x_pos - x_map_->begin()) + x_origin_);
+  auto y_pos = std::upper_bound(y_map_->begin(), y_map_->end(), unshifted.y());
+  if (y_pos > y_map_->begin()) {
+    --y_pos;
+  }
+  original->set_y((y_pos - y_map_->begin()) + y_origin_);
+}
+
+// Transforms the given coords all the way back to source image space using
+// the full transformation sequence defined by this and its predecessors
+// recursively, shallowest first, and finally any block re_rotation.
+// If last_denorm is not nullptr, then the last transformation used will
+// be last_denorm, and the block re_rotation will never be executed.
+void DENORM::DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const {
+  // Promote to floating point, run the FCOORD overload, then round back.
+  FCOORD denormalized;
+  DenormTransform(last_denorm, FCOORD(pt.x, pt.y), &denormalized);
+  original->x = IntCastRounded(denormalized.x());
+  original->y = IntCastRounded(denormalized.y());
+}
+void DENORM::DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const {
+  // Undo this object's own transformation first.
+  LocalDenormTransform(pt, original);
+  // Stop here when this is the requested last transformation.
+  if (last_denorm == this) {
+    return;
+  }
+  if (predecessor_ != nullptr) {
+    // Continue back through the predecessor chain, updating *original in place.
+    predecessor_->DenormTransform(last_denorm, *original, original);
+  } else if (block_ != nullptr) {
+    // End of the chain: apply the block's re_rotation.
+    original->rotate(block_->re_rotation());
+  }
+}
+
+// Normalize a blob using blob transformations. Less accurate, but
+// more accurately copies the old way.
+void DENORM::LocalNormBlob(TBLOB *blob) const {
+  // Move the (rounded) origin to (0,0).
+  ICOORD to_origin(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_));
+  blob->Move(to_origin);
+  // Only y_scale_ is used for scaling here, and only when it is not 1.
+  if (y_scale_ != 1.0f) {
+    blob->Scale(y_scale_);
+  }
+  if (rotation_ != nullptr) {
+    blob->Rotate(*rotation_);
+  }
+  // Apply the (rounded) final shift.
+  ICOORD final_shift(IntCastRounded(final_xshift_), IntCastRounded(final_yshift_));
+  blob->Move(final_shift);
+}
+
+// Fills in the x-height range accepted by the given unichar_id, given its
+// bounding box in the usual baseline-normalized coordinates, with some
+// initial crude x-height estimate (such as word size) and this denoting the
+// transformation that was used.
+// Outputs:
+//   *min_xht, *max_xht: accepted x-height range; left at [0, FLT_MAX] when no
+//                       estimate is computed.
+//   *yshift: bln_yshift * yscale as computed below; 0 when no shift is
+//            computed.
+void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox,
+                          float *min_xht, float *max_xht, float *yshift) const {
+  // Default return -- accept anything.
+  *yshift = 0.0f;
+  *min_xht = 0.0f;
+  *max_xht = FLT_MAX;
+
+  // Without usable top/bottom statistics the defaults above are the answer.
+  if (!unicharset.top_bottom_useful()) {
+    return;
+  }
+
+  // Clip the top and bottom to the limit of normalized feature space.
+  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
+  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
+  // A tolerance of yscale corresponds to 1 pixel in the image.
+  double tolerance = y_scale();
+  // If the script doesn't have upper and lower-case characters, widen the
+  // tolerance to allow sloppy baseline/x-height estimates.
+  if (!unicharset.script_has_upper_lower()) {
+    tolerance = y_scale() * kSloppyTolerance;
+  }
+
+  // Expected bottom/top ranges for this unichar, as reported by the unicharset.
+  int min_bottom, max_bottom, min_top, max_top;
+  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
+
+  // Calculate the scale factor we'll use to get to image y-pixels
+  // Two points on the box's vertical mid-line, ydiff apart, are denormalized;
+  // the distance between the results divided by ydiff gives the scale.
+  double midx = (bbox.left() + bbox.right()) / 2.0;
+  double ydiff = (bbox.top() - bbox.bottom()) + 2.0;
+  FCOORD mid_bot(midx, bbox.bottom()), tmid_bot;
+  FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high;
+  DenormTransform(nullptr, mid_bot, &tmid_bot);
+  DenormTransform(nullptr, mid_high, &tmid_high);
+
+  // bln_y_measure * yscale = image_y_measure
+  double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff;
+
+  // Calculate y-shift
+  // bottom_shift / top_shift stay 0 while the clipped bottom / top lies within
+  // tolerance of its expected range; otherwise they hold the signed distance
+  // to the violated end of the range.
+  int bln_yshift = 0, bottom_shift = 0, top_shift = 0;
+  if (bottom < min_bottom - tolerance) {
+    bottom_shift = bottom - min_bottom;
+  } else if (bottom > max_bottom + tolerance) {
+    bottom_shift = bottom - max_bottom;
+  }
+  if (top < min_top - tolerance) {
+    top_shift = top - min_top;
+  } else if (top > max_top + tolerance) {
+    top_shift = top - max_top;
+  }
+  // A shift is applied only when the two shifts do not pull in opposite
+  // directions; it is then the (integer) mean of the two.
+  if ((top_shift >= 0 && bottom_shift > 0) || (top_shift < 0 && bottom_shift < 0)) {
+    bln_yshift = (top_shift + bottom_shift) / 2;
+  }
+  *yshift = bln_yshift * yscale;
+
+  // To help very high cap/xheight ratio fonts accept the correct x-height,
+  // and to allow the large caps in small caps to accept the xheight of the
+  // small caps, add kBlnBaselineOffset to chars with a maximum max, and have
+  // a top already at a significantly high position.
+  if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2) {
+    max_top += kBlnBaselineOffset;
+  }
+  // Heights are measured from kBlnBaselineOffset, after removing the shift.
+  top -= bln_yshift;
+  int height = top - kBlnBaselineOffset;
+  double min_height = min_top - kBlnBaselineOffset - tolerance;
+  double max_height = max_top - kBlnBaselineOffset + tolerance;
+
+  // We shouldn't try calculations if the characters are very short (for example
+  // for punctuation).
+  if (min_height > kBlnXHeight / 8 && height > 0) {
+    // Dividing by the smallest expected height gives the largest acceptable
+    // x-height, and vice versa; the final pixel tolerance widens both ends.
+    float result = height * kBlnXHeight * yscale / min_height;
+    *max_xht = result + kFinalPixelTolerance;
+    result = height * kBlnXHeight * yscale / max_height;
+    *min_xht = result - kFinalPixelTolerance;
+  }
+}
+
+// Prints the content of the DENORM for debug purposes.
+// Prints, via tprintf: the pix dimensions (if a pix is set), the inverse
+// flag, the block rotation (when its x component is not 1), the input origin,
+// then either the x/y maps or the scale and rotation, the final shift, and
+// finally the predecessor chain (recursively).
+void DENORM::Print() const {
+  if (pix_ != nullptr) {
+    tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), pixGetHeight(pix_),
+            pixGetDepth(pix_));
+  }
+  if (inverse_) {
+    tprintf("Inverse\n");
+  }
+  if (block_ && block_->re_rotation().x() != 1.0f) {
+    tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), block_->re_rotation().y());
+  }
+  tprintf("Input Origin = (%g, %g)\n", x_origin_, y_origin_);
+  if (x_map_ != nullptr && y_map_ != nullptr) {
+    tprintf("x map:\n");
+    for (auto x : *x_map_) {
+      tprintf("%g ", x);
+    }
+    tprintf("\ny map:\n");
+    for (auto y : *y_map_) {
+      tprintf("%g ", y);
+    }
+    tprintf("\n");
+  } else {
+    tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_);
+    if (rotation_ != nullptr) {
+      tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y());
+    }
+  }
+  // Print both components of the final shift (x, then y).
+  tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_yshift_);
+  if (predecessor_ != nullptr) {
+    tprintf("Predecessor:\n");
+    predecessor_->Print();
+  }
+}
+
+// ============== Private Code ======================
+
+// Free allocated memory and clear pointers.
+void DENORM::Clear() {
+  // Release everything this object owns...
+  delete x_map_;
+  delete y_map_;
+  delete rotation_;
+  // ...then null the pointers so a later Clear() is harmless.
+  x_map_ = nullptr;
+  y_map_ = nullptr;
+  rotation_ = nullptr;
+}
+
+// Setup default values.
+void DENORM::Init() {
+  // Pointers: nothing attached, nothing owned.
+  pix_ = nullptr;
+  block_ = nullptr;
+  predecessor_ = nullptr;
+  rotation_ = nullptr;
+  x_map_ = nullptr;
+  y_map_ = nullptr;
+  inverse_ = false;
+  // Linear parameters: zero origin and unit scale...
+  x_origin_ = 0.0f;
+  y_origin_ = 0.0f;
+  x_scale_ = 1.0f;
+  y_scale_ = 1.0f;
+  // ...with a final shift of kBlnBaselineOffset in y only.
+  final_xshift_ = 0.0f;
+  final_yshift_ = static_cast<float>(kBlnBaselineOffset);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/normalis.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/normalis.h
@ -0,0 +1,310 @@
+/**********************************************************************
+ * File:        normalis.h  (Formerly denorm.h)
+ * Description: Code for the DENORM class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef NORMALIS_H
+#define NORMALIS_H
+
+#include "image.h"
+
+#include <tesseract/export.h>
+
+#include <vector>
+
+struct Pix;
+
+namespace tesseract {
+
+// Dimensions used by baseline normalization.
+constexpr int kBlnCellHeight = 256;    // Full-height for baseline normalization.
+constexpr int kBlnXHeight = 128;       // x-height for baseline normalization.
+constexpr int kBlnBaselineOffset = 64; // Offset for baseline normalization.
+
+class BLOCK;
+class FCOORD;
+class TBOX;
+class UNICHARSET;
+
+struct TBLOB;
+struct TPOINT;
+
+// The available normalization methods. The values are deliberately NEGATIVE
+// because they also double up as markers for the last sub-classifier.
+enum NormalizationMode {
+  NM_BASELINE = -3,        // Original baseline (BL) normalization.
+  NM_CHAR_ISOTROPIC = -2,  // Character normalization, isotropic variant.
+  NM_CHAR_ANISOTROPIC = -1 // Original character (CN) normalization.
+};
+
+// Holds the parameters of one normalization step and can apply it forwards
+// (Norm) or backwards (Denorm). Steps are chained through predecessor_.
+class TESS_API DENORM {
+public:
+  DENORM();
+
+  // Copying a DENORM is allowed.
+  DENORM(const DENORM &);
+  DENORM &operator=(const DENORM &);
+  ~DENORM();
+
+  // Setup the normalization transformation parameters.
+  // The normalizations applied to a blob are as follows:
+  // 1. An optional block layout rotation that was applied during layout
+  // analysis to make the textlines horizontal.
+  // 2. A normalization transformation (LocalNormTransform):
+  // Subtract the "origin"
+  // Apply an x,y scaling.
+  // Apply an optional rotation.
+  // Add back a final translation.
+  // The origin is in the block-rotated space, and is usually something like
+  // the x-middle of the word at the baseline.
+  // 3. Zero or more further normalization transformations that are applied
+  // in sequence, with a similar pattern to the first normalization transform.
+  //
+  // A DENORM holds the parameters of a single normalization, and can execute
+  // both the LocalNormTransform (a forwards normalization), and the
+  // LocalDenormTransform which is an inverse transform or de-normalization.
+  // A DENORM may point to a predecessor DENORM, which is actually the earlier
+  // normalization, so the full normalization sequence involves executing all
+  // predecessors first and then the transform in "this".
+  // Let x be image coordinates and that we have normalization classes A, B, C
+  // where we first apply A then B then C to get normalized x':
+  // x' = CBAx
+  // Then the backwards (to original coordinates) would be:
+  // x = A^-1 B^-1 C^-1 x'
+  // and A = B->predecessor_ and B = C->predecessor_
+  // NormTransform executes all predecessors recursively, and then this.
+  // NormTransform would be used to transform an image-based feature to
+  // normalized space for use in a classifier
+  // DenormTransform inverts this and then all predecessors. It can be
+  // used to get back to the original image coordinates from normalized space.
+  // The LocalNormTransform member executes just the transformation
+  // in "this" without the layout rotation or any predecessors. It would be
+  // used to run each successive normalization, eg the word normalization,
+  // and later the character normalization.
+
+  // Arguments:
+  // block: if not nullptr, then this is the first transformation, and
+  //        block->re_rotation() needs to be used after the Denorm
+  //        transformation to get back to the image coords.
+  // rotation: if not nullptr, apply this rotation after translation to the
+  //           origin and scaling. (Usually a classify rotation.)
+  // predecessor: if not nullptr, then predecessor has been applied to the
+  //              input space and needs to be undone to complete the inverse.
+  // The above pointers are not owned by this DENORM and are assumed to live
+  // longer than this denorm, except rotation, which is deep copied on input.
+  //
+  // x_origin: The x origin which will be mapped to final_xshift in the result.
+  // y_origin: The y origin which will be mapped to final_yshift in the result.
+  //           Added to result of row->baseline(x) if not nullptr.
+  //           NOTE(review): the signature below has no row argument, so the
+  //           sentence above looks stale - confirm against the implementation.
+  //
+  // x_scale: scale factor for the x-coordinate.
+  // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
+  //          NOTE(review): the signature below has no segs argument either -
+  //          confirm whether y_scale is ever ignored.
+  // Note that these scale factors apply to the same x and y system as the
+  // x-origin and y-origin apply, ie after any block rotation, but before
+  // the rotation argument is applied.
+  //
+  // final_xshift: The x component of the final translation.
+  // final_yshift: The y component of the final translation.
+  //
+  // In theory, any of the commonly used normalizations can be setup here:
+  // * Traditional baseline normalization on a word:
+  // SetupNormalization(block, nullptr, nullptr,
+  //                    box.x_middle(), baseline,
+  //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
+  //                    0, kBlnBaselineOffset);
+  // * "Numeric mode" baseline normalization on a word, in which the blobs
+  //   are positioned with the bottom as the baseline is achieved by making
+  //   a separate DENORM for each blob.
+  // SetupNormalization(block, nullptr, nullptr,
+  //                    box.x_middle(), box.bottom(),
+  //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
+  //                    0, kBlnBaselineOffset);
+  // * Anisotropic character normalization used by IntFx.
+  // SetupNormalization(nullptr, nullptr, denorm,
+  //                    centroid_x, centroid_y,
+  //                    51.2 / ry, 51.2 / rx, 128, 128);
+  // * Normalize blob height to x-height (current OSD):
+  // SetupNormalization(nullptr, &rotation, nullptr,
+  //                    box.rotational_x_middle(rotation),
+  //                    box.rotational_y_middle(rotation),
+  //                    kBlnXHeight / box.rotational_height(rotation),
+  //                    kBlnXHeight / box.rotational_height(rotation),
+  //                    0, kBlnBaselineOffset);
+  // * Secondary normalization for classification rotation (current):
+  // FCOORD rotation = block->classify_rotation();
+  // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
+  // SetupNormalization(nullptr, &rotation, denorm,
+  //                    box.rotational_x_middle(rotation),
+  //                    box.rotational_y_middle(rotation),
+  //                    target_height / box.rotational_height(rotation),
+  //                    target_height / box.rotational_height(rotation),
+  //                    0, kBlnBaselineOffset);
+  // * Proposed new normalizations for CJK: Between them there is then
+  // no need for further normalization at all, and the character fills the cell.
+  // ** Replacement for baseline normalization on a word:
+  // Scales height and width independently so that modal height and pitch
+  // fill the cell respectively.
+  // float cap_height = x_height / CCStruct::kXHeightCapRatio;
+  // SetupNormalization(block, nullptr, nullptr,
+  //                    box.x_middle(), cap_height / 2.0f,
+  //                    kBlnCellHeight / fixed_pitch,
+  //                    kBlnCellHeight / cap_height,
+  //                    0, 0);
+  // ** Secondary normalization for classification (with rotation) (proposed):
+  // Requires a simple translation to the center of the appropriate character
+  // cell, no further scaling and a simple rotation (or nothing) about the
+  // cell center.
+  // FCOORD rotation = block->classify_rotation();
+  // SetupNormalization(nullptr, &rotation, denorm,
+  //                    fixed_pitch_cell_center,
+  //                    0.0f,
+  //                    1.0f,
+  //                    1.0f,
+  //                    0, 0);
+  void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
+                          float x_origin, float y_origin, float x_scale, float y_scale,
+                          float final_xshift, float final_yshift);
+
+  // Sets up the DENORM to execute a non-linear transformation based on
+  // preserving an even distribution of stroke edges. The transformation
+  // operates only within the given box, scaling input coords within the box
+  // non-linearly to a box of target_width by target_height, with all other
+  // coords being clipped to the box edge. As with SetupNormalization above,
+  // final_xshift and final_yshift are applied after scaling, and the bottom-
+  // left of box is used as a pre-scaling origin.
+  // x_coords is a collection of the x-coords of vertical edges for each
+  // y-coord starting at box.bottom().
+  // y_coords is a collection of the y-coords of horizontal edges for each
+  // x-coord starting at box.left().
+  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
+  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
+  // The second-level vectors must all be sorted in ascending order.
+  void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
+                      float target_height, float final_xshift, float final_yshift,
+                      const std::vector<std::vector<int>> &x_coords,
+                      const std::vector<std::vector<int>> &y_coords);
+
+  // Transforms the given coords one step forward to normalized space, without
+  // using any block rotation or predecessor.
+  void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
+  void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
+  // Transforms the given coords forward to normalized space using the
+  // full transformation sequence defined by the block rotation, the
+  // predecessors, deepest first, and finally this. If first_norm is not
+  // nullptr, then the first and deepest transformation used is first_norm,
+  // ending with this, and the block rotation will not be applied.
+  void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
+  void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
+  // Transforms the given coords one step back to source space, without
+  // using any block rotation or predecessor.
+  void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
+  void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
+  // Transforms the given coords all the way back to source image space using
+  // the full transformation sequence defined by this and its predecessors
+  // recursively, shallowest first, and finally any block re_rotation.
+  // If last_denorm is not nullptr, then the last transformation used will
+  // be last_denorm, and the block re_rotation will never be executed.
+  void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
+  void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;
+
+  // Normalize a blob using blob transformations. Less accurate, but
+  // more accurately copies the old way.
+  void LocalNormBlob(TBLOB *blob) const;
+
+  // Fills in the x-height range accepted by the given unichar_id in blob
+  // coordinates, given its bounding box in the usual baseline-normalized
+  // coordinates, with some initial crude x-height estimate (such as word
+  // size) and this denoting the transformation that was used.
+  // Also returns the amount the character must have shifted up or down.
+  void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
+                    float *max_xht, float *yshift) const;
+
+  // Prints the content of the DENORM for debug purposes.
+  void Print() const;
+
+  // Returns the image stored in pix_.
+  Image pix() const {
+    return pix_;
+  }
+  // Replaces the image stored in pix_.
+  void set_pix(Image pix) {
+    pix_ = pix;
+  }
+  // Returns the inverse_ flag (see the member comment below).
+  bool inverse() const {
+    return inverse_;
+  }
+  void set_inverse(bool value) {
+    inverse_ = value;
+  }
+  // Follows predecessor_ links recursively and returns the DENORM at the end
+  // of the chain, i.e. the one with no predecessor (this, if there is none).
+  const DENORM *RootDenorm() const {
+    if (predecessor_ != nullptr) {
+      return predecessor_->RootDenorm();
+    }
+    return this;
+  }
+  // Returns the immediate predecessor, or nullptr if there is none.
+  const DENORM *predecessor() const {
+    return predecessor_;
+  }
+  // Accessors - perhaps should not be needed.
+  float x_scale() const {
+    return x_scale_;
+  }
+  float y_scale() const {
+    return y_scale_;
+  }
+  const BLOCK *block() const {
+    return block_;
+  }
+  void set_block(const BLOCK *block) {
+    block_ = block;
+  }
+
+private:
+  // Free allocated memory and clear pointers.
+  void Clear();
+  // Setup default values.
+  void Init();
+
+  // Best available image.
+  Image pix_;
+  // True if the source image is white-on-black.
+  bool inverse_;
+  // Block the word came from. If not null, block->re_rotation() takes the
+  // "untransformed" coordinates even further back to the original image.
+  // Used only on the first DENORM in a chain. Not owned.
+  const BLOCK *block_;
+  // Rotation to apply between translation to the origin and scaling.
+  // NOTE(review): the SetupNormalization comment says the rotation is applied
+  // after translation and scaling - confirm which ordering is correct.
+  // Deep-copied on input and deleted by Clear(), so owned by the DENORM.
+  const FCOORD *rotation_;
+  // Previous transformation in a chain. Not owned.
+  const DENORM *predecessor_;
+  // Non-linear transformation maps directly from each integer offset from the
+  // origin to the corresponding x-coord. Owned by the DENORM.
+  std::vector<float> *x_map_;
+  // Non-linear transformation maps directly from each integer offset from the
+  // origin to the corresponding y-coord. Owned by the DENORM.
+  std::vector<float> *y_map_;
+  // x-coordinate to be mapped to final_xshift_ in the result.
+  float x_origin_;
+  // y-coordinate to be mapped to final_yshift_ in the result.
+  float y_origin_;
+  // Scale factors for x and y coords. Applied to pre-rotation system.
+  float x_scale_;
+  float y_scale_;
+  // Destination coords of the x_origin_ and y_origin_.
+  float final_xshift_;
+  float final_yshift_;
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrblock.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrblock.cpp
@ -0,0 +1,514 @@
+/**********************************************************************
+ * File:        ocrblock.cpp  (Formerly block.c)
+ * Description: BLOCK member functions and iterator functions.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "ocrblock.h"
+
+#include "stepblob.h"
+#include "tprintf.h"
+
+#include <cstdlib>
+#include <memory> // std::unique_ptr
+
+namespace tesseract {
+
+/**
+ * BLOCK::BLOCK
+ *
+ * Constructor for a simple rectangular block.
+ */
+BLOCK::BLOCK(const char *name,           ///< filename
+             bool prop,                  ///< proportional
+             int16_t kern,               ///< kerning
+             int16_t space,              ///< spacing
+             int16_t xmin,               ///< bottom left
+             int16_t ymin, int16_t xmax, ///< top right
+             int16_t ymax)
+    : pdblk(xmin, ymin, xmax, ymax)
+    , filename(name)
+    , re_rotation_(1.0f, 0.0f)
+    , classify_rotation_(1.0f, 0.0f)
+    , skew_(1.0f, 0.0f) {
+  // Spacing statistics supplied by the caller, plus fixed defaults.
+  proportional = prop;
+  kerning = kern;
+  spacing = space;
+  font_class = -1; // not assigned
+  cell_over_xheight_ = 2.0f;
+  pdblk.hand_poly = nullptr;
+
+  // Make the default box: two corner points appended to each side list.
+  ICOORDELT_IT left_edge(&pdblk.leftside);
+  left_edge.add_to_end(new ICOORDELT(xmin, ymin));
+  left_edge.add_to_end(new ICOORDELT(xmin, ymax));
+  ICOORDELT_IT right_edge(&pdblk.rightside);
+  right_edge.add_to_end(new ICOORDELT(xmax, ymin));
+  right_edge.add_to_end(new ICOORDELT(xmax, ymax));
+}
+
+/**
+ * decreasing_top_order
+ *
+ * Sort comparator over pointers to ROW pointers. Returns <0 if row1 top >
+ * row2 top, so that sorting puts the row with the largest top first.
+ */
+
+static int decreasing_top_order(const void *row1, const void *row2) {
+  ROW *const first = *reinterpret_cast<ROW *const *>(row1);
+  ROW *const second = *reinterpret_cast<ROW *const *>(row2);
+  return second->bounding_box().top() - first->bounding_box().top();
+}
+
+/**
+ * BLOCK::rotate
+ *
+ * Rotate the polygon by the given rotation and recompute the bounding_box.
+ */
+void BLOCK::rotate(const FCOORD &rotation) {
+  // NOTE(review): the polygon is dereferenced unchecked, so callers presumably
+  // guarantee pdblk.poly_block() is non-null here - confirm.
+  auto *polygon = pdblk.poly_block();
+  polygon->rotate(rotation);
+  // Refresh the block's box from the rotated polygon.
+  pdblk.box = *polygon->bounding_box();
+}
+
+// Returns the bounding box including the desired combination of upper and
+// lower noise/diacritic elements.
+TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
+  // The iterator wants a non-const list; the rows are only read here.
+  ROW_IT row_it(const_cast<ROW_LIST *>(&rows));
+  TBOX combined;
+  row_it.mark_cycle_pt();
+  while (!row_it.cycled_list()) {
+    // Accumulate each row's restricted box into the result.
+    combined += row_it.data()->restricted_bounding_box(upper_dots, lower_dots);
+    row_it.forward();
+  }
+  return combined;
+}
+
+/**
+ * BLOCK::reflect_polygon_in_y_axis
+ *
+ * Reflects the polygon in the y-axis and recompute the bounding_box.
+ * Does nothing to any contained rows/words/blobs etc.
+ */
+void BLOCK::reflect_polygon_in_y_axis() {
+  // NOTE(review): as in rotate(), the polygon is dereferenced without a null
+  // check - confirm callers guarantee it exists.
+  auto *polygon = pdblk.poly_block();
+  polygon->reflect_in_y_axis();
+  // Refresh the block's box from the reflected polygon.
+  pdblk.box = *polygon->bounding_box();
+}
+
+/**
+ * BLOCK::sort_rows
+ *
+ * Order rows so that they are in order of decreasing Y coordinate
+ */
+
+void BLOCK::sort_rows() { // order on "top"
+  // Sorting is delegated to the list iterator, ordered by decreasing_top_order.
+  ROW_IT sorter(&rows);
+  sorter.sort(decreasing_top_order);
+}
+
+/**
+ * BLOCK::compress
+ *
+ * Delete space between the rows. (And maybe one day, compress the rows)
+ * Fill space of block from top down, left aligning rows.
+ */
+
+void BLOCK::compress() { // squash it up
+  // Gap used between consecutive rows. A scoped constant replaces the former
+  // function-local #define, which stayed defined for the rest of the file.
+  constexpr int kRowSpacing = 5;
+  ICOORD row_spacing(0, kRowSpacing);
+
+  sort_rows();
+
+  // Collapse the box to its top-left corner, shift its bottom edge by the row
+  // spacing, then grow it as each row is moved into place.
+  pdblk.box = TBOX(pdblk.box.topleft(), pdblk.box.topleft());
+  pdblk.box.move_bottom_edge(kRowSpacing);
+  // Bind the iterator only after sorting, so that traversal starts from the
+  // head of the re-ordered list rather than from a pre-sort position.
+  ROW_IT row_it(&rows);
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    ROW *row = row_it.data();
+    // Move the row so its top-left sits one spacing below the box's current
+    // bottom-left, then extend the box to include it.
+    row->move(pdblk.box.botleft() - row_spacing - row->bounding_box().topleft());
+    pdblk.box += row->bounding_box();
+  }
+
+  // Rebuild both side lists as the corners of the new box.
+  pdblk.leftside.clear();
+  ICOORDELT_IT left_it(&pdblk.leftside);
+  left_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom()));
+  left_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top()));
+  pdblk.rightside.clear();
+  ICOORDELT_IT right_it(&pdblk.rightside);
+  right_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom()));
+  right_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top()));
+}
+
+/**
+ * BLOCK::check_pitch
+ *
+ * Check whether the block is fixed or prop, set the flag, and set
+ * the pitch if it is fixed.
+ */
+
+void BLOCK::check_pitch() { // check prop
+  // No pitch detection is performed here (a disabled diagnostic used to read
+  // "Missing FFT fixed pitch stuff!"); the pitch is unconditionally set to -1.
+  pitch = -1;
+}
+
+/**
+ * BLOCK::compress
+ *
+ * Compress and move in a single operation.
+ */
+
+void BLOCK::compress(const ICOORD vec) {
+  // Translate the box by vec first, then pack the rows with compress().
+  pdblk.box.move(vec);
+  compress();
+}
+
+/**
+ * BLOCK::print
+ *
+ * Print the info on a block
+ */
+
+void BLOCK::print( // print list of sides
+    FILE *,        ///< file to print on
+    bool dump      ///< print full detail
+) {
+  // All output goes through tprintf; the unnamed FILE* argument is not used.
+  pdblk.box.print();
+  tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
+  tprintf("Kerning= %d\n", kerning);
+  tprintf("Spacing= %d\n", spacing);
+  tprintf("Fixed_pitch=%d\n", pitch);
+  tprintf("Filename= %s\n", filename.c_str());
+
+  if (!dump) {
+    return;
+  }
+
+  // Full detail: list the coordinates of the left side, then the right side.
+  ICOORDELT_IT side_it(&pdblk.leftside);
+  tprintf("Left side coords are:\n");
+  for (side_it.mark_cycle_pt(); !side_it.cycled_list(); side_it.forward()) {
+    tprintf("(%d,%d) ", side_it.data()->x(), side_it.data()->y());
+  }
+  tprintf("\n");
+  tprintf("Right side coords are:\n");
+  side_it.set_to_list(&pdblk.rightside);
+  for (side_it.mark_cycle_pt(); !side_it.cycled_list(); side_it.forward()) {
+    tprintf("(%d,%d) ", side_it.data()->x(), side_it.data()->y());
+  }
+  tprintf("\n");
+}
+
+/**
+ * BLOCK::operator=
+ *
+ * Assignment - duplicate the block structure, but with an EMPTY row list.
+ */
+
+BLOCK &BLOCK::operator=( // assignment
+    const BLOCK &source  // from this
+) {
+  this->ELIST_LINK::operator=(source);
+
+  // Geometry, spacing statistics and file name are copied from the source.
+  // NOTE(review): font_class, cell_over_xheight_ and pitch are assigned
+  // elsewhere in this file but are not copied here - confirm this is intended.
+  pdblk = source.pdblk;
+  proportional = source.proportional;
+  kerning = source.kerning;
+  spacing = source.spacing;
+  filename = source.filename;
+
+  // Rows are never duplicated: the destination ends up with an empty list.
+  rows.clear();
+
+  re_rotation_ = source.re_rotation_;
+  classify_rotation_ = source.classify_rotation_;
+  skew_ = source.skew_;
+  return *this;
+}
+
+// This function is for finding the approximate (horizontal) distance from
+// the x-coordinate of the left edge of a symbol to the left edge of the
+// text block which contains it.  We are passed:
+//   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
+//       intervals for the scan line going through the symbol's y-coordinate.
+//       Each element of segments is of the form (x()=start_x, y()=length).
+//   x - the x coordinate of the symbol we're interested in.
+//   margin - return value, the distance from x,y to the left margin of the
+//       block containing it.
+// If all segments were to the right of x, we return false and 0.
+static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
+  *margin = 0;
+  if (segments->empty()) {
+    return false;
+  }
+  bool any_usable = false;
+  ICOORDELT_IT seg_it(segments);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    const int distance = x - seg_it.data()->x();
+    if (distance < 0) {
+      continue; // This segment starts to the right of x.
+    }
+    // Keep the smallest non-negative distance seen so far.
+    if (!any_usable || distance < *margin) {
+      *margin = distance;
+    }
+    any_usable = true;
+  }
+  return any_usable;
+}
+
+// This function is for finding the approximate (horizontal) distance from
+// the x-coordinate of the right edge of a symbol to the right edge of the
+// text block which contains it.  We are passed:
+//   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
+//       intervals for the scan line going through the symbol's y-coordinate.
+//       Each element of segments is of the form (x()=start_x, y()=length).
+//   x - the x coordinate of the symbol we're interested in.
+//   margin - return value, the distance from x,y to the right margin of the
+//       block containing it.
+// If all segments were to the left of x, we return false and 0.
+static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
+  *margin = 0;
+  if (segments->empty()) {
+    return false;
+  }
+  bool any_usable = false;
+  ICOORDELT_IT seg_it(segments);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    // A segment's end is its start (x()) plus its length (y()).
+    const int distance = seg_it.data()->x() + seg_it.data()->y() - x;
+    if (distance < 0) {
+      continue; // This segment ends to the left of x.
+    }
+    // Keep the smallest non-negative distance seen so far.
+    if (!any_usable || distance < *margin) {
+      *margin = distance;
+    }
+    any_usable = true;
+  }
+  return any_usable;
+}
+
+// Compute the distance from the left and right ends of each row to the
+// left and right edges of the block's polyblock.  Illustration:
+//  ____________________________   _______________________
+//  |  Howdy neighbor!         |  |rectangular blocks look|
+//  |  This text is  written to|  |more like stacked pizza|
+//  |illustrate how useful poly-  |boxes.                 |
+//  |blobs  are   in -----------  ------   The    polyblob|
+//  |dealing    with|     _________     |for a BLOCK  rec-|
+//  |harder   layout|   /===========\   |ords the possibly|
+//  |issues.        |    |  _    _  |   |skewed    pseudo-|
+//  |  You  see this|    | |_| \|_| |   |rectangular      |
+//  |text is  flowed|    |      }   |   |boundary     that|
+//  |around  a  mid-|     \   ____  |   |forms the  ideal-|
+//  |column portrait._____ \       /  __|ized  text margin|
+//  |  Polyblobs     exist| \    /   |from which we should|
+//  |to account for insets|  |   |   |measure    paragraph|
+//  |which make  otherwise|  -----   |indentation.        |
+//  -----------------------          ----------------------
+//
+// If we identify a drop-cap, we measure the left margin for the lines
+// below the first line relative to one space past the drop cap.  The
+// first line's margin and those past the drop cap area are measured
+// relative to the enclosing polyblock.
+//
+// TODO(rays): Before this will work well, we'll need to adjust the
+//             polyblob tighter around the text near images, as in:
+//             UNLV_AUTO:mag.3G0  page 2
+//             UNLV_AUTO:mag.3G4  page 16
+void BLOCK::compute_row_margins() {
+  // Nothing is computed for a block with fewer than two rows.
+  if (row_list()->empty() || row_list()->singleton()) {
+    return;
+  }
+
+  // Use the block's polygon when it has one; otherwise (layout analysis was
+  // not called) fall back to a rectangle built from the bounding box.
+  POLY_BLOCK fallback_outline(pdblk.bounding_box(), PT_FLOWING_TEXT);
+  POLY_BLOCK *outline =
+      pdblk.poly_block() != nullptr ? pdblk.poly_block() : &fallback_outline;
+
+  // Step One: Determine if there is a drop-cap.
+  //           TODO(eger): Fix up drop cap code for RTL languages.
+  ROW_IT row_it(row_list());
+  ROW *top_row = row_it.data();
+  ROW *next_row = row_it.data_relative(1);
+  TBOX top_box = top_row->bounding_box();
+  TBOX next_box = next_row->bounding_box();
+
+  // Begin with a fictitious drop cap whose bottom is far above the first line.
+  int drop_cap_bottom = top_box.top() + top_box.height();
+  int drop_cap_right = top_box.left();
+  const int mid_second_line = next_box.top() - next_box.height() / 2;
+  WERD_IT word_it(top_row->word_list()); // words of line one
+  if (!word_it.empty()) {
+    // Only the blobs of the word the iterator starts on are examined.
+    C_BLOB_IT blob_it(word_it.data()->cblob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      TBOX blob_box = blob_it.data()->bounding_box();
+      if (blob_box.bottom() > mid_second_line) {
+        continue; // Does not reach the middle of the second line.
+      }
+      // A real drop cap: widen the drop-cap extent to include this blob.
+      top_row->set_has_drop_cap(true);
+      if (blob_box.bottom() < drop_cap_bottom) {
+        drop_cap_bottom = blob_box.bottom();
+      }
+      if (blob_box.right() > drop_cap_right) {
+        drop_cap_right = blob_box.right();
+      }
+    }
+  }
+
+  // Step Two: Calculate the margin from the text of each row to the block
+  //           (or drop-cap) boundaries.
+  PB_LINE_IT scan_lines(outline);
+  row_it.set_to_list(row_list());
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    ROW *row = row_it.data();
+    TBOX row_box = row->bounding_box();
+
+    // Left margin, measured on the scan line at x-height above the baseline.
+    int left_y = row->base_line(row_box.left()) + row->x_height();
+    int left_margin;
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> left_segments(
+        scan_lines.get_line(left_y));
+    LeftMargin(left_segments.get(), row_box.left(), &left_margin);
+
+    if (row_box.top() >= drop_cap_bottom) {
+      // Also measure to one space past the drop cap (never negative) and keep
+      // whichever distance is smaller.
+      const int raw_distance = row_box.left() - row->space() - drop_cap_right;
+      const int drop_cap_distance = raw_distance < 0 ? 0 : raw_distance;
+      if (drop_cap_distance < left_margin) {
+        left_margin = drop_cap_distance;
+      }
+    }
+
+    // Right margin, measured the same way at the row's right end.
+    int right_y = row->base_line(row_box.right()) + row->x_height();
+    int right_margin;
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> right_segments(
+        scan_lines.get_line(right_y));
+    RightMargin(right_segments.get(), row_box.right(), &right_margin);
+
+    row->set_lmargin(left_margin);
+    row->set_rmargin(right_margin);
+  }
+}
+
+/**********************************************************************
+ * PrintSegmentationStats
+ *
+ * Prints segmentation stats for the given block list.
+ **********************************************************************/
+
+void PrintSegmentationStats(BLOCK_LIST *block_list) {
+  int block_count = 0;
+  int row_count = 0;
+  int word_count = 0;
+  int blob_count = 0;
+  // Walk blocks -> rows -> words, tallying each level; blobs are counted from
+  // the length of every word's cblob list.
+  BLOCK_IT blocks(block_list);
+  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
+    ++block_count;
+    ROW_IT rows(blocks.data()->row_list());
+    for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+      ++row_count;
+      WERD_IT words(rows.data()->word_list());
+      for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+        ++word_count;
+        blob_count += words.data()->cblob_list()->length();
+      }
+    }
+  }
+  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", block_count,
+          row_count, word_count, blob_count);
+}
+
+/**********************************************************************
+ * ExtractBlobsFromSegmentation
+ *
+ * Extracts blobs from the given block list and adds them to the output list.
+ * The block list must have been created by performing a page segmentation.
+ **********************************************************************/
+
+void ExtractBlobsFromSegmentation(BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list) {
+  C_BLOB_IT out_it(output_blob_list);
+  BLOCK_IT block_it(blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    ROW_IT row_it(block_it.data()->row_list());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      WERD_IT word_it(row_it.data()->word_list());
+      for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+        WERD *word = word_it.data();
+        // Append the word's regular blobs, then its rejected blobs, each time
+        // at the end of the output list.
+        out_it.move_to_last();
+        out_it.add_list_after(word->cblob_list());
+        out_it.move_to_last();
+        out_it.add_list_after(word->rej_cblob_list());
+      }
+    }
+  }
+}
+
+/**********************************************************************
+ * RefreshWordBlobsFromNewBlobs()
+ *
+ * Refreshes the words in the block_list by using blobs in the
+ * new_blobs list.
+ * Block list must have word segmentation in it.
+ * It consumes the blobs provided in the new_blobs list. The blobs leftover in
+ * the new_blobs list after the call weren't matched to any blobs of the words
+ * in block list.
+ * The output not_found_blobs is a list of blobs from the original segmentation
+ * in the block_list for which no corresponding new blobs were found.
+ **********************************************************************/
+
+void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs,
+                                  C_BLOB_LIST *not_found_blobs) {
+  // Visit every word of every text block and rebuild it from new_blobs.
+  BLOCK_IT block_it(block_list);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    BLOCK *block = block_it.data();
+    const bool is_non_text =
+        block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText();
+    if (is_non_text) {
+      continue; // Don't touch non-text blocks.
+    }
+    ROW_IT row_it(block->row_list());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      ROW *row = row_it.data();
+      WERD_IT werd_it(row->word_list());
+      // Replacement words are collected here and swapped in after the loop.
+      WERD_LIST rebuilt_words;
+      WERD_IT rebuilt_it(&rebuilt_words);
+      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
+        WERD *old_werd = werd_it.extract();
+        WERD *fresh_werd = old_werd->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs);
+        // Keep the rebuilt word when there is one. Otherwise reinsert the old
+        // word, for lack of better options: dropping words messes up
+        // segmentation (eg. the 1st word in the row might otherwise have
+        // W_FUZZY_NON turned on).
+        rebuilt_it.add_after_then_move(fresh_werd ? fresh_werd : old_werd);
+        if (fresh_werd) {
+          delete old_werd;
+        }
+      }
+      // Get rid of the old word list & replace it with the new one.
+      row->word_list()->clear();
+      werd_it.move_to_first();
+      werd_it.add_list_after(&rebuilt_words);
+    }
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrblock.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrblock.h
@ -0,0 +1,227 @@
+/**********************************************************************
+ * File:        ocrblock.h  (Formerly block.h)
+ * Description: Page block class definition.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef OCRBLOCK_H
+#define OCRBLOCK_H
+
+#include "ocrpara.h"
+#include "ocrrow.h"
+#include "pdblock.h"
+
+namespace tesseract {
+
+class BLOCK; // forward decl
+
+ELISTIZEH(BLOCK)
+
+/// A page block: a page-description region (pdblk) plus the rows, paragraphs
+/// and blobs stored for it, and its pitch/spacing statistics.
+class TESS_API BLOCK : public ELIST_LINK {
+  friend class BLOCK_RECT_IT; // block iterator
+
+public:
+  /// Builds an empty block; the three rotation vectors start as (1, 0).
+  BLOCK() : re_rotation_(1.0f, 0.0f), classify_rotation_(1.0f, 0.0f), skew_(1.0f, 0.0f) {}
+  BLOCK(const char *name, ///< filename
+        bool prop,        ///< proportional
+        int16_t kern,     ///< kerning
+        int16_t space,    ///< spacing
+        int16_t xmin,     ///< bottom left
+        int16_t ymin,
+        int16_t xmax, ///< top right
+        int16_t ymax);
+
+  ~BLOCK() = default;
+
+  /**
+   * set space size etc.
+   * @param prop proportional
+   * @param kern inter char size
+   * @param space inter word size
+   * @param ch_pitch pitch if fixed
+   */
+  void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch) {
+    proportional = prop;
+    // Stored at full 16-bit width so that kern() returns exactly what was set.
+    kerning = kern;
+    spacing = space;
+    pitch = ch_pitch;
+  }
+  /// set char size
+  void set_xheight(int32_t height) {
+    xheight = height;
+  }
+  /// set font class
+  void set_font_class(int16_t font) {
+    font_class = font;
+  }
+  /// return proportional
+  bool prop() const {
+    return proportional;
+  }
+  /// return whether the major script is right to left
+  bool right_to_left() const {
+    return right_to_left_;
+  }
+  void set_right_to_left(bool value) {
+    right_to_left_ = value;
+  }
+  /// return pitch
+  int32_t fixed_pitch() const {
+    return pitch;
+  }
+  /// return kerning
+  int16_t kern() const {
+    return kerning;
+  }
+  /// return font class
+  int16_t font() const {
+    return font_class;
+  }
+  /// return spacing
+  int16_t space() const {
+    return spacing;
+  }
+  /// return filename
+  const char *name() const {
+    return filename.c_str();
+  }
+  /// return xheight
+  int32_t x_height() const {
+    return xheight;
+  }
+  /// return ratio of cell height to xheight
+  float cell_over_xheight() const {
+    return cell_over_xheight_;
+  }
+  void set_cell_over_xheight(float ratio) {
+    cell_over_xheight_ = ratio;
+  }
+  /// get rows
+  ROW_LIST *row_list() {
+    return &rows;
+  }
+  // Compute the margins between the edges of each row and this block's
+  // polyblock, and store the results in the rows.
+  void compute_row_margins();
+
+  /// get paragraphs
+  PARA_LIST *para_list() {
+    return &paras_;
+  }
+  /// get blobs
+  C_BLOB_LIST *blob_list() {
+    return &c_blobs;
+  }
+  /// get rejected blobs
+  C_BLOB_LIST *reject_blobs() {
+    return &rej_blobs;
+  }
+  FCOORD re_rotation() const {
+    return re_rotation_; // How to transform coords back to image.
+  }
+  void set_re_rotation(const FCOORD &rotation) {
+    re_rotation_ = rotation;
+  }
+  FCOORD classify_rotation() const {
+    return classify_rotation_; // Apply this before classifying.
+  }
+  void set_classify_rotation(const FCOORD &rotation) {
+    classify_rotation_ = rotation;
+  }
+  FCOORD skew() const {
+    return skew_; // Direction of true horizontal.
+  }
+  void set_skew(const FCOORD &skew) {
+    skew_ = skew;
+  }
+  /// return median size of blobs
+  const ICOORD &median_size() const {
+    return median_size_;
+  }
+  void set_median_size(int x, int y) {
+    median_size_.set_x(x);
+    median_size_.set_y(y);
+  }
+
+  // Forwards to pdblk.render_mask(), passing this block's re_rotation_.
+  Image render_mask(TBOX *mask_box) {
+    return pdblk.render_mask(re_rotation_, mask_box);
+  }
+
+  // Returns the bounding box including the desired combination of upper and
+  // lower noise/diacritic elements.
+  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
+
+  // Reflects the polygon in the y-axis and recomputes the bounding_box.
+  // Does nothing to any contained rows/words/blobs etc.
+  void reflect_polygon_in_y_axis();
+
+  void rotate(const FCOORD &rotation);
+
+  /// decreasing y order
+  void sort_rows();
+
+  /// shrink white space
+  void compress();
+
+  /// check proportional
+  void check_pitch();
+
+  /// shrink white space and move by vector
+  void compress(const ICOORD vec);
+
+  /// dump whole table
+  void print(FILE *fp, bool dump);
+
+  BLOCK &operator=(const BLOCK &source);
+  PDBLK pdblk; ///< Page Description Block
+
+private:
+  bool proportional = false;       ///< proportional
+  bool right_to_left_ = false;     ///< major script is right to left.
+  int16_t kerning = 0;             ///< inter blob gap (same width as set_stats()/kern())
+  int16_t spacing = 0;             ///< inter word gap
+  int16_t pitch = 0;               ///< pitch of non-props
+  int16_t font_class = 0;          ///< correct font class
+  int32_t xheight = 0;             ///< height of chars
+  float cell_over_xheight_ = 0.0f; ///< Ratio of cell height to xheight.
+  std::string filename;            ///< name of block
+  ROW_LIST rows;                   ///< rows in block
+  PARA_LIST paras_;                ///< paragraphs of block
+  C_BLOB_LIST c_blobs;             ///< before textord
+  C_BLOB_LIST rej_blobs;           ///< duff stuff
+  FCOORD re_rotation_;             ///< How to transform coords back to image.
+  FCOORD classify_rotation_;       ///< Apply this before classifying.
+  FCOORD skew_;                    ///< Direction of true horizontal.
+  ICOORD median_size_;             ///< Median size of blobs.
+};
+
+// A function to print segmentation stats for the given block list.
+void PrintSegmentationStats(BLOCK_LIST *block_list);
+
+// Extracts blobs from the given block list and adds them to the output list.
+// The block list must have been created by performing a page segmentation.
+void ExtractBlobsFromSegmentation(BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list);
+
+// Refreshes the words in the block_list by using blobs in the
+// new_blobs list.
+// Block list must have word segmentation in it.
+// It consumes the blobs provided in the new_blobs list. The blobs leftover in
+// the new_blobs list after the call weren't matched to any blobs of the words
+// in block list.
+// The output not_found_blobs is a list of blobs from the original segmentation
+// in the block_list for which no corresponding new blobs were found.
+void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs,
+                                  C_BLOB_LIST *not_found_blobs);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrpara.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrpara.cpp
@ -0,0 +1,93 @@
+/////////////////////////////////////////////////////////////////////
+// File:        ocrpara.cpp
+// Description: OCR Paragraph Output Type
+// Author:      David Eger
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "ocrpara.h"
+
+#include "host.h" // For NearlyEqual()
+
+#include <cstdio>
+
+namespace tesseract {
+
+using tesseract::JUSTIFICATION_CENTER;
+using tesseract::JUSTIFICATION_LEFT;
+using tesseract::JUSTIFICATION_RIGHT;
+using tesseract::JUSTIFICATION_UNKNOWN;
+
+// Maps a justification value to its upper-case name. Anything other than
+// LEFT, RIGHT or CENTER is reported as "UNKNOWN".
+static const char *ParagraphJustificationToString(tesseract::ParagraphJustification justification) {
+  if (justification == JUSTIFICATION_LEFT) {
+    return "LEFT";
+  }
+  if (justification == JUSTIFICATION_RIGHT) {
+    return "RIGHT";
+  }
+  if (justification == JUSTIFICATION_CENTER) {
+    return "CENTER";
+  }
+  return "UNKNOWN";
+}
+
+// Returns true if a line with the given margins/indents matches this model's
+// first-line geometry. Left/right justified models compare the whitespace on
+// their own side against margin_ + first_indent_ within tolerance_; centered
+// models compare the two indents within twice tolerance_. Any other
+// justification never matches.
+bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const {
+  if (justification_ == JUSTIFICATION_LEFT) {
+    return NearlyEqual(lmargin + lindent, margin_ + first_indent_, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_RIGHT) {
+    return NearlyEqual(rmargin + rindent, margin_ + first_indent_, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_CENTER) {
+    return NearlyEqual(lindent, rindent, tolerance_ * 2);
+  }
+  // shouldn't happen
+  return false;
+}
+
+// Returns true if a line with the given margins/indents matches this model's
+// body-line geometry. Left/right justified models compare the whitespace on
+// their own side against margin_ + body_indent_ within tolerance_; centered
+// models compare the two indents within twice tolerance_. Any other
+// justification never matches.
+bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const {
+  if (justification_ == JUSTIFICATION_LEFT) {
+    return NearlyEqual(lmargin + lindent, margin_ + body_indent_, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_RIGHT) {
+    return NearlyEqual(rmargin + rindent, margin_ + body_indent_, tolerance_);
+  }
+  if (justification_ == JUSTIFICATION_CENTER) {
+    return NearlyEqual(lindent, rindent, tolerance_ * 2);
+  }
+  // shouldn't happen
+  return false;
+}
+
+// Two models are comparable when they share a justification and, for
+// left/right justified models, their first-line and body-line offsets agree
+// within a quarter of the summed tolerances.
+bool ParagraphModel::Comparable(const ParagraphModel &other) const {
+  if (justification_ != other.justification_) {
+    return false;
+  }
+  // Centered and unknown models carry no offsets worth comparing.
+  const bool offsets_matter =
+      justification_ != JUSTIFICATION_CENTER && justification_ != JUSTIFICATION_UNKNOWN;
+  if (!offsets_matter) {
+    return true;
+  }
+  int tolerance = (tolerance_ + other.tolerance_) / 4;
+  if (!NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance)) {
+    return false;
+  }
+  return NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, tolerance);
+}
+
+// Formats the model's margin, indents and justification name as one line of
+// text.
+std::string ParagraphModel::ToString() const {
+  static const char kFormat[] = "margin: %d, first_indent: %d, body_indent: %d, alignment: %s";
+  char text[200];
+  snprintf(text, sizeof(text), kFormat, margin_, first_indent_, body_indent_,
+           ParagraphJustificationToString(justification_));
+  return std::string(text);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrpara.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrpara.h
@ -0,0 +1,203 @@
+/////////////////////////////////////////////////////////////////////
+// File:        ocrpara.h
+// Description: OCR Paragraph Output Type
+// Author:      David Eger
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_OCRPARA_H_
+#define TESSERACT_CCSTRUCT_OCRPARA_H_
+
+#include "elst.h"
+
+#include <tesseract/publictypes.h>
+
+namespace tesseract {
+
+class ParagraphModel;
+
+// Per-paragraph record: a (non-owned) model pointer plus three flags. All
+// fields start out as nullptr / false.
+struct PARA : public ELIST_LINK {
+public:
+  PARA() = default;
+
+  // Borrowed reference to the paragraph's model; not owned by the PARA.
+  // Stays nullptr when no good model exists for this paragraph.
+  const ParagraphModel *model = nullptr;
+
+  bool is_list_item = false;
+
+  // The first paragraph on a page often has no first-line indent, yet it
+  // should be described by the same model as the other body text paragraphs
+  // on the page.
+  bool is_very_first_or_continuation = false;
+
+  // True when the paragraph starts with a drop cap.
+  bool has_drop_cap = false;
+};
+
+ELISTIZEH(PARA)
+
+// A geometric model of paragraph indentation and alignment.
+//
+// Measurements are in pixels. The meaning of the integer arguments changes
+// depending upon the value of justification.  Distances less than or equal
+// to tolerance apart we take as "equivalent" for the purpose of model
+// matching, and in the examples below, we assume tolerance is zero.
+//
+// justification = LEFT:
+//   margin       the "ignored" margin to the left block edge.
+//   first_indent indent from the left margin to a typical first text line.
+//   body_indent  indent from the left margin of a typical body text line.
+//
+// justification = RIGHT:
+//   margin       the "ignored" margin to the right block edge.
+//   first_indent indent from the right margin to a typical first text line.
+//   body_indent  indent from the right margin of a typical body text line.
+//
+// justification = CENTER:
+//   margin       ignored
+//   first_indent ignored
+//   body_indent  ignored
+//
+//  ====== Extended example, assuming each letter is ten pixels wide: =======
+//
+// +--------------------------------+
+// |      Awesome                   | ParagraphModel(CENTER, 0, 0, 0)
+// |   Centered Title               |
+// | Paragraph Detection            |
+// |      OCR TEAM                  |
+// |  10 November 2010              |
+// |                                |
+// |  Look here, I have a paragraph.| ParagraphModel(LEFT, 0, 20, 0)
+// |This paragraph starts at the top|
+// |of the page and takes 3 lines.  |
+// |  Here I have a second paragraph| ParagraphModel(LEFT, 0, 20, 0)
+// |which indicates that the first  |
+// |paragraph is not a continuation |
+// |from a previous page, as it is  |
+// |indented just like this second  |
+// |paragraph.                      |
+// |   Here is a block quote. It    | ParagraphModel(LEFT, 30, 0, 0)
+// |   looks like the prior text    |
+// |   but it  is indented  more    |
+// |   and is fully justified.      |
+// |  So how does one deal with     | ParagraphModel(LEFT, 0, 20, 0)
+// |centered text, block quotes,    |
+// |normal paragraphs, and lists    |
+// |like what follows?              |
+// |1. Make a plan.                 | ParagraphModel(LEFT, 0, 0, 30)
+// |2. Use a heuristic, for example,| ParagraphModel(LEFT, 0, 0, 30)
+// |   looking for lines where the  |
+// |   first word of the next line  |
+// |   would fit on the previous    |
+// |   line.                        |
+// |8. Try to implement the plan in | ParagraphModel(LEFT, 0, 0, 30)
+// |   Python and try it out.       |
+// |4. Determine how to fix the     | ParagraphModel(LEFT, 0, 0, 30)
+// |   mistakes.                    |
+// |5. Repeat.                      | ParagraphModel(LEFT, 0, 0, 30)
+// |  For extra painful penalty work| ParagraphModel(LEFT, 0, 20, 0)
+// |you can try to identify source  |
+// |code.  Ouch!                    |
+// +--------------------------------+
+class TESS_API ParagraphModel {
+public:
+  // Stores the given geometry in normalized form: the smaller of first_indent
+  // and body_indent is folded into the margin, so that one of the two stored
+  // indents ends up 0.
+  ParagraphModel(tesseract::ParagraphJustification justification, int margin, int first_indent,
+                 int body_indent, int tolerance)
+      : justification_(justification)
+      , margin_(margin)
+      , first_indent_(first_indent)
+      , body_indent_(body_indent)
+      , tolerance_(tolerance) {
+    const int shared_indent = (body_indent < first_indent) ? body_indent : first_indent;
+    margin_ += shared_indent;
+    first_indent_ -= shared_indent;
+    body_indent_ -= shared_indent;
+  }
+
+  // An unknown-justification model with every measurement set to 0.
+  ParagraphModel() : ParagraphModel(tesseract::JUSTIFICATION_UNKNOWN, 0, 0, 0, 0) {}
+
+  // ValidFirstLine() and ValidBodyLine() take arguments describing a text line
+  // in a block of text which we are trying to model:
+  //   lmargin, lindent:  these add up to the distance from the leftmost ink
+  //                      in the text line to the surrounding text block's left
+  //                      edge.
+  //   rmargin, rindent:  these add up to the distance from the rightmost ink
+  //                      in the text line to the surrounding text block's right
+  //                      edge.
+  // The caller determines the division between "margin" and "indent", which
+  // only actually affects whether we think the line may be centered.
+  //
+  // If the amount of whitespace matches the amount of whitespace expected on
+  // the relevant side of the line (within tolerance_) we say it matches.
+
+  // Return whether a given text line could be a first paragraph line according
+  // to this paragraph model.
+  bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const;
+
+  // Return whether a given text line could be a body paragraph line according
+  // to this paragraph model.
+  bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const;
+
+  tesseract::ParagraphJustification justification() const {
+    return justification_;
+  }
+  int margin() const {
+    return margin_;
+  }
+  int first_indent() const {
+    return first_indent_;
+  }
+  int body_indent() const {
+    return body_indent_;
+  }
+  int tolerance() const {
+    return tolerance_;
+  }
+  // True for a left- or right-justified model whose first-line and body
+  // indents differ by no more than the tolerance.
+  bool is_flush() const {
+    const bool left_or_right = justification_ == tesseract::JUSTIFICATION_LEFT ||
+                               justification_ == tesseract::JUSTIFICATION_RIGHT;
+    return left_or_right && abs(first_indent_ - body_indent_) <= tolerance_;
+  }
+
+  // Return whether this model is likely to agree with the other model on most
+  // paragraphs they are marked.
+  bool Comparable(const ParagraphModel &other) const;
+
+  // Return a one-line textual description of the model.
+  std::string ToString() const;
+
+private:
+  tesseract::ParagraphJustification justification_;
+  int margin_;
+  int first_indent_;
+  int body_indent_;
+  int tolerance_;
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_OCRPARA_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrrow.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrrow.cpp
@ -0,0 +1,245 @@
+/**********************************************************************
+ * File:        ocrrow.cpp  (Formerly row.c)
+ * Description: Code for the ROW class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "blobbox.h"
+#include "ocrrow.h"
+
+namespace tesseract {
+
+/**********************************************************************
+ * ROW::ROW
+ *
+ * Constructor to build a ROW. Only the stats stuff are given here.
+ * The words are added directly.
+ **********************************************************************/
+// Builds a ROW from explicit baseline spline data and line metrics. The body
+// size starts at 0, the margins at 0, and the row belongs to no paragraph.
+ROW::ROW(                // constructor
+    int32_t spline_size, // no of segments
+    int32_t *xstarts,    // segment boundaries
+    double *coeffs,      // coefficients
+    float x_height,      // line height
+    float ascenders,     // ascender size
+    float descenders,    // descender drop
+    int16_t kern,        // char gap
+    int16_t space        // word gap
+    )
+    : kerning(kern)
+    , spacing(space)
+    , xheight(x_height)
+    , ascrise(ascenders)
+    , descdrop(descenders)
+    , bodysize(0.0f)
+    , baseline(spline_size, xstarts, coeffs)
+    , has_drop_cap_(false)
+    , lmargin_(0)
+    , rmargin_(0)
+    , para_(nullptr) {}
+
+/**********************************************************************
+ * ROW::ROW
+ *
+ * Constructor to build a ROW from a textord TO_ROW. The line metrics and
+ * baseline are copied from the TO_ROW. The words are added directly.
+ **********************************************************************/
+
+// Builds a ROW from a TO_ROW: x-height, body size, ascender rise, descender
+// drop and baseline are copied from to_row; kerning and spacing come from the
+// arguments. The margins start at 0 and the row belongs to no paragraph.
+ROW::ROW(           // constructor
+    TO_ROW *to_row, // source row
+    int16_t kern,   // char gap
+    int16_t space   // word gap
+    )
+    : kerning(kern)
+    , spacing(space)
+    , xheight(to_row->xheight)
+    , ascrise(to_row->ascrise)
+    , descdrop(to_row->descdrop)
+    , bodysize(to_row->body_size)
+    , has_drop_cap_(false)
+    , lmargin_(0)
+    , rmargin_(0)
+    , para_(nullptr) {
+  // The spline is assigned, as before, after it has been default-constructed.
+  baseline = to_row->baseline;
+}
+
+// Returns the bounding box including the desired combination of upper and
+// lower noise/diacritic elements.
+// Accumulates the restricted bounding boxes of all words in the row, passing
+// the upper_dots/lower_dots selection through to each word.
+TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
+  // The iterator needs a non-const list, but the words are only read here.
+  WERD_IT word_it(const_cast<WERD_LIST *>(&words));
+  TBOX combined;
+  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+    WERD *word = word_it.data();
+    combined += word->restricted_bounding_box(upper_dots, lower_dots);
+  }
+  return combined;
+}
+
+/**********************************************************************
+ * ROW::recalc_bounding_box
+ *
+ * Set the bounding box correctly
+ **********************************************************************/
+
+void ROW::recalc_bounding_box() { // recalculate BB
+  WERD_IT word_it(&words);        // words of ROW
+
+  // Pass 1: if any word starts to the left of its predecessor, sort the whole
+  // list with word_comparator and stop checking.
+  if (!word_it.empty()) {
+    int16_t last_left = word_it.data()->bounding_box().left();
+    for (word_it.forward(); !word_it.at_first(); word_it.forward()) {
+      const int16_t this_left = word_it.data()->bounding_box().left();
+      if (this_left < last_left) {
+        word_it.move_to_first();
+        // words in BB order
+        word_it.sort(word_comparator);
+        break;
+      }
+      last_left = this_left;
+    }
+  }
+
+  // Pass 2: flag the first/last words as begin/end of line, clear those flags
+  // on every other word, and grow the row's box by each word's box. The box
+  // is only extended here; it is not reset beforehand.
+  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+    WERD *current = word_it.data();
+    current->set_flag(W_BOL, word_it.at_first());
+    current->set_flag(W_EOL, word_it.at_last());
+    // extend BB as reqd
+    bound_box += current->bounding_box();
+  }
+}
+
+/**********************************************************************
+ * ROW::move
+ *
+ * Reposition row by vector
+ **********************************************************************/
+
+void ROW::move(      // reposition row
+    const ICOORD vec // by vector
+) {
+  // Shift every word first, then the row's own box and baseline.
+  WERD_IT word_it(&words);
+  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+    WERD *word = word_it.data();
+    word->move(vec);
+  }
+
+  bound_box.move(vec);
+  baseline.move(vec);
+}
+
+/**********************************************************************
+ * ROW::print
+ *
+ * Display members
+ **********************************************************************/
+
+void ROW::print( // print the row's statistics through tprintf
+    FILE *fp     // not referenced in this body; all output goes through tprintf
+) const {
+  tprintf("Kerning= %d\n", kerning);
+  tprintf("Spacing= %d\n", spacing);
+  bound_box.print(); // the box prints itself via its own print()
+  tprintf("Xheight= %f\n", xheight);
+  tprintf("Ascrise= %f\n", ascrise);
+  tprintf("Descdrop= %f\n", descdrop);
+  tprintf("has_drop_cap= %d\n", has_drop_cap_); // bool shown as 0/1 by %d
+  tprintf("lmargin= %d, rmargin= %d\n", lmargin_, rmargin_);
+}
+
+/**********************************************************************
+ * ROW::plot
+ *
+ * Draw the ROW in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void ROW::plot(              // draw it
+    ScrollView *window,      // window to draw in
+    ScrollView::Color colour // colour to draw in
+) {
+  // Every word of the row is drawn in the same colour.
+  WERD_IT word_it(&words);
+  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+    word_it.data()->plot(window, colour);
+  }
+}
+
+/**********************************************************************
+ * ROW::plot
+ *
+ * Draw the ROW in rainbow colours.
+ **********************************************************************/
+
+void ROW::plot(        // draw it
+    ScrollView *window // window to draw in
+) {
+  // Each word picks its own (rainbow) colours.
+  WERD_IT word_it(&words);
+  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+    word_it.data()->plot(window);
+  }
+}
+#endif // !GRAPHICS_DISABLED
+
+/**********************************************************************
+ * ROW::operator=
+ *
+ * Assign rows by duplicating the row structure but NOT the WERDLIST
+ **********************************************************************/
+
+// Copies the row's statistics, baseline, bounding box, margins, drop-cap flag
+// and paragraph pointer from source. The word list is NOT copied: any words
+// this row held are cleared. Assigning a row to itself is a no-op.
+ROW &ROW::operator=(const ROW &source) {
+  // Without this guard, self-assignment would wipe the row's own word list
+  // (see words.clear() below) and re-run the base-class link assignment.
+  if (this == &source) {
+    return *this;
+  }
+  this->ELIST_LINK::operator=(source);
+  kerning = source.kerning;
+  spacing = source.spacing;
+  xheight = source.xheight;
+  bodysize = source.bodysize;
+  ascrise = source.ascrise;
+  descdrop = source.descdrop;
+  // Words are deliberately not duplicated; drop whatever this row had.
+  if (!words.empty()) {
+    words.clear();
+  }
+  baseline = source.baseline; // QSPLINES must do =
+  bound_box = source.bound_box;
+  has_drop_cap_ = source.has_drop_cap_;
+  lmargin_ = source.lmargin_;
+  rmargin_ = source.rmargin_;
+  para_ = source.para_; // paragraph pointer is copied as-is
+  return *this;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrrow.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ocrrow.h
@ -0,0 +1,176 @@
+/**********************************************************************
+ * File:        ocrrow.h  (Formerly row.h)
+ * Description: Code for the ROW class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 15:58:04 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef OCRROW_H
+#define OCRROW_H
+
+#include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
+#include "quspline.h"   // for QSPLINE
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+#include "werd.h"       // for WERD_LIST
+
+#include <cstdint> // for int16_t, int32_t
+#include <cstdio>  // for FILE
+
+namespace tesseract {
+
+class ICOORD;
+class TO_ROW;
+
+struct PARA;
+
+// A text row: its list of words, baseline spline, bounding box and line
+// metrics, plus the margins, drop-cap flag and paragraph pointer that are
+// filled in by later stages.
+class ROW : public ELIST_LINK {
+  friend void tweak_row_baseline(ROW *, double, double);
+
+public:
+  // Default construction relies on the member initializers below, so every
+  // scalar member and para_ start out with a defined value.
+  ROW() = default;
+  ROW(                     // constructor
+      int32_t spline_size, // no of segments
+      int32_t *xstarts,    // segment boundaries
+      double *coeffs,      // coefficients
+      float x_height,      // line height
+      float ascenders,     // ascender size
+      float descenders,    // descender size
+      int16_t kern,        // char gap
+      int16_t space);      // word gap
+  ROW(                     // constructor
+      TO_ROW *row,         // textord row
+      int16_t kern,        // char gap
+      int16_t space);      // word gap
+
+  WERD_LIST *word_list() { // get words
+    return &words;
+  }
+
+  float base_line(        // compute baseline
+      float xpos) const { // at the position
+    // get spline value
+    return static_cast<float>(baseline.y(xpos));
+  }
+  float x_height() const { // return x height
+    return xheight;
+  }
+  void set_x_height(float new_xheight) { // set x height
+    xheight = new_xheight;
+  }
+  int32_t kern() const { // return kerning
+    return kerning;
+  }
+  float body_size() const { // return body size
+    return bodysize;
+  }
+  void set_body_size(float new_size) { // set body size
+    bodysize = new_size;
+  }
+  int32_t space() const { // return spacing
+    return spacing;
+  }
+  float ascenders() const { // return size
+    return ascrise;
+  }
+  float descenders() const { // return size
+    return descdrop;
+  }
+  TBOX bounding_box() const { // return bounding box
+    return bound_box;
+  }
+  // Returns the bounding box including the desired combination of upper and
+  // lower noise/diacritic elements.
+  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
+
+  void set_lmargin(int16_t lmargin) {
+    lmargin_ = lmargin;
+  }
+  void set_rmargin(int16_t rmargin) {
+    rmargin_ = rmargin;
+  }
+  int16_t lmargin() const {
+    return lmargin_;
+  }
+  int16_t rmargin() const {
+    return rmargin_;
+  }
+
+  void set_has_drop_cap(bool has) {
+    has_drop_cap_ = has;
+  }
+  bool has_drop_cap() const {
+    return has_drop_cap_;
+  }
+
+  void set_para(PARA *p) {
+    para_ = p;
+  }
+  PARA *para() const {
+    return para_;
+  }
+
+  void recalc_bounding_box(); // recalculate BB
+
+  void move(             // reposition row
+      const ICOORD vec); // by vector
+
+  void print(          // print
+      FILE *fp) const; // file to print on
+
+#ifndef GRAPHICS_DISABLED
+  void plot(                     // draw one
+      ScrollView *window,        // window to draw in
+      ScrollView::Color colour); // uniform colour
+  void plot(                     // draw one
+      ScrollView *window);       // in rainbow colours
+
+  void plot_baseline(             // draw the baseline
+      ScrollView *window,         // window to draw in
+      ScrollView::Color colour) { // colour to draw
+    // draw it
+    baseline.plot(window, colour);
+  }
+#endif // !GRAPHICS_DISABLED
+  ROW &operator=(const ROW &source);
+
+private:
+  // Copy constructor (currently unused, therefore private).
+  ROW(const ROW &source) = delete;
+
+  int32_t kerning = 0;    // inter char gap
+  int32_t spacing = 0;    // inter word gap
+  TBOX bound_box;         // bounding box
+  float xheight = 0.0f;   // height of line
+  float ascrise = 0.0f;   // size of ascenders
+  float descdrop = 0.0f;  // -size of descenders
+  float bodysize = 0.0f;  // CJK character size. (equals to
+                          // xheight+ascrise by default)
+  WERD_LIST words;        // words
+  QSPLINE baseline;       // baseline spline
+
+  // These get set after blocks have been determined.
+  bool has_drop_cap_ = false;
+  int16_t lmargin_ = 0; // Distance to left polyblock margin.
+  int16_t rmargin_ = 0; // Distance to right polyblock margin.
+
+  // This gets set during paragraph analysis.
+  PARA *para_ = nullptr; // Paragraph of which this row is part.
+};
+
+ELISTIZEH(ROW)
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/otsuthr.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/otsuthr.cpp
@ -0,0 +1,212 @@
+/**********************************************************************
+ * File:        otsuthr.cpp
+ * Description: Simple Otsu thresholding for binarizing images.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 2008, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "otsuthr.h"
+
+#include <allheaders.h>
+#include <cstring>
+#include "helpers.h"
+#if defined(USE_OPENCL)
+#  include "openclwrapper.h" // for OpenclDevice
+#endif
+
+namespace tesseract {
+
+// Computes the Otsu threshold(s) for the given image rectangle, making one
+// for each channel. Each channel is always one byte per pixel.
+// Resizes thresholds and hi_values to the number of channels and fills them
+// such that a pixel value >thresholds[channel] is considered foreground if
+// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
+// that there is no apparent foreground. At least one hi_value will not be -1,
+// provided the image has at least one channel (depth >= 8).
+// The return value is the number of channels in the input image, being
+// the size of the output thresholds and hi_values vectors.
+int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
+                  std::vector<int> &hi_values) {
+  int num_channels = pixGetDepth(src_pix) / 8;
+  // Of all channels with no good hi_value, keep the best so we can always
+  // produce at least one answer.
+  int best_hi_value = 1;
+  int best_hi_index = 0;
+  bool any_good_hivalue = false;
+  double best_hi_dist = 0.0;
+  thresholds.resize(num_channels);
+  hi_values.resize(num_channels);
+
+  // Derives the threshold and hi_value of channel ch from its histogram and
+  // updates the "best of the bad lot" bookkeeping above. Shared by the OpenCL
+  // and CPU histogram paths so that both classify channels identically.
+  auto process_channel = [&](int ch, const int *histogram) {
+    thresholds[ch] = -1;
+    hi_values[ch] = -1;
+    int H;
+    int best_omega_0;
+    int best_t = OtsuStats(histogram, &H, &best_omega_0);
+    if (best_omega_0 == 0 || best_omega_0 == H) {
+      // This channel is empty.
+      return;
+    }
+    // To be a convincing foreground we must have a small fraction of H
+    // or to be a convincing background we must have a large fraction of H.
+    // In between we assume this channel contains no thresholding information.
+    int hi_value = best_omega_0 < H * 0.5;
+    thresholds[ch] = best_t;
+    if (best_omega_0 > H * 0.75) {
+      any_good_hivalue = true;
+      hi_values[ch] = 0;
+    } else if (best_omega_0 < H * 0.25) {
+      any_good_hivalue = true;
+      hi_values[ch] = 1;
+    } else {
+      // In case all channels are like this, keep the best of the bad lot.
+      double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
+      if (hi_dist > best_hi_dist) {
+        best_hi_dist = hi_dist;
+        best_hi_value = hi_value;
+        best_hi_index = ch;
+      }
+    }
+  };
+
+  // only use opencl if compiled w/ OpenCL and selected device is opencl
+#ifdef USE_OPENCL
+  // all of channel 0 then all of channel 1...
+  std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
+
+  // Calculate Histogram on GPU
+  OpenclDevice od;
+  if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
+      left == 0) {
+    od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
+                        height, kHistogramSize, &histogramAllChannels[0]);
+
+    // Calculate Threshold from Histogram on cpu
+    for (int ch = 0; ch < num_channels; ++ch) {
+      process_channel(ch, &histogramAllChannels[kHistogramSize * ch]);
+    }
+  } else {
+#endif
+    for (int ch = 0; ch < num_channels; ++ch) {
+      // Compute the histogram of the image rectangle.
+      int histogram[kHistogramSize];
+      HistogramRect(src_pix, ch, left, top, width, height, histogram);
+      process_channel(ch, histogram);
+    }
+#ifdef USE_OPENCL
+  }
+#endif // USE_OPENCL
+
+  // The num_channels check keeps us from writing into an empty vector when
+  // the image depth is below 8 bits.
+  if (!any_good_hivalue && num_channels > 0) {
+    // Use the best of the ones that were not good enough.
+    hi_values[best_hi_index] = best_hi_value;
+  }
+  return num_channels;
+}
+
+// Builds the histogram of one channel of the given image rectangle.
+// Each channel is one byte per pixel. The requested channel is passed
+// through ClipToRange(channel, 0, num_channels - 1) before use.
+// histogram must point at kHistogramSize(256) ints; it is zeroed first and
+// then holds the number of occurrences of each pixel value.
+void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
+                   int *histogram) {
+  const int bytes_per_pixel = pixGetDepth(src_pix) / 8;
+  const int byte_in_pixel = ClipToRange(channel, 0, bytes_per_pixel - 1);
+  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
+
+  const int words_per_line = pixGetWpl(src_pix);
+  l_uint32 *image_words = pixGetData(src_pix);
+  const int end_row = top + height;
+  for (int row = top; row < end_row; ++row) {
+    const l_uint32 *row_words = image_words + row * words_per_line;
+    for (int col = 0; col < width; ++col) {
+      // Byte offset within the line of the wanted channel of this pixel.
+      const int value = GET_DATA_BYTE(row_words, (col + left) * bytes_per_pixel + byte_in_pixel);
+      ++histogram[value];
+    }
+  }
+}
+
+// Finds the Otsu threshold of a histogram with kHistogramSize entries.
+// Returns the chosen threshold t, or -1 if no t splits the histogram into
+// two non-empty parts.
+// If H_out is not null it receives H, the total count in the histogram.
+// If omega0_out is not null it receives the count of entries at levels
+// 0..t inclusive for the chosen t (0 if no threshold was found).
+int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
+  // Total count and value-weighted sum over the whole histogram.
+  int total = 0;
+  double weighted_total = 0.0;
+  for (int level = 0; level < kHistogramSize; ++level) {
+    total += histogram[level];
+    weighted_total += static_cast<double>(level) * histogram[level];
+  }
+
+  // Now maximize the between-class variance over t.
+  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
+  int chosen_t = -1;
+  int chosen_low_count = 0;
+  double chosen_score = 0.0;
+  int low_count = 0;         // Running count of levels 0..t.
+  double weighted_low = 0.0; // Running weighted sum of levels 0..t.
+  for (int t = 0; t < kHistogramSize - 1; ++t) {
+    low_count += histogram[t];
+    weighted_low += t * static_cast<double>(histogram[t]);
+    if (low_count == 0) {
+      // Nothing at or below t yet, so there is no lower class.
+      continue;
+    }
+    const int high_count = total - low_count;
+    if (high_count == 0) {
+      // Everything is at or below t; larger t cannot split the data either.
+      break;
+    }
+    const double low_mean = weighted_low / low_count;
+    const double high_mean = (weighted_total - weighted_low) / high_count;
+    double score = high_mean - low_mean;
+    score *= score * low_count * high_count;
+    if (chosen_t < 0 || score > chosen_score) {
+      chosen_score = score;
+      chosen_t = t;
+      chosen_low_count = low_count;
+    }
+  }
+
+  if (H_out) {
+    *H_out = total;
+  }
+  if (omega0_out) {
+    *omega0_out = chosen_low_count;
+  }
+  return chosen_t;
+}
+
+} // namespace tesseract.
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/otsuthr.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/otsuthr.h
@ -0,0 +1,58 @@
+///////////////////////////////////////////////////////////////////////
+// File:        otsuthr.h
+// Description: Simple Otsu thresholding for binarizing images.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
+#define TESSERACT_CCMAIN_OTSUTHR_H_
+
+#include "image.h"
+
+#include <vector> // for std::vector
+
+struct Pix;
+
+namespace tesseract {
+
+const int kHistogramSize = 256; // The size of a histogram of pixel values.
+
+// Computes the Otsu threshold(s) for the given image rectangle, making one
+// for each channel. Each channel is always one byte per pixel.
+// Resizes thresholds and hi_values to the number of channels and fills them
+// such that a pixel value >thresholds[channel] is considered foreground if
+// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
+// that there is no apparent foreground. At least one hi_value will not be -1.
+// The return value is the number of channels in the input image, being
+// the size of the output thresholds and hi_values vectors.
+int OtsuThreshold(Image src_pix, int left, int top, int width, int height,
+                  std::vector<int> &thresholds,
+                  std::vector<int> &hi_values);
+
+// Computes the histogram for the given image rectangle, and the given
+// single channel. Each channel is always one byte per pixel.
+// Histogram is always a kHistogramSize(256) element array to count
+// occurrences of each pixel value.
+void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
+                   int *histogram);
+
+// Computes the Otsu threshold for the given histogram of kHistogramSize
+// entries and returns it, or -1 if no threshold could be found.
+// Also returns H = total count in histogram, and
+// omega0 = count of histogram entries at or below the returned threshold.
+// H_out and omega0_out may each be nullptr, in which case that output is
+// not written.
+int OtsuStats(const int *histogram, int *H_out, int *omega0_out);
+
+} // namespace tesseract.
+
+#endif // TESSERACT_CCMAIN_OTSUTHR_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pageres.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pageres.cpp
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pageres.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pageres.h
@ -0,0 +1,798 @@
+/**********************************************************************
+ * File:        pageres.h  (Formerly page_res.h)
+ * Description: Results classes used by control.c
+ * Author:      Phil Cheatle
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef PAGERES_H
+#define PAGERES_H
+
+#include "blamer.h"     // for BlamerBundle (ptr only), IRR_NUM_REASONS
+#include "clst.h"       // for CLIST_ITERATOR, CLISTIZEH
+#include "genericvector.h" // for PointerVector
+#include "elst.h"       // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH
+#include "matrix.h"     // for MATRIX
+#include "normalis.h"   // for DENORM
+#include "ratngs.h"     // for WERD_CHOICE, BLOB_CHOICE (ptr only)
+#include "rect.h"       // for TBOX
+#include "rejctmap.h"   // for REJMAP
+#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI...
+#include "werd.h"       // for WERD, W_BOL, W_EOL
+
+#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
+
+#include <cstdint>     // for int32_t, int16_t
+#include <functional>  // for std::function
+#include <set>         // for std::pair
+#include <vector>      // for std::vector
+
+#include <sys/types.h> // for int8_t
+
+struct Pix;
+
+namespace tesseract {
+
+class BLOCK;
+class BLOCK_LIST;
+class BLOCK_RES;
+class ROW;
+class ROW_RES;
+class SEAM;
+class WERD_RES;
+
+struct TWERD;
+
+class BoxWord;
+class Tesseract;
+struct FontInfo;
+
+/* Forward declarations */
+
+class BLOCK_RES;
+
+ELISTIZEH(BLOCK_RES)
+CLISTIZEH(BLOCK_RES)
+class ROW_RES;
+
+ELISTIZEH(ROW_RES)
+class WERD_RES;
+
+ELISTIZEH(WERD_RES)
+
+/*************************************************************************
+ * PAGE_RES - Page results
+ *************************************************************************/
+class PAGE_RES { // results for one page
+public:
+  int32_t char_count;
+  int32_t rej_count;
+  BLOCK_RES_LIST block_res_list;
+  bool rejected;
+  // Borrowed pointer, not owned by PAGE_RES. It is updated each time a
+  // PAGE_RES_IT iterating over this PAGE_RES moves on to the next word.
+  WERD_CHOICE **prev_word_best_choice;
+  // Sums of blame reasons computed by the blamer; Init() sizes this to
+  // IRR_NUM_REASONS entries.
+  std::vector<int> blame_reasons;
+  // Debug information about all the misadaptions on this page.
+  // Each BlamerBundle holds an index into this vector so that the words that
+  // caused a misadaption can be marked. The log lives at the PAGE_RES level
+  // because words may be deleted, split or merged.
+  std::vector<std::string> misadaption_log;
+
+  // Creates an empty page result with all simple fields reset by Init().
+  PAGE_RES() {
+    Init();
+  }
+
+  PAGE_RES(bool merge_similar_words,
+           BLOCK_LIST *block_list, // real blocks
+           WERD_CHOICE **prev_word_best_choice_ptr);
+
+  ~PAGE_RES() = default;
+
+  // Resets the counters, the rejected flag and the borrowed pointer, and
+  // sizes blame_reasons to one entry per IncorrectResultReason.
+  void Init() {
+    prev_word_best_choice = nullptr;
+    rejected = false;
+    rej_count = 0;
+    char_count = 0;
+    blame_reasons.resize(IRR_NUM_REASONS);
+  }
+};
+
+/*************************************************************************
+ * BLOCK_RES - Block results
+ *************************************************************************/
+
+// Results for one block. The in-class initializers below exist so that a
+// default-constructed BLOCK_RES has well-defined member values instead of
+// indeterminate ones; the two-argument constructor is defined elsewhere and
+// presumably assigns the real values (not visible here).
+class BLOCK_RES : public ELIST_LINK {
+public:
+  BLOCK *block = nullptr;     // real block
+  int32_t char_count = 0;     // chars in block
+  int32_t rej_count = 0;      // rejected chars
+  int16_t font_class = 0;     //
+  int16_t row_count = 0;
+  float x_height = 0.0f;
+  bool font_assigned = false; // block already
+  //      processed
+
+  ROW_RES_LIST row_res_list;
+
+  BLOCK_RES() = default;
+
+  BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block
+
+  ~BLOCK_RES() = default;
+};
+
+/*************************************************************************
+ * ROW_RES - Row results
+ *************************************************************************/
+
+// Results for one row. The in-class initializers below exist so that a
+// default-constructed ROW_RES has well-defined member values instead of
+// indeterminate ones; the two-argument constructor is defined elsewhere and
+// presumably assigns the real values (not visible here).
+class ROW_RES : public ELIST_LINK {
+public:
+  ROW *row = nullptr;               // real row
+  int32_t char_count = 0;           // chars in block
+  int32_t rej_count = 0;            // rejected chars
+  int32_t whole_word_rej_count = 0; // rejs in total rej wds
+  WERD_RES_LIST word_res_list;
+
+  ROW_RES() = default;
+
+  ROW_RES(bool merge_similar_words, ROW *the_row); // real row
+
+  ~ROW_RES() = default;
+};
+
+/*************************************************************************
+ * WERD_RES - Word results
+ *************************************************************************/
+// Crunch modes. A WERD_RES stores one of these in unlv_crunch_mode, which
+// defaults to CR_NONE.
+// NOTE(review): the meaning of the individual values is not documented here.
+enum CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE };
+
+// WERD_RES is a collection of publicly accessible members that gathers
+// information about a word result.
+class TESS_API WERD_RES : public ELIST_LINK {
+public:
+  // Which word is which?
+  // There are 3 coordinate spaces in use here: a possibly rotated pixel space,
+  // the original image coordinate space, and the BLN space in which the
+  // baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight,
+  // and the x-middle of the word is at 0.
+  // In the rotated pixel space, coordinates correspond to the input image,
+  // but may be rotated about the origin by a multiple of 90 degrees,
+  // and may therefore be negative.
+  // In any case a rotation by denorm.block()->re_rotation() will take them
+  // back to the original image.
+  // The other differences between words all represent different stages of
+  // processing during recognition.
+
+  // ---------------------------INPUT-------------------------------------
+
+  // The word is the input C_BLOBs in the rotated pixel space.
+  // word is NOT owned by the WERD_RES unless combination is true.
+  // All the other word pointers ARE owned by the WERD_RES.
+  WERD *word = nullptr; // Input C_BLOB word.
+
+  // -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------
+
+  // The bln_boxes contains the bounding boxes (only) of the input word, in the
+  // BLN space. The lengths of word and bln_boxes
+  // match as they are both before any chopping.
+  // TODO(rays) determine if docqual does anything useful and delete bln_boxes
+  // if it doesn't.
+  tesseract::BoxWord *bln_boxes = nullptr; // BLN input bounding boxes.
+  // The ROW that this word sits in. NOT owned by the WERD_RES.
+  ROW *blob_row = nullptr;
+  // The denorm provides the transformation to get back to the rotated image
+  // coords from the chopped_word/rebuild_word BLN coords, but each blob also
+  // has its own denorm.
+  DENORM denorm; // For use on chopped_word.
+  // Unicharset used by the classifier output in best_choice and raw_choice.
+  const UNICHARSET *uch_set = nullptr; // For converting back to utf8.
+
+  // ----Initialized by SetupFor*Recognition---BUT OUTPUT FROM RECOGNITION----
+  // ----Setup to a (different!) state expected by the various classifiers----
+  // TODO(rays) Tidy and make more consistent.
+
+  // The chopped_word is also in BLN space, and represents the fully chopped
+  // character fragments that make up the word.
+  // The length of chopped_word matches length of seam_array + 1 (if set).
+  TWERD *chopped_word = nullptr; // BLN chopped fragments output.
+  // Vector of SEAM* holding chopping points matching chopped_word.
+  std::vector<SEAM *> seam_array;
+  // Widths of blobs in chopped_word.
+  std::vector<int> blob_widths;
+  // Gaps between blobs in chopped_word. blob_gaps[i] is the gap between
+  // blob i and blob i+1.
+  std::vector<int> blob_gaps;
+  // Stores the lstm choices of every timestep
+  std::vector<std::vector<std::pair<const char *, float>>> timesteps;
+  // Stores the lstm choices of every timestep segmented by character
+  std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
+  // Symbolchoices acquired during CTC
+  std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
+  // Stores if the timestep vector starts with a space
+  bool leading_space = false;
+  // Stores value when the word ends
+  int end = 0;
+  // Ratings matrix contains classifier choices for each classified combination
+  // of blobs. The dimension is the same as the number of blobs in chopped_word
+  // and the leading diagonal corresponds to classifier results of the blobs
+  // in chopped_word. The state_ members of best_choice, raw_choice and
+  // best_choices all correspond to this ratings matrix and allow extraction
+  // of the blob choices for any given WERD_CHOICE.
+  MATRIX *ratings = nullptr; // Owned pointer.
+  // Pointer to the first WERD_CHOICE in best_choices. This is the result that
+  // will be output from Tesseract. Note that this is now a borrowed pointer
+  // and should NOT be deleted.
+  WERD_CHOICE *best_choice = nullptr; // Borrowed pointer.
+  // The best raw_choice found during segmentation search. Differs from the
+  // best_choice by being the best result according to just the character
+  // classifier, not taking any language model information into account.
+  // Unlike best_choice, the pointer IS owned by this WERD_RES.
+  WERD_CHOICE *raw_choice = nullptr; // Owned pointer.
+  // Alternative results found during chopping/segmentation search stages.
+  // Note that being an ELIST, best_choices owns the WERD_CHOICEs.
+  WERD_CHOICE_LIST best_choices;
+
+  // Truth bounding boxes, text and incorrect choice reason.
+  BlamerBundle *blamer_bundle = nullptr;
+
+  // --------------OUTPUT FROM RECOGNITION-------------------------------
+  // --------------Not all fields are necessarily set.-------------------
+  // ---best_choice, raw_choice *must* end up set, with a box_word-------
+  // ---In complete output, the number of blobs in rebuild_word matches---
+  // ---the number of boxes in box_word, the number of unichar_ids in---
+  // ---best_choice, the number of ints in best_state, and the number---
+  // ---of strings in correct_text--------------------------------------
+  // ---SetupFake Sets everything to appropriate values if the word is---
+  // ---known to be bad before recognition.------------------------------
+
+  // The rebuild_word is also in BLN space, but represents the final best
+  // segmentation of the word. Its length is therefore the same as box_word.
+  TWERD *rebuild_word = nullptr; // BLN best segmented word.
+  // The box_word is in the original image coordinate space. It is the
+  // bounding boxes of the rebuild_word, after denormalization.
+  // The length of box_word matches rebuild_word, best_state (if set) and
+  // correct_text (if set), as well as best_choice and represents the
+  // number of classified units in the output.
+  tesseract::BoxWord *box_word = nullptr; // Denormalized output boxes.
+  // The Tesseract that was used to recognize this word. Just a borrowed
+  // pointer. Note: Tesseract's class definition is in a higher-level library.
+  // We avoid introducing a cyclic dependency by not using the Tesseract
+  // within WERD_RES. We are just storing it to provide access to it
+  // for the top-level multi-language controller, and maybe for output of
+  // the recognized language.
+  // tesseract points to data owned elsewhere.
+  tesseract::Tesseract *tesseract = nullptr;
+  // The best_state stores the relationship between chopped_word and
+  // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i]
+  // adjacent blobs in chopped_word. The seams in seam_array are hidden
+  // within a rebuild_word blob and revealed between them.
+  std::vector<int> best_state; // Number of blobs in each best blob.
+  // The correct_text is used during training and adaption to carry the
+  // text to the training system without the need for a unicharset. There
+  // is one entry in the vector for each blob in rebuild_word and box_word.
+  std::vector<std::string> correct_text;
+
+  // Less-well documented members.
+  // TODO(rays) Add more documentation here.
+  WERD_CHOICE *ep_choice = nullptr; // ep text TODO(rays) delete this.
+  REJMAP reject_map;                // best_choice rejects
+  bool tess_failed = false;
+  /*
+  If tess_failed is true, one of the following tests failed when Tess
+  returned:
+  - The outword blob list was not the same length as the best_choice string;
+  - The best_choice string contained ALL blanks;
+  - The best_choice string was zero length
+*/
+  bool tess_accepted = false;    // Tess thinks its ok?
+  bool tess_would_adapt = false; // Tess would adapt?
+  bool done = false;             // ready for output?
+  bool small_caps = false;       // word appears to be small caps
+  bool odd_size = false;         // word is bigger than line or leader dots.
+  // The fontinfos are pointers to data owned by the classifier.
+  const FontInfo *fontinfo = nullptr;
+  const FontInfo *fontinfo2 = nullptr;
+  int8_t fontinfo_id_count = 0;  // number of votes
+  int8_t fontinfo_id2_count = 0; // number of votes
+  bool guessed_x_ht = true;
+  bool guessed_caps_ht = true;
+  CRUNCH_MODE unlv_crunch_mode = CR_NONE;
+  float x_height = 0.0f;       // post match estimate
+  float caps_height = 0.0f;    // post match estimate
+  float baseline_shift = 0.0f; // post match estimate.
+  // Certainty score for the spaces either side of this word (LSTM mode).
+  // MIN this value with the actual word certainty.
+  float space_certainty = 0.0f;
+
+  /*
+  To deal with fuzzy spaces we need to be able to combine "words" to form
+  combinations when we suspect that the gap is a non-space. The (new) textord
+  code generates separate words for EVERY fuzzy gap - flags in the word
+  indicate whether the gap is below the threshold (fuzzy kern) and is thus
+  NOT a real word break by default, or above the threshold (fuzzy space) and
+  this is a real word break by default.
+
+  The WERD_RES list contains all these words PLUS "combination" words built
+  out of (copies of) the words split by fuzzy kerns. The separate parts have
+  their "part_of_combo" flag set true and should be IGNORED on a default
+  reading of the list.
+
+  Combination words are FOLLOWED by the sequence of part_of_combo words
+  which they combine.
+*/
+  bool combination = false;   // of two fuzzy gap wds
+  bool part_of_combo = false; // part of a combo
+  bool reject_spaces = false; // Reject spacing?
+
+  WERD_RES() = default;
+
+  // Creates a result for the given input word. Only the word pointer is set;
+  // every other member keeps its in-class default value.
+  WERD_RES(WERD *the_word) : word(the_word) {}
+  // Copy constructor. Deep copies everything except the ratings MATRIX.
+  // To get that use deep_copy below.
+  // The actual copying is delegated to operator=.
+  WERD_RES(const WERD_RES &source) : ELIST_LINK(source) {
+    // combination is used in function Clear which is called from operator=,
+    // so it must hold a defined value before the assignment below runs.
+    // (It also has an in-class default of false.)
+    combination = false;
+    *this = source; // see operator=
+  }
+
+  ~WERD_RES();
+
+  // Returns the UTF-8 string for the given blob index in the best_choice word,
+  // given that we know whether we are in a right-to-left reading context.
+  // This matters for mirrorable characters such as parentheses.  We recognize
+  // characters purely based on their shape on the page, and by default produce
+  // the corresponding unicode for a left-to-right context.
+  // Returns nullptr if best_choice or uch_set is not set, if blob_index is out
+  // of range, or if the unichar id at that index is not a valid id in uch_set.
+  const char *BestUTF8(int blob_index, bool in_rtl_context) const {
+    // uch_set defaults to nullptr, so guard it as well as best_choice before
+    // either is dereferenced.
+    if (uch_set == nullptr || best_choice == nullptr) {
+      return nullptr;
+    }
+    if (blob_index < 0 || blob_index >= best_choice->length()) {
+      return nullptr;
+    }
+    UNICHAR_ID id = best_choice->unichar_id(blob_index);
+    if (id < 0 || id >= uch_set->size()) {
+      return nullptr;
+    }
+    // In a right-to-left context use the mirrored character when there is one.
+    UNICHAR_ID mirrored = uch_set->get_mirror(id);
+    if (in_rtl_context && mirrored > 0) {
+      id = mirrored;
+    }
+    return uch_set->id_to_unichar_ext(id);
+  }
+  // Returns the UTF-8 string for the given blob index in the raw_choice word.
+  // Returns nullptr if raw_choice or uch_set is not set, if blob_index is out
+  // of range, or if the unichar id at that index is not a valid id in uch_set.
+  const char *RawUTF8(int blob_index) const {
+    // Both pointers default to nullptr, so guard them before dereferencing.
+    if (raw_choice == nullptr || uch_set == nullptr) {
+      return nullptr;
+    }
+    if (blob_index < 0 || blob_index >= raw_choice->length()) {
+      return nullptr;
+    }
+    UNICHAR_ID id = raw_choice->unichar_id(blob_index);
+    if (id < 0 || id >= uch_set->size()) {
+      return nullptr;
+    }
+    return uch_set->id_to_unichar(id);
+  }
+
+  // Returns the direction, according to uch_set, of the unichar at blob_index
+  // in best_choice. Returns UNICHARSET::U_OTHER_NEUTRAL if uch_set or
+  // best_choice is not set or if blob_index is out of range.
+  UNICHARSET::Direction SymbolDirection(int blob_index) const {
+    // uch_set defaults to nullptr, so guard it as well as best_choice before
+    // either is dereferenced.
+    if (uch_set == nullptr || best_choice == nullptr || blob_index < 0 ||
+        blob_index >= best_choice->length()) {
+      return UNICHARSET::U_OTHER_NEUTRAL;
+    }
+    return uch_set->get_direction(best_choice->unichar_id(blob_index));
+  }
+
+  // Returns true if at least one unichar in best_choice has a right-to-left
+  // direction (U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC) according to
+  // uch_set. Returns false if uch_set or best_choice is not set or the word
+  // is empty. Unichar ids outside the range of uch_set are ignored.
+  bool AnyRtlCharsInWord() const {
+    if (uch_set == nullptr || best_choice == nullptr) {
+      return false;
+    }
+    if (best_choice->length() < 1) {
+      return false;
+    }
+    for (int pos = 0; pos < best_choice->length(); pos++) {
+      const int ch = best_choice->unichar_id(pos);
+      const bool legal = ch >= 0 && ch < uch_set->size();
+      if (legal) {
+        const UNICHARSET::Direction direction = uch_set->get_direction(ch);
+        if (direction == UNICHARSET::U_RIGHT_TO_LEFT ||
+            direction == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  // Returns true if at least one unichar in best_choice has a left-to-right
+  // direction (U_LEFT_TO_RIGHT or U_ARABIC_NUMBER) according to uch_set.
+  // Returns false if uch_set or best_choice is not set or the word is empty.
+  // Unichar ids outside the range of uch_set are ignored.
+  bool AnyLtrCharsInWord() const {
+    if (uch_set == nullptr || best_choice == nullptr) {
+      return false;
+    }
+    if (best_choice->length() < 1) {
+      return false;
+    }
+    for (int pos = 0; pos < best_choice->length(); pos++) {
+      const int ch = best_choice->unichar_id(pos);
+      const bool legal = ch >= 0 && ch < uch_set->size();
+      if (legal) {
+        const UNICHARSET::Direction direction = uch_set->get_direction(ch);
+        if (direction == UNICHARSET::U_LEFT_TO_RIGHT ||
+            direction == UNICHARSET::U_ARABIC_NUMBER) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  // Return whether the blobs in this WERD_RES 0, 1,... come from an engine
+  // that gave us the unichars in reading order (as opposed to strict left
+  // to right).
+  // The answer is taken from best_choice->unichars_in_script_order().
+  // NOTE(review): best_choice is dereferenced without a null check, so it
+  // must be set before this is called.
+  bool UnicharsInReadingOrder() const {
+    return best_choice->unichars_in_script_order();
+  }
+
+  void Clear();
+  void ClearResults();
+  void ClearWordChoices();
+  void ClearRatings();
+
+  // Deep copies everything except the ratings MATRIX.
+  // To get that use deep_copy below.
+  WERD_RES &operator=(const WERD_RES &source); // from this
+
+  void CopySimpleFields(const WERD_RES &source);
+
+  // Initializes a blank (default constructed) WERD_RES from one that has
+  // already been recognized.
+  // Use SetupFor*Recognition afterwards to complete the setup and make
+  // it ready for a retry recognition.
+  void InitForRetryRecognition(const WERD_RES &source);
+
+  // Sets up the members used in recognition: bln_boxes, chopped_word,
+  // seam_array, denorm.
+  // If use_body_size is true and row->body_size is set, then body_size will
+  // be used for blob normalization instead of xheight + ascrise. This flag
+  // is for those languages that are using the CJK pitch model and thus it
+  // has to be true if and only if tesseract->textord_use_cjk_fp_model is
+  // true.
+  // If allow_detailed_fx is true, the feature extractor will receive fine
+  // precision outline information, allowing smoother features and better
+  // features on low resolution images.
+  // The norm_mode sets the default mode for normalization in absence
+  // of any of the above flags. It should really be a tesseract::OcrEngineMode
+  // but is declared as int for ease of use with tessedit_ocr_engine_mode.
+  // Returns false if the word is empty, in which case fake results are set up.
+  bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
+                           Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
+                           bool use_body_size, bool allow_detailed_fx, ROW *row,
+                           const BLOCK *block);
+
+  // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
+  // accumulators from a made chopped word.  We presume the fields are already
+  // empty.
+  void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
+
+  // Sets up the members used in recognition for an empty recognition result:
+  // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
+  void SetupFake(const UNICHARSET &uch);
+
+  // Set the word as having the script of the input unicharset.
+  void SetupWordScript(const UNICHARSET &unicharset_in);
+
+  // Sets up the blamer_bundle if it is not null, using the initialized denorm.
+  void SetupBlamerBundle();
+
+  // Computes the blob_widths and blob_gaps from the chopped_word.
+  void SetupBlobWidthsAndGaps();
+
+  // Updates internal data to account for a new SEAM (chop) at the given
+  // blob_number. Fixes the ratings matrix and states in the choices, as well
+  // as the blob widths and gaps.
+  void InsertSeam(int blob_number, SEAM *seam);
+
+  // Returns true if all the word choices except the first have adjust_factors
+  // worse than the given threshold.
+  bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const;
+
+  // Returns true if the current word is ambiguous (by number of answers or
+  // by dangerous ambigs.)
+  bool IsAmbiguous();
+
+  // Returns true if the ratings matrix size matches the sum of each of the
+  // segmentation states.
+  bool StatesAllValid();
+
+  // Prints a list of words found if debug is true or the word result matches
+  // the word_to_debug.
+  void DebugWordChoices(bool debug, const char *word_to_debug);
+
+  // Prints the top choice along with the accepted/done flags.
+  void DebugTopChoice(const char *msg) const;
+
+  // Removes from best_choices all choices which are not within a reasonable
+  // range of the best choice.
+  void FilterWordChoices(int debug_level);
+
+  // Computes a set of distance thresholds used to control adaption.
+  // Compares the best choice for the current word to the best raw choice
+  // to determine which characters were classified incorrectly by the
+  // classifier. Then places a separate threshold into thresholds for each
+  // character in the word. If the classifier was correct, max_rating is placed
+  // into thresholds. If the classifier was incorrect, the mean match rating
+  // (error percentage) of the classifier's incorrect choice minus some margin
+  // is placed into thresholds. This can then be used by the caller to try to
+  // create a new template for the desired class that will classify the
+  // character with a rating better than the threshold value. The match rating
+  // placed into thresholds is never allowed to be below min_rating in order to
+  // prevent trying to make overly tight templates.
+  // min_rating limits how tight to make a template.
+  // max_rating limits how loose to make a template.
+  // rating_margin denotes the amount of margin to put in template.
+  void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
+                                 float rating_margin, float *thresholds);
+
+  // Saves a copy of the word_choice if it has the best unadjusted rating.
+  // Returns true if the word_choice was the new best.
+  bool LogNewRawChoice(WERD_CHOICE *word_choice);
+  // Consumes word_choice by adding it to best_choices, (taking ownership) if
+  // the certainty for word_choice is some distance of the best choice in
+  // best_choices, or by deleting the word_choice and returning false.
+  // The best_choices list is kept in sorted order by rating. Duplicates are
+  // removed, and the list is kept no longer than max_num_choices in length.
+  // Returns true if the word_choice is still a valid pointer.
+  bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice);
+
+  // Prints a brief list of all the best choices.
+  void PrintBestChoices() const;
+
+  // Returns the sum of the widths of the blob between start_blob and last_blob
+  // inclusive.
+  int GetBlobsWidth(int start_blob, int last_blob) const;
+  // Returns the width of a gap between the specified blob and the next one.
+  int GetBlobsGap(int blob_index) const;
+
+  // Returns the BLOB_CHOICE corresponding to the given index in the
+  // best choice word taken from the appropriate cell in the ratings MATRIX.
+  // Borrowed pointer, so do not delete. May return nullptr if there is no
+  // BLOB_CHOICE matching the unichar_id at the given index.
+  BLOB_CHOICE *GetBlobChoice(int index) const;
+
+  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the
+  // best choice word taken from the appropriate cell in the ratings MATRIX.
+  // Borrowed pointer, so do not delete.
+  BLOB_CHOICE_LIST *GetBlobChoices(int index) const;
+
+  // Moves the results fields from word to this. This takes ownership of all
+  // the data, so word can be destructed afterwards.
+  //   word1.ConsumeWordResults(word);
+  //   delete word;
+  // is simpler and faster than:
+  //   word1 = *word;
+  //   delete word;
+  // as it doesn't need to copy and reallocate anything.
+  void ConsumeWordResults(WERD_RES *word);
+
+  // Replace the best choice and rebuild box word.
+  // choice must be from the current best_choices list.
+  void ReplaceBestChoice(WERD_CHOICE *choice);
+
+  // Builds the rebuild_word and sets the best_state from the chopped_word and
+  // the best_choice->state.
+  void RebuildBestState();
+
+  // Copies the chopped_word to the rebuild_word, faking a best_state as well.
+  // Also sets up the output box_word.
+  void CloneChoppedToRebuild();
+
+  // Sets/replaces the box_word with one made from the rebuild_word.
+  void SetupBoxWord();
+
+  // Sets up the script positions in the best_choice using the best_choice
+  // to get the unichars, and the unicharset to get the target positions.
+  void SetScriptPositions();
+  // Sets all the blobs in all the words (best choice and alternates) to be
+  // the given position. (When a sub/superscript is recognized as a separate
+  // word, it falls victim to the rule that a whole word cannot be sub or
+  // superscript, so this function overrides that problem.)
+  void SetAllScriptPositions(tesseract::ScriptPos position);
+
+  // Classifies the word with some already-calculated BLOB_CHOICEs.
+  // The choices are an array of blob_count pointers to BLOB_CHOICE,
+  // providing a single classifier result for each blob.
+  // The BLOB_CHOICEs are consumed and the word takes ownership.
+  // The number of blobs in the box_word must match blob_count.
+  void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices);
+
+  // Creates a WERD_CHOICE for the word using the top choices from the leading
+  // diagonal of the ratings matrix.
+  void FakeWordFromRatings(PermuterType permuter);
+
+  // Copies the best_choice strings to the correct_text for adaption/training.
+  void BestChoiceToCorrectText();
+
+  // Merges 2 adjacent blobs in the result if the permanent callback
+  // class_cb returns other than INVALID_UNICHAR_ID, AND the permanent
+  // callback box_cb is nullptr or returns true, setting the merged blob
+  // result to the class returned from class_cb.
+  // Returns true if anything was merged.
+  bool ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
+                            std::function<bool(const TBOX &, const TBOX &)> box_cb);
+
+  // Merges 2 adjacent blobs in the result (index and index+1) and corrects
+  // all the data to account for the change.
+  void MergeAdjacentBlobs(int index);
+
+  // Callback helper for fix_quotes returns a double quote if both
+  // arguments are quote, otherwise INVALID_UNICHAR_ID.
+  UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2);
+  void fix_quotes();
+
+  // Callback helper for fix_hyphens returns UNICHAR_ID of - if both
+  // arguments are hyphen, otherwise INVALID_UNICHAR_ID.
+  UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2);
+  // Callback helper for fix_hyphens returns true if box1 and box2 overlap
+  // (assuming both on the same textline, are in order and a chopped em dash.)
+  bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2);
+  void fix_hyphens();
+
+  // Callback helper for merge_tess_fails returns a space if both
+  // arguments are space, otherwise INVALID_UNICHAR_ID.
+  UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2);
+  void merge_tess_fails();
+
+  // Returns a really deep copy of *src, including the ratings MATRIX.
+  static WERD_RES *deep_copy(const WERD_RES *src) {
+    // The copy constructor does not duplicate the ratings matrix, so it has
+    // to be cloned separately whenever the source actually owns one.
+    WERD_RES *const clone = new WERD_RES(*src);
+    if (src->ratings == nullptr) {
+      return clone;
+    }
+    clone->ratings = src->ratings->DeepCopy();
+    return clone;
+  }
+
+  // Copy blobs from word_res onto this word (eliminating spaces between).
+  // Since this may be called in either direction, the BOL and EOL flags of
+  // the two words are ORed together.
+  void copy_on(WERD_RES *word_res) {
+    auto *incoming = word_res->word;
+    // Work out the merged line-boundary flags first, then store them.
+    const bool starts_line = word->flag(W_BOL) || incoming->flag(W_BOL);
+    const bool ends_line = word->flag(W_EOL) || incoming->flag(W_EOL);
+    word->set_flag(W_BOL, starts_line);
+    word->set_flag(W_EOL, ends_line);
+    // Finally append the other word's blobs to ours.
+    word->copy_on(incoming);
+  }
+
+  // Returns true if the collection of count pieces, starting at start, are all
+  // natural connected components, ie there are no real chops involved.
+  bool PiecesAllNatural(int start, int count) const;
+};
+
+/*************************************************************************
+ * PAGE_RES_IT - Page results iterator
+ *************************************************************************/
+
+// Iterator over the words of a PAGE_RES. Besides the current word it keeps
+// track of the previous and next word together with the row and block that
+// each of them belongs to.
+class TESS_API PAGE_RES_IT {
+public:
+  PAGE_RES *page_res = nullptr; // page being iterated
+
+  // Creates an iterator that is not attached to any page. All cursor
+  // pointers start out as nullptr so that comparing or querying such an
+  // iterator never reads indeterminate values.
+  PAGE_RES_IT() = default;
+
+  // Attaches the iterator to the_page_res and positions it via
+  // restart_page().
+  PAGE_RES_IT(PAGE_RES *the_page_res) { // page result
+    page_res = the_page_res;
+    restart_page(); // ready to scan
+  }
+
+  // Do two PAGE_RES_ITs point at the same word?
+  // This is much cheaper than cmp().
+  bool operator==(const PAGE_RES_IT &other) const {
+    return word_res == other.word_res && row_res == other.row_res && block_res == other.block_res;
+  }
+
+  bool operator!=(const PAGE_RES_IT &other) const {
+    return !(*this == other);
+  }
+
+  // Given another PAGE_RES_IT to the same page,
+  //  this before other:     -1
+  //  this equal to other:    0
+  //  this later than other:  1
+  int cmp(const PAGE_RES_IT &other) const;
+
+  WERD_RES *restart_page() {
+    return start_page(false); // Skip empty blocks.
+  }
+  WERD_RES *restart_page_with_empties() {
+    return start_page(true); // Allow empty blocks.
+  }
+  WERD_RES *start_page(bool empty_ok);
+
+  WERD_RES *restart_row();
+
+  // ============ Methods that mutate the underlying structures ===========
+  // Note that these methods will potentially invalidate other PAGE_RES_ITs
+  // and are intended to be used only while a single PAGE_RES_IT is active.
+  // This problem needs to be taken into account if these mutation operators
+  // are ever provided to PageIterator or its subclasses.
+
+  // Inserts the new_word and a corresponding WERD_RES before the current
+  // position. The simple fields of the WERD_RES are copied from clone_res and
+  // the resulting WERD_RES is returned for further setup with best_choice etc.
+  WERD_RES *InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word);
+
+  // Replaces the current WERD/WERD_RES with the given words. The given words
+  // contain fake blobs that indicate the position of the characters. These are
+  // replaced with real blobs from the current word as much as possible.
+  void ReplaceCurrentWord(PointerVector<WERD_RES> *words);
+
+  // Deletes the current WERD_RES and its underlying WERD.
+  void DeleteCurrentWord();
+
+  // Makes the current word a fuzzy space if not already fuzzy. Updates
+  // corresponding part of combo if required.
+  void MakeCurrentWordFuzzy();
+
+  WERD_RES *forward() { // Get next word.
+    return internal_forward(false, false);
+  }
+  // Move forward, but allow empty blocks to show as single nullptr words.
+  WERD_RES *forward_with_empties() {
+    return internal_forward(false, true);
+  }
+
+  WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph
+  WERD_RES *forward_block();     // get first word in next non-empty block
+
+  WERD_RES *prev_word() const { // previous word
+    return prev_word_res;
+  }
+  ROW_RES *prev_row() const { // row of prev word
+    return prev_row_res;
+  }
+  BLOCK_RES *prev_block() const { // block of prev word
+    return prev_block_res;
+  }
+  WERD_RES *word() const { // current word
+    return word_res;
+  }
+  ROW_RES *row() const { // row of current word
+    return row_res;
+  }
+  BLOCK_RES *block() const { // block of cur. word
+    return block_res;
+  }
+  WERD_RES *next_word() const { // next word
+    return next_word_res;
+  }
+  ROW_RES *next_row() const { // row of next word
+    return next_row_res;
+  }
+  BLOCK_RES *next_block() const { // block of next word
+    return next_block_res;
+  }
+  void rej_stat_word(); // for page/block/row
+  void ResetWordIterator();
+
+private:
+  WERD_RES *internal_forward(bool new_block, bool empty_ok);
+
+  // All cursor pointers default to nullptr so that a default-constructed
+  // iterator is in a well-defined (detached) state.
+  WERD_RES *prev_word_res = nullptr;   // previous word
+  ROW_RES *prev_row_res = nullptr;     // row of prev word
+  BLOCK_RES *prev_block_res = nullptr; // block of prev word
+
+  WERD_RES *word_res = nullptr;   // current word
+  ROW_RES *row_res = nullptr;     // row of current word
+  BLOCK_RES *block_res = nullptr; // block of cur. word
+
+  WERD_RES *next_word_res = nullptr;   // next word
+  ROW_RES *next_row_res = nullptr;     // row of next word
+  BLOCK_RES *next_block_res = nullptr; // block of next word
+
+  BLOCK_RES_IT block_res_it; // iterators
+  ROW_RES_IT row_res_it;
+  WERD_RES_IT word_res_it;
+  // Iterators used to get the state of word_res_it for the current word.
+  // Since word_res_it is 2 words further on, this is otherwise hard to do.
+  WERD_RES_IT wr_it_of_current_word;
+  WERD_RES_IT wr_it_of_next_word;
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/params_training_featdef.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/params_training_featdef.cpp
@ -0,0 +1,43 @@
+///////////////////////////////////////////////////////////////////////
+// File:        params_training_featdef.cpp
+// Description: Utility functions for params training features.
+// Author:      David Eger
+// Created:     Mon Jun 11 11:26:42 PDT 2012
+//
+// (C) Copyright 2012, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include <cstring>
+
+#include "params_training_featdef.h"
+
+namespace tesseract {
+
+// Looks up name in kParamsTrainingFeatureTypeName and returns the position
+// of the first exact match, or -1 when name is nullptr or not in the table.
+int ParamsTrainingFeatureByName(const char *name) {
+  if (name != nullptr) {
+    int index = 0;
+    for (const char *candidate : kParamsTrainingFeatureTypeName) {
+      // Null table entries are skipped but still occupy an index.
+      if (candidate != nullptr && strcmp(name, candidate) == 0) {
+        return index;
+      }
+      ++index;
+    }
+  }
+  return -1;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/params_training_featdef.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/params_training_featdef.h
@ -0,0 +1,154 @@
+///////////////////////////////////////////////////////////////////////
+// File:        params_training_featdef.h
+// Description: Feature definitions for params training.
+// Author:      Rika Antonova
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
+#define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
+
+#include <cstring> // for memset
+#include <string>
+#include <vector>
+
+namespace tesseract {
+
+// Maximum number of unichars in the small and medium sized words
+static const int kMaxSmallWordUnichars = 3;
+static const int kMaxMediumWordUnichars = 6;
+
+// Raw features extracted from a single OCR hypothesis.
+// The features are normalized (by outline length or number of unichars as
+// appropriate) real-valued quantities with unbounded range and
+// unknown distribution.
+// Normalization / binarization of these features is done at a later stage.
+// Note: when adding new fields to this enum make sure to modify
+// kParamsTrainingFeatureTypeName
+enum kParamsTrainingFeatureType {
+  // The enumerators are numbered consecutively from 0; the trailing
+  // comments give each numeric value. kParamsTrainingFeatureTypeName lists
+  // the matching names in the same order.
+  // NOTE(review): the SHORT/MED/LONG suffixes presumably correspond to the
+  // kMaxSmallWordUnichars / kMaxMediumWordUnichars limits - confirm against
+  // the feature extraction code.
+  // Digits
+  PTRAIN_DIGITS_SHORT, // 0
+  PTRAIN_DIGITS_MED,   // 1
+  PTRAIN_DIGITS_LONG,  // 2
+  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
+  PTRAIN_NUM_SHORT, // 3
+  PTRAIN_NUM_MED,   // 4
+  PTRAIN_NUM_LONG,  // 5
+  // Document word (DOC_DAWG_PERM)
+  PTRAIN_DOC_SHORT, // 6
+  PTRAIN_DOC_MED,   // 7
+  PTRAIN_DOC_LONG,  // 8
+  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
+  PTRAIN_DICT_SHORT, // 9
+  PTRAIN_DICT_MED,   // 10
+  PTRAIN_DICT_LONG,  // 11
+  // Frequent word (FREQ_DAWG_PERM)
+  PTRAIN_FREQ_SHORT,          // 12
+  PTRAIN_FREQ_MED,            // 13
+  PTRAIN_FREQ_LONG,           // 14
+  PTRAIN_SHAPE_COST_PER_CHAR, // 15
+  PTRAIN_NGRAM_COST_PER_CHAR, // 16
+  PTRAIN_NUM_BAD_PUNC,        // 17
+  PTRAIN_NUM_BAD_CASE,        // 18
+  PTRAIN_XHEIGHT_CONSISTENCY, // 19
+  PTRAIN_NUM_BAD_CHAR_TYPE,   // 20
+  PTRAIN_NUM_BAD_SPACING,     // 21
+  PTRAIN_NUM_BAD_FONT,        // 22
+  PTRAIN_RATING_PER_CHAR,     // 23
+
+  // Number of feature types. Must stay last; it is used as the size of
+  // ParamsTrainingHypothesis::features.
+  PTRAIN_NUM_FEATURE_TYPES
+};
+
+// Printable name of every kParamsTrainingFeatureType value, stored at the
+// index equal to the enum value. ParamsTrainingFeatureByName() searches this
+// table to map a name back to its index.
+static const char *const kParamsTrainingFeatureTypeName[] = {
+    "PTRAIN_DIGITS_SHORT",        // 0
+    "PTRAIN_DIGITS_MED",          // 1
+    "PTRAIN_DIGITS_LONG",         // 2
+    "PTRAIN_NUM_SHORT",           // 3
+    "PTRAIN_NUM_MED",             // 4
+    "PTRAIN_NUM_LONG",            // 5
+    "PTRAIN_DOC_SHORT",           // 6
+    "PTRAIN_DOC_MED",             // 7
+    "PTRAIN_DOC_LONG",            // 8
+    "PTRAIN_DICT_SHORT",          // 9
+    "PTRAIN_DICT_MED",            // 10
+    "PTRAIN_DICT_LONG",           // 11
+    "PTRAIN_FREQ_SHORT",          // 12
+    "PTRAIN_FREQ_MED",            // 13
+    "PTRAIN_FREQ_LONG",           // 14
+    "PTRAIN_SHAPE_COST_PER_CHAR", // 15
+    "PTRAIN_NGRAM_COST_PER_CHAR", // 16
+    "PTRAIN_NUM_BAD_PUNC",        // 17
+    "PTRAIN_NUM_BAD_CASE",        // 18
+    "PTRAIN_XHEIGHT_CONSISTENCY", // 19
+    "PTRAIN_NUM_BAD_CHAR_TYPE",   // 20
+    "PTRAIN_NUM_BAD_SPACING",     // 21
+    "PTRAIN_NUM_BAD_FONT",        // 22
+    "PTRAIN_RATING_PER_CHAR",     // 23
+};
+
+// Break the build, rather than silently mis-map names, if an enumerator is
+// added or removed without updating the table above.
+static_assert(static_cast<int>(sizeof(kParamsTrainingFeatureTypeName) /
+                               sizeof(kParamsTrainingFeatureTypeName[0])) ==
+                  PTRAIN_NUM_FEATURE_TYPES,
+              "kParamsTrainingFeatureTypeName must have one entry per kParamsTrainingFeatureType");
+
+// Returns the index of the given feature (by name),
+// or -1 meaning the feature is unknown.
+int ParamsTrainingFeatureByName(const char *name);
+
+// Entry with features extracted from a single OCR hypothesis for a word.
+struct ParamsTrainingHypothesis {
+  // Every member carries its own initializer, so a default-constructed
+  // hypothesis has an empty string, all-zero features and zero cost.
+  // Copying, moving and assignment are left to the compiler-generated
+  // member-wise operations: they copy exactly the same three members as the
+  // former hand-written versions, but do not memcpy an array onto itself on
+  // self-assignment and additionally allow the string to be moved.
+  ParamsTrainingHypothesis() = default;
+
+  std::string str; // string corresponding to word hypothesis (for debugging)
+  // One value per kParamsTrainingFeatureType.
+  float features[PTRAIN_NUM_FEATURE_TYPES] = {};
+  float cost = 0.0f; // path cost computed by segsearch
+};
+
+// A list of hypotheses explored during one run of segmentation search.
+using ParamsTrainingHypothesisList = std::vector<ParamsTrainingHypothesis>;
+
+// A bundle that accumulates all of the hypothesis lists explored during all
+// of the runs of segmentation search on a word (e.g. a list of hypotheses
+// explored on PASS1, PASS2, fix xheight pass, etc).
+class ParamsTrainingBundle {
+public:
+  ParamsTrainingBundle() = default;
+  // Starts a new hypothesis list.
+  // Should be called at the beginning of a new run of the segmentation search.
+  void StartHypothesisList() {
+    hyp_list_vec.emplace_back();
+  }
+  // Appends a copy of other to the most recently started hypothesis list,
+  // starting a list first if none exists yet, and returns a reference to the
+  // stored copy. As with any std::vector element, that reference is only
+  // valid until the list grows again.
+  ParamsTrainingHypothesis &AddHypothesis(const ParamsTrainingHypothesis &other) {
+    if (hyp_list_vec.empty()) {
+      StartHypothesisList();
+    }
+    ParamsTrainingHypothesisList &current_list = hyp_list_vec.back();
+    // push_back copies other straight into the vector; no intermediate
+    // temporary is needed.
+    current_list.push_back(other);
+    return current_list.back();
+  }
+
+  // One hypothesis list per run of the segmentation search, in the order the
+  // runs were started.
+  std::vector<ParamsTrainingHypothesisList> hyp_list_vec;
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pdblock.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pdblock.cpp
@ -0,0 +1,388 @@
+/**********************************************************************
+ * File:        pdblock.cpp
+ * Description: PDBLK member functions and iterator functions.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "pdblock.h"
+
+#include <allheaders.h>
+
+#include <cinttypes> // for PRId32
+#include <cstdlib>
+#include <memory> // std::unique_ptr
+
+namespace tesseract {
+
+#define BLOCK_LABEL_HEIGHT 150 // char height of block id
+
+constexpr ERRCODE BADBLOCKLINE("Y coordinate in block out of bounds");
+constexpr ERRCODE LOSTBLOCKLINE("Can't find rectangle for line");
+
+/**********************************************************************
+ * PDBLK::PDBLK
+ *
+ * Constructor for a simple rectangular block.
+ **********************************************************************/
+PDBLK::PDBLK(int16_t xmin, int16_t ymin, // bottom left corner
+             int16_t xmax, int16_t ymax) // top right corner
+    : box(ICOORD(xmin, ymin), ICOORD(xmax, ymax)) {
+  hand_poly = nullptr;
+  index_ = 0;
+
+  // The default outline is the rectangle itself: each side is described by
+  // its bottom vertex followed by its top vertex.
+  ICOORDELT_IT left_edge(&leftside);
+  left_edge.add_to_end(new ICOORDELT(xmin, ymin));
+  left_edge.add_to_end(new ICOORDELT(xmin, ymax));
+
+  ICOORDELT_IT right_edge(&rightside);
+  right_edge.add_to_end(new ICOORDELT(xmax, ymin));
+  right_edge.add_to_end(new ICOORDELT(xmax, ymax));
+}
+
+/**********************************************************************
+ * PDBLK::set_sides
+ *
+ * Sets left and right vertex lists
+ **********************************************************************/
+
+void PDBLK::set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right) {
+  // Discard the current left boundary and take over the vertices in left.
+  leftside.clear();
+  ICOORDELT_IT left_edge(&leftside);
+  left_edge.move_to_first();
+  left_edge.add_list_before(left);
+
+  // Same again for the right boundary.
+  rightside.clear();
+  ICOORDELT_IT right_edge(&rightside);
+  right_edge.move_to_first();
+  right_edge.add_list_before(right);
+}
+
+/**********************************************************************
+ * PDBLK::contains
+ *
+ * Return true if the given point is within the block.
+ **********************************************************************/
+
+// Returns true if pt lies inside (or on the edge of) any of the rectangles
+// that make up the block, false otherwise. A block that has no boundary
+// vertices contains no points.
+bool PDBLK::contains( // test containment
+    ICOORD pt         // point to test
+) {
+  // start_block() reads the first boundary vertices, so it must not be
+  // called on an empty outline (BLOCK_RECT_IT itself guards the same way).
+  if (leftside.empty()) {
+    return false;
+  }
+
+  BLOCK_RECT_IT it = this; // rectangle iterator
+  ICOORD bleft, tright;    // corners of rectangle
+
+  for (it.start_block(); !it.cycled_rects(); it.forward()) {
+    // get rectangle
+    it.bounding_box(bleft, tright);
+    // inside rect
+    if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() &&
+        pt.y() <= tright.y()) {
+      return true; // is inside
+    }
+  }
+  return false; // not inside
+}
+
+/**********************************************************************
+ * PDBLK::move
+ *
+ * Reposition block
+ **********************************************************************/
+
+void PDBLK::move(const ICOORD vec) {
+  // Shift every vertex of both boundary lists by vec...
+  ICOORDELT_LIST *const sides[] = {&leftside, &rightside};
+  for (ICOORDELT_LIST *side : sides) {
+    ICOORDELT_IT vertex_it(side);
+    for (vertex_it.mark_cycle_pt(); !vertex_it.cycled_list(); vertex_it.forward()) {
+      *vertex_it.data() += vec;
+    }
+  }
+  // ...and keep the bounding box in step with them.
+  box.move(vec);
+}
+
+// Returns a binary Pix mask with a 1 pixel for every pixel within the
+// block. Rotates the coordinate system by rerotation prior to rendering.
+// The mask is as large as the rotated bounding box; if mask_box is not
+// nullptr it receives that rotated box.
+Image PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
+  TBOX rotated_box(box);
+  rotated_box.rotate(rerotation);
+  Image pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
+  if (hand_poly != nullptr) {
+    // We are going to rotate, so get a deep copy of the points and
+    // make a new POLY_BLOCK with it.
+    ICOORDELT_LIST polygon;
+    polygon.deep_copy(hand_poly->points(), ICOORDELT::deep_copy);
+    POLY_BLOCK image_block(&polygon, hand_poly->isA());
+    image_block.rotate(rerotation);
+    // Block outline is a polygon, so use a PB_LINE_IT to get the
+    // rasterized interior. (Runs of interior pixels on a line.)
+    PB_LINE_IT lines(&image_block);
+    // Both the rotated polygon and the row index computed below live in the
+    // coordinates of rotated_box, so the scanlines must cover its y range
+    // rather than that of the unrotated box.
+    for (int y = rotated_box.bottom(); y < rotated_box.top(); ++y) {
+      const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines.get_line(y));
+      if (!segments->empty()) {
+        ICOORDELT_IT s_it(segments.get());
+        // Each element of segments is a start x and x size of the
+        // run of interior pixels.
+        for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
+          int start = s_it.data()->x();
+          int xext = s_it.data()->y();
+          // Set the run of pixels to 1. Image rows count downwards from the
+          // top of the box, hence the flipped y.
+          pixRasterop(pix, start - rotated_box.left(),
+                      rotated_box.height() - 1 - (y - rotated_box.bottom()), xext, 1, PIX_SET,
+                      nullptr, 0, 0);
+        }
+      }
+    }
+  } else {
+    // Just fill the whole block as there is only a bounding box.
+    pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), PIX_SET, nullptr, 0, 0);
+  }
+  if (mask_box != nullptr) {
+    *mask_box = rotated_box;
+  }
+  return pix;
+}
+
+/**********************************************************************
+ * PDBLK::plot
+ *
+ * Plot the outline of a block in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Draws the block outline in the given colour and labels it with its serial
+// number. When a hand-drawn polygon is present it is plotted instead of the
+// left/right boundary lists; an empty block draws nothing.
+void PDBLK::plot(            // draw outline
+    ScrollView *window,      // window to draw in
+    int32_t serial,          // serial number
+    ScrollView::Color colour // colour to draw in
+) {
+  ICOORD startpt;              // start of outline
+  ICOORD endpt;                // end of outline
+  ICOORD prevpt;               // previous point
+  ICOORDELT_IT it = &leftside; // iterator
+
+  // set the colour
+  window->Pen(colour);
+  window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false);
+
+  if (hand_poly != nullptr) {
+    hand_poly->plot(window, serial);
+  } else if (!leftside.empty()) {
+    startpt = *(it.data()); // bottom left corner
+    //              tprintf("Block %d bottom left is (%d,%d)\n",
+    //                      serial,startpt.x(),startpt.y());
+    // Format the label as a signed 32 bit value on every platform, so that
+    // the same text is produced regardless of the toolchain.
+    char temp_buff[34];
+    snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, serial);
+    window->Text(startpt.x(), startpt.y(), temp_buff);
+
+    // Walk up the left side, drawing a vertical then a horizontal stroke
+    // for every step between consecutive vertices.
+    window->SetCursor(startpt.x(), startpt.y());
+    do {
+      prevpt = *(it.data()); // previous point
+      it.forward();          // move to next point
+                             // draw round corner
+      window->DrawTo(prevpt.x(), it.data()->y());
+      window->DrawTo(it.data()->x(), it.data()->y());
+    } while (!it.at_last()); // until end of list
+    endpt = *(it.data());    // end point
+
+    // other side of boundary
+    window->SetCursor(startpt.x(), startpt.y());
+    it.set_to_list(&rightside);
+    prevpt = startpt;
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      // draw round corner
+      window->DrawTo(prevpt.x(), it.data()->y());
+      window->DrawTo(it.data()->x(), it.data()->y());
+      prevpt = *(it.data()); // previous point
+    }
+    // close boundary
+    window->DrawTo(endpt.x(), endpt.y());
+  }
+}
+#endif
+
+/**********************************************************************
+ * PDBLK::operator=
+ *
+ * Assignment - duplicate the block structure, but with an EMPTY row list.
+ **********************************************************************/
+
+// Replaces the left and right vertex lists and the bounding box of this
+// block with deep copies of those in source, and returns *this.
+// NOTE(review): hand_poly and index_ are left untouched here - confirm that
+// this is intended.
+PDBLK &PDBLK::operator=( // assignment
+    const PDBLK &source  // from this
+) {
+  // Self-assignment must be a no-op: clearing our lists first would also
+  // empty the source, and the block would lose its outline.
+  if (this == &source) {
+    return *this;
+  }
+  //      this->ELIST_LINK::operator=(source);
+  if (!leftside.empty()) {
+    leftside.clear();
+  }
+  if (!rightside.empty()) {
+    rightside.clear();
+  }
+  leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy);
+  rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy);
+  box = source.box;
+  return *this;
+}
+
+/**********************************************************************
+ * BLOCK_RECT_IT::BLOCK_RECT_IT
+ *
+ * Construct a block rectangle iterator.
+ **********************************************************************/
+
+BLOCK_RECT_IT::BLOCK_RECT_IT(PDBLK *blkptr)
+    : left_it(&blkptr->leftside), right_it(&blkptr->rightside) {
+  block = blkptr; // remember which block is being iterated
+  // Only a block that has boundary vertices can be positioned on its first
+  // rectangle.
+  const bool has_outline = !blkptr->leftside.empty();
+  if (has_outline) {
+    start_block();
+  }
+}
+
+/**********************************************************************
+ * BLOCK_RECT_IT::set_to_block
+ *
+ * Start a new block.
+ **********************************************************************/
+
+void BLOCK_RECT_IT::set_to_block(PDBLK *blkptr) {
+  block = blkptr; // remember which block is being iterated
+  // Point both side iterators at the new block's boundary lists.
+  left_it.set_to_list(&blkptr->leftside);
+  right_it.set_to_list(&blkptr->rightside);
+  if (blkptr->leftside.empty()) {
+    return; // no outline, so there is no first rectangle to move to
+  }
+  start_block();
+}
+
+/**********************************************************************
+ * BLOCK_RECT_IT::start_block
+ *
+ * Restart a block.
+ **********************************************************************/
+
+void BLOCK_RECT_IT::start_block() {
+  // Rewind both sides and mark the restart point for cycle detection.
+  left_it.move_to_first();
+  left_it.mark_cycle_pt();
+  right_it.move_to_first();
+  right_it.mark_cycle_pt();
+
+  // The first rectangle starts at the first left vertex and reaches up to
+  // whichever side has the lower next vertex.
+  ymin = left_it.data()->y();
+  const auto left_next_y = left_it.data_relative(1)->y();
+  const auto right_next_y = right_it.data_relative(1)->y();
+  ymax = right_next_y < left_next_y ? right_next_y : left_next_y;
+}
+
+/**********************************************************************
+ * BLOCK_RECT_IT::forward
+ *
+ * Move to the next rectangle in the block.
+ **********************************************************************/
+
+void BLOCK_RECT_IT::forward() {
+  if (left_it.empty()) {
+    return; // nothing to step through
+  }
+
+  // Advance whichever side(s) reach their next vertex at the current top.
+  if (left_it.data_relative(1)->y() == ymax) {
+    left_it.forward();
+  }
+  if (right_it.data_relative(1)->y() == ymax) {
+    right_it.forward();
+  }
+
+  // Reaching the last vertex on either side means the top rectangle has
+  // been passed, so wrap round to the bottom of the block again.
+  const bool wrapped = left_it.at_last() || right_it.at_last();
+  if (wrapped) {
+    left_it.move_to_first();
+    right_it.move_to_first();
+    ymin = left_it.data()->y();
+  } else {
+    ymin = ymax; // the old top becomes the new bottom
+  }
+
+  // The new top is the lower of the two next vertices.
+  const auto left_next_y = left_it.data_relative(1)->y();
+  const auto right_next_y = right_it.data_relative(1)->y();
+  ymax = right_next_y < left_next_y ? right_next_y : left_next_y;
+}
+
+/**********************************************************************
+ * BLOCK_LINE_IT::get_line
+ *
+ * Get the the start and width of a line in the block.
+ **********************************************************************/
+
+int16_t BLOCK_LINE_IT::get_line(int16_t y, int16_t &xext) {
+  ICOORD bleft;  // bottom left of the box under test
+  ICOORD tright; // top right of the box under test
+
+  // The requested line has to lie within the block's bounding box.
+  block->bounding_box(bleft, tright);
+  const bool outside_block = y < bleft.y() || y >= tright.y();
+  if (outside_block) {
+    BADBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
+  }
+
+  // Fetches the box of the rectangle rect_it is on and reports whether
+  // line y falls inside it.
+  const auto rect_spans_line = [&]() {
+    rect_it.bounding_box(bleft, tright);
+    return y >= bleft.y() && y < tright.y();
+  };
+
+  // Try the rectangle the iterator is already on before rescanning.
+  if (rect_spans_line()) {
+    xext = tright.x() - bleft.x();
+    return bleft.x();
+  }
+  // Otherwise search every rectangle of the block from the start.
+  for (rect_it.start_block(); !rect_it.cycled_rects(); rect_it.forward()) {
+    if (rect_spans_line()) {
+      xext = tright.x() - bleft.x();
+      return bleft.x();
+    }
+  }
+  LOSTBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
+  return 0; // dummy to stop warning
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pdblock.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/pdblock.h
@ -0,0 +1,184 @@
+/**********************************************************************
+ * File:        pdblock.h  (Formerly pdblk.h)
+ * Description: Page block class definition.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef PDBLOCK_H
+#define PDBLOCK_H
+
+#include "clst.h"
+#include "polyblk.h"
+
+struct Pix;
+
+namespace tesseract {
+
+class PDBLK; // forward decl
+
+CLISTIZEH(PDBLK)
+/// Page block: a bounding box plus left/right vertex lists, an optional
+/// POLY_BLOCK and a serial number.
+class PDBLK {
+  friend class BLOCK_RECT_IT; ///< block iterator
+  friend class BLOCK;         ///< Page Block
+
+public:
+  /// Default constructor: no polygon attached and index 0.
+  PDBLK() : hand_poly(nullptr), index_(0) {}
+  /// Constructor taking the corners of the block.
+  PDBLK(int16_t xmin, ///< bottom left
+        int16_t ymin,
+        int16_t xmax, ///< top right
+        int16_t ymax);
+
+  /// Set the vertex lists.
+  ///@param left list of left vertices
+  ///@param right list of right vertices
+  void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);
+
+  /// Destructor: deletes the attached POLY_BLOCK (if any).
+  ~PDBLK() {
+    delete hand_poly;
+  }
+
+  /// Returns the attached POLY_BLOCK, or nullptr when none is set.
+  POLY_BLOCK *poly_block() const {
+    return hand_poly;
+  }
+  /// Stores blk as the attached POLY_BLOCK. The previously stored pointer is
+  /// overwritten without being deleted; the stored one is deleted by ~PDBLK.
+  void set_poly_block(POLY_BLOCK *blk) {
+    hand_poly = blk;
+  }
+  /// Copies the corners of the bounding box into the two output arguments.
+  void bounding_box(ICOORD &bottom_left,       // bottom left
+                    ICOORD &top_right) const { // topright
+    bottom_left = box.botleft();
+    top_right = box.topright();
+  }
+  /// Returns the bounding box itself.
+  const TBOX &bounding_box() const {
+    return box;
+  }
+
+  /// Serial number of this block.
+  int index() const {
+    return index_;
+  }
+  /// Sets the serial number of this block.
+  void set_index(int value) {
+    index_ = value;
+  }
+
+  /// Tests whether pt is inside the block.
+  bool contains(ICOORD pt);
+
+  /// Repositions the block by the given vector.
+  void move(const ICOORD vec); // by vector
+
+  // Returns a binary Pix mask with a 1 pixel for every pixel within the
+  // block. Rotates the coordinate system by rerotation prior to rendering.
+  // If not nullptr, mask_box is filled with the position box of the returned
+  // mask image.
+  Image render_mask(const FCOORD &rerotation, TBOX *mask_box);
+
+#ifndef GRAPHICS_DISABLED
+  /// Draws the block.
+  ///@param window window to draw in
+  ///@param serial serial number
+  ///@param colour colour to draw in
+  void plot(ScrollView *window, int32_t serial, ScrollView::Color colour);
+#endif // !GRAPHICS_DISABLED
+
+  /// Assignment.
+  ///@param source from this
+  PDBLK &operator=(const PDBLK &source);
+
+protected:
+  POLY_BLOCK *hand_poly;    ///< attached polygon, deleted in the destructor
+  ICOORDELT_LIST leftside;  ///< left side vertices
+  ICOORDELT_LIST rightside; ///< right side vertices
+  TBOX box;                 ///< bounding box
+  int index_;               ///< Serial number of this block.
+};
+
+/// Iterator over the rectangles of a PDBLK, driven by two vertex-list
+/// iterators (one per side) and the current vertical extent [ymin, ymax].
+class BLOCK_RECT_IT {
+public:
+  /// Constructor.
+  ///@param blkptr block to iterate
+  BLOCK_RECT_IT(PDBLK *blkptr);
+
+  /// Switches the iterator to a (new) block.
+  ///@param blkptr block to iterate
+  void set_to_block(PDBLK *blkptr);
+
+  /// Starts the iteration.
+  void start_block();
+
+  /// Advances to the next rectangle.
+  void forward();
+
+  /// Tests for the end of iteration: true once both side iterators have
+  /// cycled their lists.
+  bool cycled_rects() const {
+    if (!left_it.cycled_list()) {
+      return false;
+    }
+    return right_it.cycled_list();
+  }
+
+  /// Reports the current rectangle.
+  ///@param bleft bottom left
+  ///@param tright top right
+  void bounding_box(ICOORD &bleft, ICOORD &tright) {
+    // bottom left: x from the left boundary, y from the rectangle bottom
+    bleft.set_x(left_it.data()->x());
+    bleft.set_y(ymin);
+    // top right: x from the right boundary, y from the rectangle top
+    tright.set_x(right_it.data()->x());
+    tright.set_y(ymax);
+  }
+
+private:
+  int16_t ymin = 0;       ///< bottom of rectangle
+  int16_t ymax = 0;       ///< top of rectangle
+  PDBLK *block = nullptr; ///< block to iterate
+  ICOORDELT_IT left_it;   ///< boundary iterators
+  ICOORDELT_IT right_it;
+};
+
+/// Line iterator: answers per-scan-line queries on a PDBLK using an
+/// embedded BLOCK_RECT_IT.
+class BLOCK_LINE_IT {
+public:
+  /// Constructor.
+  ///@param blkptr from block
+  BLOCK_LINE_IT(PDBLK *blkptr) : block(blkptr), rect_it(blkptr) {}
+
+  /// Switches to a (new) block.
+  ///@param blkptr block to start
+  void set_to_block(PDBLK *blkptr) {
+    // Remember the block, then point the rectangle iterator at it.
+    block = blkptr;
+    rect_it.set_to_block(blkptr);
+  }
+
+  /// Gets a line.
+  ///@param y line to get
+  ///@param xext output extent
+  ///@return x coordinate of the start of the line
+  int16_t get_line(int16_t y, int16_t &xext);
+
+private:
+  PDBLK *block;          ///< block to iterate
+  BLOCK_RECT_IT rect_it; ///< rectangle iterator
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/points.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/points.cpp
@ -0,0 +1,158 @@
+/**********************************************************************
+ * File:        points.cpp  (Formerly coords.c)
+ * Description: Member functions for coordinate classes.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#define _USE_MATH_DEFINES // for M_PI
+
+#include "points.h"
+
+#include "helpers.h"
+#include "serialis.h"
+
+#include <algorithm>
+#include <cmath> // for M_PI
+#include <cstdlib>
+
+namespace tesseract {
+
+// Scales this vector to unit length in place.
+// Returns false (leaving the vector untouched) when its length is below
+// 1e-10, true otherwise.
+bool FCOORD::normalise() {
+  const float magnitude = length();
+  const bool degenerate = magnitude < 0.0000000001;
+  if (!degenerate) {
+    xcoord /= magnitude;
+    ycoord /= magnitude;
+  }
+  return !degenerate;
+}
+
+// Reads x then y from the given TFile. Returns false as soon as one read
+// fails (y is not attempted when x fails).
+bool ICOORD::DeSerialize(TFile *f) {
+  if (!f->DeSerialize(&xcoord)) {
+    return false;
+  }
+  return f->DeSerialize(&ycoord);
+}
+
+// Writes x then y to the given TFile. Returns false as soon as one write
+// fails (y is not attempted when x fails).
+bool ICOORD::Serialize(TFile *f) const {
+  if (!f->Serialize(&xcoord)) {
+    return false;
+  }
+  return f->Serialize(&ycoord);
+}
+
+// Sets this from the given x,y. When the larger absolute component does not
+// fit in 16 bits, both components are divided by the same integer factor so
+// that the vector fits.
+void ICOORD::set_with_shrink(int x, int y) {
+  const int largest = std::max(abs(x), abs(y));
+  // Smallest divisor that brings the largest component within INT16_MAX.
+  const int divisor = largest > INT16_MAX ? largest / INT16_MAX + 1 : 1;
+  xcoord = x / divisor;
+  ycoord = y / divisor;
+}
+
+// Signum: returns -1 for negative x, 0 for zero and 1 for positive x
+// (the fortran/basic sgn function).
+static int sign(int x) {
+  // Each comparison yields 0 or 1, so the difference is -1, 0 or 1.
+  return (x > 0) - (x < 0);
+}
+
+// Writes to the given file. Returns false in case of error.
+bool ICOORD::Serialize(FILE *fp) const {
+  return tesseract::Serialize(fp, &xcoord) && tesseract::Serialize(fp, &ycoord);
+}
+// Reads from the given file. Returns false in case of error.
+// If swap is true, assumes a big/little-endian swap is needed.
+bool ICOORD::DeSerialize(bool swap, FILE *fp) {
+  if (!tesseract::DeSerialize(fp, &xcoord)) {
+    return false;
+  }
+  if (!tesseract::DeSerialize(fp, &ycoord)) {
+    return false;
+  }
+  if (swap) {
+    ReverseN(&xcoord, sizeof(xcoord));
+    ReverseN(&ycoord, sizeof(ycoord));
+  }
+  return true;
+}
+
+// Setup for iterating over the pixels in a vector by the well-known
+// Bresenham rendering algorithm.
+// Starting with major/2 in the accumulator, on each step add major_step,
+// and then add minor to the accumulator. When the accumulator >= major
+// subtract major and step a minor step.
+
+// Picks the major axis (the one with the larger absolute extent, x on ties)
+// and fills in the unit step along each axis plus the two extents.
+void ICOORD::setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const {
+  const int x_extent = abs(xcoord);
+  const int y_extent = abs(ycoord);
+  if (x_extent < y_extent) {
+    // Y-direction is major.
+    major_step->xcoord = 0;
+    major_step->ycoord = sign(ycoord);
+    minor_step->xcoord = sign(xcoord);
+    minor_step->ycoord = 0;
+    *major = y_extent;
+    *minor = x_extent;
+    return;
+  }
+  // X-direction is major (including the case of equal extents).
+  major_step->xcoord = sign(xcoord);
+  major_step->ycoord = 0;
+  minor_step->xcoord = 0;
+  minor_step->ycoord = sign(ycoord);
+  *major = x_extent;
+  *minor = y_extent;
+}
+
+// Returns the standard feature direction corresponding to this, i.e. the
+// result of binary_angle_plus_pi applied to angle().
+uint8_t FCOORD::to_direction() const {
+  const double radians = angle();
+  return binary_angle_plus_pi(radians);
+}
+// Sets this to (cos, sin) of the angle that angle_from_direction gives for
+// the standard feature direction, i.e. a unit vector in that direction.
+void FCOORD::from_direction(uint8_t direction) {
+  const double theta = angle_from_direction(direction);
+  xcoord = cos(theta);
+  ycoord = sin(theta);
+}
+
+// Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a
+// standard feature direction: an unsigned angle in 256ths of a circle
+// measured anticlockwise from (-1, 0).
+uint8_t FCOORD::binary_angle_plus_pi(double radians) {
+  // Shift by pi, scale so that a full turn is 256 steps, round to an integer
+  // and wrap into [0, 256).
+  const auto steps = IntCastRounded((radians + M_PI) * 128.0 / M_PI);
+  return Modulo(steps, 256);
+}
+// Inverse of binary_angle_plus_pi: returns the angle in radians for the
+// given standard feature direction (direction 0 maps to -pi).
+double FCOORD::angle_from_direction(uint8_t direction) {
+  const double from_minus_pi = direction * M_PI / 128.0;
+  return from_minus_pi - M_PI;
+}
+
+// Returns the point on the given line nearest to this, ie the point such
+// that the vector point->this is perpendicular to the line.
+// The line is defined as a line_point and a dir_vector for its direction.
+FCOORD FCOORD::nearest_pt_on_line(const FCOORD &line_point, const FCOORD &dir_vector) const {
+  const FCOORD offset = *this - line_point;
+  // The dot product (%) is |dir_vector||offset|cos theta, so dividing by the
+  // squared length of dir_vector gives the fraction of dir_vector to add to
+  // line_point: result = line_point + fraction * dir_vector.
+  const double fraction = offset % dir_vector / dir_vector.sqlength();
+  return line_point + (dir_vector * fraction);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/points.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/points.h
@ -0,0 +1,726 @@
+/**********************************************************************
+ * File:        points.h  (Formerly coords.h)
+ * Description: Coordinate class definitions.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef POINTS_H
+#define POINTS_H
+
+#include "elst.h"
+#include "errcode.h" // for ASSERT_HOST
+
+#include <tesseract/export.h> // for DLLSYM
+
+#include <cmath> // for sqrt, atan2
+#include <cstdio>
+
+namespace tesseract {
+
+class FCOORD;
+
+/// integer coordinate
+///
+/// A 2-D point/vector whose components are 16-bit signed integers.
+class ICOORD {
+  friend class FCOORD;
+
+public:
+  /// empty constructor: both coordinates are set to zero
+  ICOORD() {
+    xcoord = ycoord = 0; // default zero
+  }
+  /// constructor
+  ///@param xin x value
+  ///@param yin y value
+  ICOORD(int16_t xin, int16_t yin) {
+    xcoord = xin;
+    ycoord = yin;
+  }
+  /// destructor
+  ~ICOORD() = default;
+
+  /// Reads the coordinates from a TFile. Returns false in case of error.
+  bool DeSerialize(TFile *f);
+  /// Writes the coordinates to a TFile. Returns false in case of error.
+  bool Serialize(TFile *f) const;
+
+  /// access function
+  int16_t x() const {
+    return xcoord;
+  }
+  /// access_function
+  int16_t y() const {
+    return ycoord;
+  }
+
+  /// rewrite function
+  void set_x(int16_t xin) {
+    xcoord = xin; // write new value
+  }
+  /// rewrite function
+  void set_y(int16_t yin) { // value to set
+    ycoord = yin;
+  }
+
+  /// Set from the given x,y, shrinking the vector to fit if needed.
+  void set_with_shrink(int x, int y);
+
+  /// find sq length
+  float sqlength() const {
+    // Accumulate in 64 bits: with both coordinates at -32768 the sum of
+    // squares is 2^31, which does not fit in a 32-bit int.
+    const int64_t x = xcoord;
+    const int64_t y = ycoord;
+    return static_cast<float>(x * x + y * y);
+  }
+
+  /// find length
+  float length() const {
+    return std::sqrt(sqlength());
+  }
+
+  /// sq dist between pts
+  float pt_to_pt_sqdist(const ICOORD &pt) const {
+    // The difference of two 16-bit coordinates can need 17 bits, so it must
+    // not be stored back into an ICOORD; keep it (and its square) in 64 bits.
+    const int64_t dx = static_cast<int64_t>(xcoord) - pt.xcoord;
+    const int64_t dy = static_cast<int64_t>(ycoord) - pt.ycoord;
+    return static_cast<float>(dx * dx + dy * dy);
+  }
+
+  /// Distance between pts
+  float pt_to_pt_dist(const ICOORD &pt) const {
+    return std::sqrt(pt_to_pt_sqdist(pt));
+  }
+
+  /// find angle (atan2 of y over x, in radians)
+  float angle() const {
+    return (float)std::atan2(ycoord, xcoord);
+  }
+
+  /// test equality
+  bool operator==(const ICOORD &other) const {
+    return xcoord == other.xcoord && ycoord == other.ycoord;
+  }
+  /// test inequality
+  bool operator!=(const ICOORD &other) const {
+    return xcoord != other.xcoord || ycoord != other.ycoord;
+  }
+  /// rotate 90 deg anti
+  friend ICOORD operator!(const ICOORD &);
+  /// unary minus
+  friend ICOORD operator-(const ICOORD &);
+  /// add
+  friend ICOORD operator+(const ICOORD &, const ICOORD &);
+  /// add
+  friend ICOORD &operator+=(ICOORD &, const ICOORD &);
+  /// subtract
+  friend ICOORD operator-(const ICOORD &, const ICOORD &);
+  /// subtract
+  friend ICOORD &operator-=(ICOORD &, const ICOORD &);
+  /// scalar product
+  friend int32_t operator%(const ICOORD &, const ICOORD &);
+  /// cross product
+  friend int32_t operator*(const ICOORD &, const ICOORD &);
+  /// multiply
+  friend ICOORD operator*(const ICOORD &, int16_t);
+  /// multiply
+  friend ICOORD operator*(int16_t, const ICOORD &);
+  /// multiply
+  friend ICOORD &operator*=(ICOORD &, int16_t);
+  /// divide
+  friend ICOORD operator/(const ICOORD &, int16_t);
+  /// divide
+  friend ICOORD &operator/=(ICOORD &, int16_t);
+  /// rotate
+  ///@param vec by vector
+  void rotate(const FCOORD &vec);
+
+  /// Setup for iterating over the pixels in a vector by the well-known
+  /// Bresenham rendering algorithm.
+  /// Starting with major/2 in the accumulator, on each step move by
+  /// major_step, and then add minor to the accumulator. When
+  /// accumulator >= major subtract major and also move by minor_step.
+  void setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const;
+
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(FILE *fp) const;
+  // Reads from the given file. Returns false in case of error.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  bool DeSerialize(bool swap, FILE *fp);
+
+protected:
+  int16_t xcoord; ///< x value
+  int16_t ycoord; ///< y value
+};
+
+/// An ICOORD that is also an ELIST_LINK, so it can be stored in an
+/// embedded coordinate list.
+class ICOORDELT : public ELIST_LINK, public ICOORD {
+public:
+  /// empty constructor
+  ICOORDELT() = default;
+  /// constructor from ICOORD
+  ICOORDELT(ICOORD icoord) : ICOORD(icoord) {}
+  /// constructor
+  ///@param xin x value
+  ///@param yin y value
+  ICOORDELT(int16_t xin, int16_t yin) : ICOORD(xin, yin) {}
+
+  /// Allocates a new element and assigns *src to it. The caller receives the
+  /// newly allocated element.
+  static ICOORDELT *deep_copy(const ICOORDELT *src) {
+    auto *copy = new ICOORDELT;
+    *copy = *src;
+    return copy;
+  }
+};
+
+ELISTIZEH(ICOORDELT)
+
+/// floating-point coordinate: a 2-D point/vector with float components
+class TESS_API FCOORD {
+public:
+  /// empty constructor (the coordinates are left uninitialized)
+  FCOORD() = default;
+  /// constructor
+  ///@param xvalue x value
+  ///@param yvalue y value
+  FCOORD(float xvalue, float yvalue) {
+    xcoord = xvalue; // set coords
+    ycoord = yvalue;
+  }
+  FCOORD(              // make from ICOORD
+      ICOORD icoord) { // coords to set
+    xcoord = icoord.xcoord;
+    ycoord = icoord.ycoord;
+  }
+
+  float x() const { // get coords
+    return xcoord;
+  }
+  float y() const {
+    return ycoord;
+  }
+  /// rewrite function
+  void set_x(float xin) {
+    xcoord = xin; // write new value
+  }
+  /// rewrite function
+  void set_y(float yin) { // value to set
+    ycoord = yin;
+  }
+
+  /// find sq length
+  float sqlength() const {
+    return xcoord * xcoord + ycoord * ycoord;
+  }
+
+  /// find length
+  float length() const {
+    return std::sqrt(sqlength());
+  }
+
+  /// sq dist between pts
+  float pt_to_pt_sqdist(const FCOORD &pt) const {
+    FCOORD gap;
+
+    gap.xcoord = xcoord - pt.xcoord;
+    gap.ycoord = ycoord - pt.ycoord;
+    return gap.sqlength();
+  }
+
+  /// Distance between pts
+  float pt_to_pt_dist(const FCOORD &pt) const {
+    return std::sqrt(pt_to_pt_sqdist(pt));
+  }
+
+  /// find angle (atan2 of y over x, in radians)
+  float angle() const {
+    return std::atan2(ycoord, xcoord);
+  }
+  // Returns the standard feature direction corresponding to this.
+  // See binary_angle_plus_pi below for a description of the direction.
+  uint8_t to_direction() const;
+  // Sets this with a unit vector in the given standard feature direction.
+  void from_direction(uint8_t direction);
+
+  // Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a
+  // standard feature direction as an unsigned angle in 256ths of a circle
+  // measured anticlockwise from (-1, 0).
+  static uint8_t binary_angle_plus_pi(double angle);
+  // Inverse of binary_angle_plus_pi returns an angle in radians for the
+  // given standard feature direction.
+  static double angle_from_direction(uint8_t direction);
+  // Returns the point on the given line nearest to this, ie the point such
+  // that the vector point->this is perpendicular to the line.
+  // The line is defined as a line_point and a dir_vector for its direction.
+  // dir_vector need not be a unit vector.
+  FCOORD nearest_pt_on_line(const FCOORD &line_point, const FCOORD &dir_vector) const;
+
+  /// Convert to unit vec
+  bool normalise();
+
+  /// test equality (exact float comparison of both components)
+  /// const-qualified so that it can be used on const objects, matching ICOORD.
+  bool operator==(const FCOORD &other) const {
+    return xcoord == other.xcoord && ycoord == other.ycoord;
+  }
+  /// test inequality (exact float comparison of both components)
+  /// const-qualified so that it can be used on const objects, matching ICOORD.
+  bool operator!=(const FCOORD &other) const {
+    return xcoord != other.xcoord || ycoord != other.ycoord;
+  }
+  /// rotate 90 deg anti
+  friend FCOORD operator!(const FCOORD &);
+  /// unary minus
+  friend FCOORD operator-(const FCOORD &);
+  /// add
+  friend FCOORD operator+(const FCOORD &, const FCOORD &);
+  /// add
+  friend FCOORD &operator+=(FCOORD &, const FCOORD &);
+  /// subtract
+  friend FCOORD operator-(const FCOORD &, const FCOORD &);
+  /// subtract
+  friend FCOORD &operator-=(FCOORD &, const FCOORD &);
+  /// scalar product
+  friend float operator%(const FCOORD &, const FCOORD &);
+  /// cross product
+  friend float operator*(const FCOORD &, const FCOORD &);
+  /// multiply
+  friend FCOORD operator*(const FCOORD &, float);
+  /// multiply
+  friend FCOORD operator*(float, const FCOORD &);
+
+  /// multiply
+  friend FCOORD &operator*=(FCOORD &, float);
+  /// divide
+  friend FCOORD operator/(const FCOORD &, float);
+  /// rotate
+  ///@param vec by vector
+  void rotate(const FCOORD vec);
+  // unrotate - undo a rotate(vec)
+  // @param vec by vector
+  void unrotate(const FCOORD &vec);
+  /// divide
+  friend FCOORD &operator/=(FCOORD &, float);
+
+private:
+  float xcoord; // 2 floating coords
+  float ycoord;
+};
+
+/**********************************************************************
+ * operator!
+ *
+ * Rotate an ICOORD 90 degrees anticlockwise.
+ **********************************************************************/
+
+/// Returns src rotated 90 degrees anticlockwise: (x, y) -> (-y, x).
+inline ICOORD operator!(const ICOORD &src) {
+  return ICOORD(static_cast<int16_t>(-src.ycoord), src.xcoord);
+}
+
+/**********************************************************************
+ * operator-
+ *
+ * Unary minus of an ICOORD.
+ **********************************************************************/
+
+/// Returns the negation of src: (x, y) -> (-x, -y).
+inline ICOORD operator-(const ICOORD &src) {
+  return ICOORD(static_cast<int16_t>(-src.xcoord), static_cast<int16_t>(-src.ycoord));
+}
+
+/**********************************************************************
+ * operator+
+ *
+ * Add 2 ICOORDS.
+ **********************************************************************/
+
+/// Returns the component-wise sum of op1 and op2 (narrowed to 16 bits).
+inline ICOORD operator+(const ICOORD &op1, const ICOORD &op2) {
+  return ICOORD(static_cast<int16_t>(op1.xcoord + op2.xcoord),
+                static_cast<int16_t>(op1.ycoord + op2.ycoord));
+}
+
+/**********************************************************************
+ * operator+=
+ *
+ * Add 2 ICOORDS.
+ **********************************************************************/
+
+/// Adds op2 to op1 component-wise, in place, and returns op1.
+inline ICOORD &operator+=(ICOORD &op1, const ICOORD &op2) {
+  op1.xcoord = static_cast<int16_t>(op1.xcoord + op2.xcoord);
+  op1.ycoord = static_cast<int16_t>(op1.ycoord + op2.ycoord);
+  return op1;
+}
+
+/**********************************************************************
+ * operator-
+ *
+ * Subtract 2 ICOORDS.
+ **********************************************************************/
+
+/// Returns the component-wise difference op1 - op2 (narrowed to 16 bits).
+inline ICOORD operator-(const ICOORD &op1, const ICOORD &op2) {
+  return ICOORD(static_cast<int16_t>(op1.xcoord - op2.xcoord),
+                static_cast<int16_t>(op1.ycoord - op2.ycoord));
+}
+
+/**********************************************************************
+ * operator-=
+ *
+ * Subtract 2 ICOORDS.
+ **********************************************************************/
+
+/// Subtracts op2 from op1 component-wise, in place, and returns op1.
+inline ICOORD &operator-=(ICOORD &op1, const ICOORD &op2) {
+  op1.xcoord = static_cast<int16_t>(op1.xcoord - op2.xcoord);
+  op1.ycoord = static_cast<int16_t>(op1.ycoord - op2.ycoord);
+  return op1;
+}
+
+/**********************************************************************
+ * operator%
+ *
+ * Scalar product of 2 ICOORDS.
+ **********************************************************************/
+
+/// Returns the scalar (dot) product x1*x2 + y1*y2.
+inline int32_t operator%(const ICOORD &op1, const ICOORD &op2) {
+  return op1.x() * op2.x() + op1.y() * op2.y();
+}
+
+/**********************************************************************
+ * operator*
+ *
+ * Cross product of 2 ICOORDS.
+ **********************************************************************/
+
+/// Returns the 2-D cross product x1*y2 - y1*x2.
+inline int32_t operator*(const ICOORD &op1, const ICOORD &op2) {
+  return op1.x() * op2.y() - op1.y() * op2.x();
+}
+
+/**********************************************************************
+ * operator*
+ *
+ * Scalar multiply of an ICOORD.
+ **********************************************************************/
+
+/// Returns op1 with both components multiplied by scale (narrowed to 16 bits).
+inline ICOORD operator*(const ICOORD &op1, int16_t scale) {
+  return ICOORD(static_cast<int16_t>(op1.xcoord * scale),
+                static_cast<int16_t>(op1.ycoord * scale));
+}
+
+/// Scalar-first form of the scalar multiply: same result as op1 * scale.
+inline ICOORD operator*(int16_t scale, const ICOORD &op1) {
+  return ICOORD(static_cast<int16_t>(op1.xcoord * scale),
+                static_cast<int16_t>(op1.ycoord * scale));
+}
+
+/**********************************************************************
+ * operator*=
+ *
+ * Scalar multiply of an ICOORD.
+ **********************************************************************/
+
+/// Multiplies both components of op1 by scale, in place, and returns op1.
+inline ICOORD &operator*=(ICOORD &op1, int16_t scale) {
+  op1.xcoord = static_cast<int16_t>(op1.xcoord * scale);
+  op1.ycoord = static_cast<int16_t>(op1.ycoord * scale);
+  return op1;
+}
+
+/**********************************************************************
+ * operator/
+ *
+ * Scalar divide of an ICOORD.
+ **********************************************************************/
+
+/// Returns op1 with both components divided by scale (integer division).
+/// scale must be non-zero.
+inline ICOORD operator/( // scalar divide
+    const ICOORD &op1,   // operands
+    int16_t scale) {
+  ICOORD result; // output
+  // Integer division by zero is undefined behaviour; fail with a diagnosable
+  // assertion instead, as the FCOORD operator/ in this header does.
+  ASSERT_HOST(scale != 0);
+  result.xcoord = op1.xcoord / scale;
+  result.ycoord = op1.ycoord / scale;
+  return result;
+}
+
+/**********************************************************************
+ * operator/=
+ *
+ * Scalar divide of an ICOORD.
+ **********************************************************************/
+
+/// Divides both components of op1 by scale (integer division), in place,
+/// and returns op1. scale must be non-zero.
+inline ICOORD &operator/=( // scalar divide
+    ICOORD &op1,           // operands
+    int16_t scale) {
+  // Integer division by zero is undefined behaviour; fail with a diagnosable
+  // assertion instead, as the FCOORD operator/= in this header does.
+  ASSERT_HOST(scale != 0);
+  op1.xcoord /= scale;
+  op1.ycoord /= scale;
+  return op1;
+}
+
+/**********************************************************************
+ * ICOORD::rotate
+ *
+ * Rotate an ICOORD by the given (normalized) (cos,sin) vector.
+ **********************************************************************/
+
+/// Rotates this in place by the (cos, sin) vector vec, rounding each result
+/// to the nearest integer via floor(value + 0.5).
+inline void ICOORD::rotate(const FCOORD &vec) {
+  // Both results are computed from the old coordinates before either is stored.
+  const auto new_x = static_cast<int16_t>(std::floor(xcoord * vec.x() - ycoord * vec.y() + 0.5f));
+  const auto new_y = static_cast<int16_t>(std::floor(ycoord * vec.x() + xcoord * vec.y() + 0.5f));
+  xcoord = new_x;
+  ycoord = new_y;
+}
+
+/**********************************************************************
+ * operator!
+ *
+ * Rotate an FCOORD 90 degrees anticlockwise.
+ **********************************************************************/
+
+/// Returns src rotated 90 degrees anticlockwise: (x, y) -> (-y, x).
+inline FCOORD operator!(const FCOORD &src) {
+  return FCOORD(-src.ycoord, src.xcoord);
+}
+
+/**********************************************************************
+ * operator-
+ *
+ * Unary minus of an FCOORD.
+ **********************************************************************/
+
+/// Returns the negation of src: (x, y) -> (-x, -y).
+inline FCOORD operator-(const FCOORD &src) {
+  return FCOORD(-src.xcoord, -src.ycoord);
+}
+
+/**********************************************************************
+ * operator+
+ *
+ * Add 2 FCOORDS.
+ **********************************************************************/
+
+/// Returns the component-wise sum of op1 and op2.
+inline FCOORD operator+(const FCOORD &op1, const FCOORD &op2) {
+  return FCOORD(op1.xcoord + op2.xcoord, op1.ycoord + op2.ycoord);
+}
+
+/**********************************************************************
+ * operator+=
+ *
+ * Add 2 FCOORDS.
+ **********************************************************************/
+
+/// Adds op2 to op1 component-wise, in place, and returns op1.
+inline FCOORD &operator+=(FCOORD &op1, const FCOORD &op2) {
+  op1.xcoord = op1.xcoord + op2.xcoord;
+  op1.ycoord = op1.ycoord + op2.ycoord;
+  return op1;
+}
+
+/**********************************************************************
+ * operator-
+ *
+ * Subtract 2 FCOORDS.
+ **********************************************************************/
+
+/// Returns the component-wise difference op1 - op2.
+inline FCOORD operator-(const FCOORD &op1, const FCOORD &op2) {
+  return FCOORD(op1.xcoord - op2.xcoord, op1.ycoord - op2.ycoord);
+}
+
+/**********************************************************************
+ * operator-=
+ *
+ * Subtract 2 FCOORDS.
+ **********************************************************************/
+
+/// Subtracts op2 from op1 component-wise, in place, and returns op1.
+inline FCOORD &operator-=(FCOORD &op1, const FCOORD &op2) {
+  op1.xcoord = op1.xcoord - op2.xcoord;
+  op1.ycoord = op1.ycoord - op2.ycoord;
+  return op1;
+}
+
+/**********************************************************************
+ * operator%
+ *
+ * Scalar product of 2 FCOORDS.
+ **********************************************************************/
+
+/// Returns the scalar (dot) product x1*x2 + y1*y2.
+inline float operator%(const FCOORD &op1, const FCOORD &op2) {
+  return op1.x() * op2.x() + op1.y() * op2.y();
+}
+
+/**********************************************************************
+ * operator*
+ *
+ * Cross product of 2 FCOORDS.
+ **********************************************************************/
+
+/// Returns the 2-D cross product x1*y2 - y1*x2.
+inline float operator*(const FCOORD &op1, const FCOORD &op2) {
+  return op1.x() * op2.y() - op1.y() * op2.x();
+}
+
+/**********************************************************************
+ * operator*
+ *
+ * Scalar multiply of an FCOORD.
+ **********************************************************************/
+
+/// Returns op1 with both components multiplied by scale.
+inline FCOORD operator*(const FCOORD &op1, float scale) {
+  return FCOORD(op1.xcoord * scale, op1.ycoord * scale);
+}
+
+/// Scalar-first form of the scalar multiply: same result as op1 * scale.
+inline FCOORD operator*(float scale, const FCOORD &op1) {
+  return FCOORD(op1.xcoord * scale, op1.ycoord * scale);
+}
+
+/**********************************************************************
+ * operator*=
+ *
+ * Scalar multiply of an FCOORD.
+ **********************************************************************/
+
+/// Multiplies both components of op1 by scale, in place, and returns op1.
+inline FCOORD &operator*=(FCOORD &op1, float scale) {
+  op1.xcoord = op1.xcoord * scale;
+  op1.ycoord = op1.ycoord * scale;
+  return op1;
+}
+
+/**********************************************************************
+ * operator/
+ *
+ * Scalar divide of an FCOORD.
+ **********************************************************************/
+
+/// Returns op1 with both components divided by scale.
+/// Asserts that scale is non-zero.
+inline FCOORD operator/(const FCOORD &op1, float scale) {
+  ASSERT_HOST(scale != 0.0f);
+  return FCOORD(op1.xcoord / scale, op1.ycoord / scale);
+}
+
+/**********************************************************************
+ * operator/=
+ *
+ * Scalar divide of an FCOORD.
+ **********************************************************************/
+
+/// Divides both components of op1 by scale, in place, and returns op1.
+/// Asserts that scale is non-zero.
+inline FCOORD &operator/=(FCOORD &op1, float scale) {
+  ASSERT_HOST(scale != 0.0f);
+  op1.xcoord = op1.xcoord / scale;
+  op1.ycoord = op1.ycoord / scale;
+  return op1;
+}
+
+/**********************************************************************
+ * rotate
+ *
+ * Rotate an FCOORD by the given (normalized) (cos,sin) vector.
+ **********************************************************************/
+
+/// Rotates this in place by the (cos, sin) vector vec.
+inline void FCOORD::rotate(const FCOORD vec) {
+  // Both results are computed from the old coordinates before either is stored.
+  const float new_x = xcoord * vec.x() - ycoord * vec.y();
+  const float new_y = ycoord * vec.x() + xcoord * vec.y();
+  xcoord = new_x;
+  ycoord = new_y;
+}
+
+/// Undoes a rotate(vec) by rotating with the y component of vec negated.
+inline void FCOORD::unrotate(const FCOORD &vec) {
+  const FCOORD inverse(vec.x(), -vec.y());
+  rotate(inverse);
+}
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyaprx.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyaprx.cpp
@ -0,0 +1,571 @@
+/**********************************************************************
+ * File:        polyaprx.cpp  (Formerly polygon.c)
+ * Description: Code for polygonal approximation from old edgeprog.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "polyaprx.h"
+
+#include "blobs.h"   // for EDGEPT, TPOINT, VECTOR, TESSLINE
+#include "coutln.h"  // for C_OUTLINE
+#include "errcode.h" // for ASSERT_HOST
+#include "mod128.h"  // for DIR128
+#include "params.h"  // for BoolParam, BOOL_VAR
+#include "points.h"  // for ICOORD
+#include "rect.h"    // for TBOX
+#include "tprintf.h" // for tprintf
+
+#include <cstdint> // for INT16_MAX, int8_t
+
+namespace tesseract {
+
+// Number of EDGEPTs held in the on-stack buffer of ApproximateOutline;
+// longer outlines fall back to a heap allocation.
+#define FASTEDGELENGTH 256
+
+// Enables the tprintf tracing in poly2 and cutline.
+static BOOL_VAR(poly_debug, false, "Debug old poly");
+// When false, ApproximateOutline uses the larger of box width/height
+// (instead of height only) to derive the area passed to fix2/poly2.
+static BOOL_VAR(poly_wide_objects_better, true, "More accurate approx on wide things");
+
+// fixed_dist scales the minimum gap between fixed points in fix2.
+#define fixed_dist 20  // really an int_variable
+// approx_dist scales the two cutline thresholds below.
+#define approx_dist 15 // really an int_variable
+
+// Integer weights used by cutline when comparing the maximum (par1) and the
+// mean squared (par2) perpendicular deviation against the area.
+// With approx_dist == 15 these evaluate to 20 and 30.
+const int par1 = 4500 / (approx_dist * approx_dist);
+const int par2 = 6750 / (approx_dist * approx_dist);
+
+/**********************************************************************
+ * ApproximateOutline
+ *
+ * Approximate an outline from chain codes form using the old tess algorithm.
+ * The chain code is converted to EDGEPTs, reduced by fix2 and poly2, and the
+ * surviving points are copied into a freshly allocated ring that is handed
+ * to TESSLINE::BuildFromOutlineList.
+ * If allow_detailed_fx is true, the copied EDGEPTs also carry the pointer to
+ * the input C_OUTLINE and the step range they cover, enabling
+ * higher-resolution feature extraction that does not use the polygonal
+ * approximation.
+ **********************************************************************/
+
+TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
+  // Scratch storage for the converted path: a stack buffer for short
+  // outlines, a heap array when the path does not fit.
+  EDGEPT stack_edgepts[FASTEDGELENGTH];
+  const bool on_heap = c_outline->pathlength() > FASTEDGELENGTH;
+  EDGEPT *edgepts = on_heap ? new EDGEPT[c_outline->pathlength()] : stack_edgepts;
+
+  // The "area" is the square of the box height, or of the larger box side
+  // when poly_wide_objects_better is switched off.
+  const auto &loop_box = c_outline->bounding_box();
+  int32_t side = loop_box.height();
+  if (!poly_wide_objects_better && loop_box.width() > side) {
+    side = loop_box.width();
+  }
+  const int32_t area = side * side;
+
+  edgesteps_to_edgepts(c_outline, edgepts);
+  fix2(edgepts, area);
+  EDGEPT *const first_src = poly2(edgepts, area); // 2nd approximation.
+
+  // Copy the approximated ring out of the scratch buffer, one heap EDGEPT
+  // per surviving point, linking each copy to the previous one.
+  EDGEPT *head = nullptr;
+  EDGEPT *tail = nullptr;
+  EDGEPT *src = first_src;
+  do {
+    auto *copy = new EDGEPT;
+    copy->pos = src->pos;
+    copy->prev = tail;
+    if (tail != nullptr) {
+      tail->next = copy;
+    } else {
+      head = copy;
+    }
+    if (allow_detailed_fx) {
+      copy->src_outline = src->src_outline;
+      copy->start_step = src->start_step;
+      copy->step_count = src->step_count;
+    }
+    tail = copy;
+    src = src->next;
+  } while (src != first_src);
+
+  // Close the ring.
+  tail->next = head;
+  head->prev = tail;
+
+  if (on_heap) {
+    delete[] edgepts;
+  }
+  return TESSLINE::BuildFromOutlineList(head);
+}
+
+/**********************************************************************
+ * edgesteps_to_edgepts
+ *
+ * Convert a C_OUTLINE to EDGEPTs.
+ *
+ * Walks the chain-code steps of c_outline and collapses each run of steps
+ * sharing a direction into one EDGEPT written to edgepts[]. The written
+ * entries are linked into a closed ring through prev/next.
+ * edgepts[] is caller-supplied; ApproximateOutline passes a buffer with at
+ * least c_outline->pathlength() entries.
+ * Returns &edgepts[0].
+ **********************************************************************/
+
+EDGEPT *edgesteps_to_edgepts( // convert outline
+    C_OUTLINE *c_outline,     // input
+    EDGEPT edgepts[]          // output is array
+) {
+  int32_t length;    // steps in path
+  ICOORD pos;        // current coords
+  int32_t stepindex; // current step
+  int32_t stepinc;   // increment
+  int32_t epindex;   // current EDGEPT
+  ICOORD vec;        // for this 8 step
+  ICOORD prev_vec;
+  int8_t epdir;   // of this step
+  DIR128 prevdir; // previous dir
+  DIR128 dir;     // of this step
+
+  pos = c_outline->start_pos(); // start of loop
+  length = c_outline->pathlength();
+  stepindex = 0;
+  epindex = 0;
+  prevdir = -1;
+  // repeated steps
+  uint32_t count = 0;
+  // Index of the first step of the run currently being accumulated.
+  int prev_stepindex = 0;
+  do {
+    dir = c_outline->step_dir(stepindex);
+    vec = c_outline->step(stepindex);
+    // If the following step differs from this one by -32 direction units,
+    // merge the two steps into one combined vector with an in-between
+    // direction and consume both.
+    // NOTE(review): presumably this turns an axis step followed by a
+    // quarter turn into a single diagonal step -- confirm against DIR128.
+    if (stepindex < length - 1 && c_outline->step_dir(stepindex + 1) - dir == -32) {
+      dir += 128 - 16;
+      vec += c_outline->step(stepindex + 1);
+      stepinc = 2;
+    } else {
+      stepinc = 1;
+    }
+    // First step of the outline: seed the "previous" state so that the
+    // comparison below sees no direction change.
+    if (count == 0) {
+      prevdir = dir;
+      prev_vec = vec;
+    }
+    if (prevdir.get_dir() != dir.get_dir()) {
+      // Direction changed: emit an EDGEPT describing the run that just ended.
+      edgepts[epindex].pos.x = pos.x();
+      edgepts[epindex].pos.y = pos.y();
+      // Total displacement of the run = per-step vector * number of steps.
+      prev_vec *= count;
+      edgepts[epindex].vec.x = prev_vec.x();
+      edgepts[epindex].vec.y = prev_vec.y();
+      pos += prev_vec;
+      edgepts[epindex].runlength = count;
+      // NOTE(review): for epindex == 0 this forms the address of
+      // edgepts[-1]; the value is replaced by the edgepts[0].prev assignment
+      // at the end of the function before anything reads it here.
+      edgepts[epindex].prev = &edgepts[epindex - 1];
+      // TODO: reset is_hidden, too?
+      edgepts[epindex].fixed = false;
+      edgepts[epindex].next = &edgepts[epindex + 1];
+      // Reduce the 128-unit direction of the finished run to a 3-bit code
+      // (0..7) stored in EDGEPT::dir.
+      prevdir += 64;
+      epdir = DIR128(0) - prevdir;
+      epdir >>= 4;
+      epdir &= 7;
+      edgepts[epindex].dir = epdir;
+      // Remember which chain-code steps this EDGEPT covers.
+      edgepts[epindex].src_outline = c_outline;
+      edgepts[epindex].start_step = prev_stepindex;
+      edgepts[epindex].step_count = stepindex - prev_stepindex;
+      epindex++;
+      // Start a new run with the current step.
+      prevdir = dir;
+      prev_vec = vec;
+      count = 1;
+      prev_stepindex = stepindex;
+    } else {
+      count++;
+    }
+    stepindex += stepinc;
+  } while (stepindex < length);
+  // Emit the final run, which the loop above leaves pending.
+  edgepts[epindex].pos.x = pos.x();
+  edgepts[epindex].pos.y = pos.y();
+  prev_vec *= count;
+  edgepts[epindex].vec.x = prev_vec.x();
+  edgepts[epindex].vec.y = prev_vec.y();
+  pos += prev_vec;
+  edgepts[epindex].runlength = count;
+  // TODO: reset is_hidden, too?
+  edgepts[epindex].fixed = false;
+  edgepts[epindex].src_outline = c_outline;
+  edgepts[epindex].start_step = prev_stepindex;
+  edgepts[epindex].step_count = stepindex - prev_stepindex;
+  edgepts[epindex].prev = &edgepts[epindex - 1];
+  // Close the ring: last entry points forward to the first ...
+  edgepts[epindex].next = &edgepts[0];
+  prevdir += 64;
+  epdir = DIR128(0) - prevdir;
+  epdir >>= 4;
+  epdir &= 7;
+  edgepts[epindex].dir = epdir;
+  // ... and the first entry points back to the last.
+  edgepts[0].prev = &edgepts[epindex];
+  // The accumulated displacements must bring us back to the start position.
+  ASSERT_HOST(pos.x() == c_outline->start_pos().x() && pos.y() == c_outline->start_pos().y());
+  return &edgepts[0];
+}
+
+/**********************************************************************
+ * fix2
+ *
+ * First polygonal approximation: decides which points of the closed
+ * EDGEPT ring starting at start are "fixed", i.e. kept as candidate
+ * polygon vertices. Only the fixed flags are modified.
+ *
+ * The work is done in four passes over the ring:
+ *  1. starting from a suitable point, fix the points where the pattern of
+ *     alternating directions breaks;
+ *  2. fix both ends of every run of at least 8 steps;
+ *  3. unfix isolated single-step fixed pairs sitting inside a regular
+ *     two-direction staircase;
+ *  4. while more than 3 fixed points remain, unfix one of any two
+ *     consecutive fixed points that are closer than a gap derived from area.
+ *
+ * area is the squared box side computed by ApproximateOutline; it is
+ * clamped to a minimum of 450 before the gap is derived.
+ **********************************************************************/
+
+void fix2(         // polygonal approx
+    EDGEPT *start, /*loop to approximate */
+    int area) {
+  EDGEPT *edgept; /*current point */
+  EDGEPT *edgept1;
+  EDGEPT *loopstart; /*modified start of loop */
+  EDGEPT *linestart; /*start of line segment */
+  int stopped;       /*completed flag */
+  int fixed_count;   // no of fixed points
+  int8_t dir;
+  int d01, d12, d23, gapmin;
+  TPOINT d01vec, d12vec, d23vec;
+  EDGEPT *edgefix, *startfix;
+  EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
+
+  // Pass 1 setup: skip forward until a point that makes a suitable start.
+  edgept = start; /*start of loop */
+  while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
+         (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
+    edgept = edgept->next; /*find suitable start */
+  }
+  loopstart = edgept;      /*remember start */
+
+  stopped = 0;                   /*not finished yet */
+  edgept->fixed = true; //fix it
+  do {
+    linestart = edgept;        /*possible start of line */
+    auto dir1 = edgept->dir; //first direction
+    //length of dir1
+    auto sum1 = edgept->runlength;
+    edgept = edgept->next;
+    auto dir2 = edgept->dir; //2nd direction
+    //length in dir2
+    auto sum2 = edgept->runlength;
+    // Only directions at most one 3-bit code apart can belong to one line.
+    if (((dir1 - dir2 + 1) & 7) < 3) {
+      // Follow the staircase for as long as the two directions alternate,
+      // accumulating the total length travelled in each of them.
+      while (edgept->prev->dir == edgept->next->dir) {
+        edgept = edgept->next; /*look at next */
+        if (edgept->dir == dir1) {
+          /*sum lengths */
+          sum1 += edgept->runlength;
+        } else {
+          sum2 += edgept->runlength;
+        }
+      }
+
+      if (edgept == loopstart) {
+        stopped = 1; /*finished */
+      }
+      if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
+          (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
+        /*start is back one */
+        linestart = linestart->prev;
+        linestart->fixed = true;
+      }
+
+      if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
+          (edgept->dir == dir1 && sum1 >= sum2) ||
+          ((edgept->prev->runlength < edgept->runlength ||
+            (edgept->dir == dir2 && sum2 >= sum1)) &&
+           linestart->next != edgept)) {
+        edgept = edgept->next;
+      }
+    }
+    /*sharp bend */
+    edgept->fixed = true;
+  }
+  /*do whole loop */
+  while (edgept != loopstart && !stopped);
+
+  // Pass 2: a run of 8 or more steps is long enough that both of its ends
+  // are kept, whatever its direction.
+  edgept = start;
+  do {
+    if (edgept->runlength >= 8) {
+      edgept->fixed = true;
+      edgept1 = edgept->next;
+      edgept1->fixed = true;
+    }
+    edgept = edgept->next;
+  } while (edgept != start);
+
+  // Pass 3: undo fixed pairs that are just one step of a regular staircase.
+  edgept = start;
+  do {
+    /*single fixed step */
+    if (edgept->fixed &&
+        edgept->runlength == 1
+        /*and neighbours free */
+        && edgept->next->fixed &&
+        !edgept->prev->fixed
+        /*same pair of dirs */
+        && !edgept->next->next->fixed &&
+        edgept->prev->dir == edgept->next->dir &&
+        edgept->prev->prev->dir == edgept->next->next->dir &&
+        ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
+      // unfix it
+      edgept->fixed = false;
+      edgept->next->fixed = false;
+    }
+    edgept = edgept->next;   /*do all points */
+  } while (edgept != start); /*until finished */
+
+  // Pass 4: thin out fixed points that are too close together.
+  stopped = 0;
+  if (area < 450) {
+    area = 450;
+  }
+
+  // Evaluate the product in 64 bits: area is a squared box side, so
+  // area * fixed_dist * fixed_dist exceeds INT32_MAX once the side is larger
+  // than about 2317. The quotient always fits back into an int.
+  gapmin = static_cast<int>(static_cast<int64_t>(area) * fixed_dist * fixed_dist / 44000);
+
+  edgept = start;
+  fixed_count = 0;
+  do {
+    if (edgept->fixed) {
+      fixed_count++;
+    }
+    edgept = edgept->next;
+  } while (edgept != start);
+  // Locate four consecutive fixed points edgefix0..edgefix3; edgept ends up
+  // on edgefix3.
+  while (!edgept->fixed) {
+    edgept = edgept->next;
+  }
+  edgefix0 = edgept;
+
+  edgept = edgept->next;
+  while (!edgept->fixed) {
+    edgept = edgept->next;
+  }
+  edgefix1 = edgept;
+
+  edgept = edgept->next;
+  while (!edgept->fixed) {
+    edgept = edgept->next;
+  }
+  edgefix2 = edgept;
+
+  edgept = edgept->next;
+  while (!edgept->fixed) {
+    edgept = edgept->next;
+  }
+  edgefix3 = edgept;
+
+  startfix = edgefix2;
+
+  do {
+    if (fixed_count <= 3) {
+      break; // already too few
+    }
+    d12vec.diff(edgefix1->pos, edgefix2->pos);
+    d12 = d12vec.length();
+    // TODO(rays) investigate this change:
+    // Only unfix a point if it is part of a low-curvature section
+    // of outline and the total angle change of the outlines is
+    // less than 90 degrees, ie the scalar product is positive.
+    // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
+    if (d12 <= gapmin) {
+      // The middle pair is too close: drop whichever of the two has the
+      // shorter distance to its outer neighbour.
+      d01vec.diff(edgefix0->pos, edgefix1->pos);
+      d01 = d01vec.length();
+      d23vec.diff(edgefix2->pos, edgefix3->pos);
+      d23 = d23vec.length();
+      if (d01 > d23) {
+        edgefix2->fixed = false;
+        fixed_count--;
+      } else {
+        edgefix1->fixed = false;
+        fixed_count--;
+        edgefix1 = edgefix2;
+      }
+    } else {
+      edgefix0 = edgefix1;
+      edgefix1 = edgefix2;
+    }
+    // Slide the window forward by one fixed point.
+    edgefix2 = edgefix3;
+    edgept = edgept->next;
+    while (!edgept->fixed) {
+      // startfix itself may have been unfixed above; stop once we pass it.
+      if (edgept == startfix) {
+        stopped = 1;
+      }
+      edgept = edgept->next;
+    }
+    edgefix3 = edgept;
+    edgefix = edgefix2;
+  } while ((edgefix != startfix) && (!stopped));
+}
+
+/**********************************************************************
+ * poly2
+ *
+ * Second polygonal approximation. Starting from the points already marked
+ * fixed (see fix2), each stretch between fixed points is handed to cutline,
+ * which may fix further points. If fewer than 3 fixed points result, area
+ * is halved and the whole loop is processed again. Finally the ring is
+ * relinked so that it contains only the fixed points, and each point's vec
+ * is recomputed as the displacement to its new successor.
+ *
+ * startpt is any point of the closed ring; area is clamped to at least
+ * 1200. Returns a point on the resulting ring.
+ **********************************************************************/
+
+EDGEPT *poly2(       // second poly
+    EDGEPT *startpt, /*start of loop */
+    int area         /*area of blob box */
+) {
+  EDGEPT *edgept;    /*current outline point */
+  EDGEPT *loopstart; /*starting point */
+  EDGEPT *linestart; /*start of line */
+  int edgesum;       /*correction count */
+
+  if (area < 1200) {
+    area = 1200; /*minimum value */
+  }
+
+  loopstart = nullptr; /*not found it yet */
+  edgept = startpt;    /*start of loop */
+
+  // Look for a fixed point whose successor is not fixed: the beginning of
+  // a stretch that still needs approximating.
+  do {
+    // current point fixed and next not
+    if (edgept->fixed && !edgept->next->fixed) {
+      loopstart = edgept; /*start of repoly */
+      break;
+    }
+    edgept = edgept->next;     /*next point */
+  } while (edgept != startpt); /*until found or finished */
+
+  // No such point and startpt is not fixed: force startpt to be fixed so
+  // there is somewhere to start from.
+  if (loopstart == nullptr && !startpt->fixed) {
+    /*fixed start of loop */
+    startpt->fixed = true;
+    loopstart = startpt; /*or start of loop */
+  }
+  if (loopstart) {
+    do {
+      edgept = loopstart; /*first to do */
+      do {
+        linestart = edgept;
+        edgesum = 0; /*sum of lengths */
+        // Advance to the next fixed point, but cut the stretch short once
+        // the accumulated run length reaches 126.
+        do {
+          /*sum lengths */
+          edgesum += edgept->runlength;
+          edgept = edgept->next; /*move on */
+        } while (!edgept->fixed && edgept != loopstart && edgesum < 126);
+        if (poly_debug) {
+          tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", linestart->pos.x,
+                  linestart->pos.y, linestart->dir, linestart->vec.x, linestart->vec.y,
+                  edgesum, edgept->pos.x, edgept->pos.y);
+        }
+        /*reapproximate */
+        cutline(linestart, edgept, area);
+
+        // Skip over consecutive fixed points; nothing lies between them.
+        while (edgept->next->fixed && edgept != loopstart) {
+          edgept = edgept->next; /*look for next non-fixed */
+        }
+      }
+      /*do all the loop */
+      while (edgept != loopstart);
+      // edgesum is reused here as the number of fixed points on the ring.
+      edgesum = 0;
+      do {
+        if (edgept->fixed) {
+          edgesum++;
+        }
+        edgept = edgept->next;
+      }
+      // count fixed pts
+      while (edgept != loopstart);
+      // Too few vertices: halve area, which makes the cutline thresholds
+      // easier to reach, and go round again.
+      if (edgesum < 3) {
+        area /= 2; // must have 3 pts
+      }
+    } while (edgesum < 3);
+    // Splice the non-fixed points out of the ring: link each fixed point
+    // directly to the next fixed one and recompute its vector.
+    do {
+      linestart = edgept;
+      do {
+        edgept = edgept->next;
+      } while (!edgept->fixed);
+      linestart->next = edgept;
+      edgept->prev = linestart;
+      linestart->vec.x = edgept->pos.x - linestart->pos.x;
+      linestart->vec.y = edgept->pos.y - linestart->pos.y;
+    } while (edgept != loopstart);
+  } else {
+    // Reached only when startpt is fixed and no fixed point has a non-fixed
+    // successor, so the ring is returned unchanged.
+    edgept = startpt; /*start of loop */
+  }
+
+  loopstart = edgept; /*new start */
+  return loopstart;   /*correct exit */
+}
+
+/**********************************************************************
+ * cutline
+ *
+ * Recursively refines the stretch of outline between first and last.
+ * The straight line joining the two ends is compared with the intermediate
+ * points; if the worst or the mean squared deviation is too large relative
+ * to area, or the chord is too long, the worst point is marked fixed and
+ * both halves are refined in turn. Only the fixed flags are modified.
+ **********************************************************************/
+
+void cutline(              // recursive refine
+    EDGEPT *first,         /*ends of line */
+    EDGEPT *last, int area /*area of object */
+) {
+  EDGEPT *edge;     /*current edge */
+  TPOINT vecsum;    /*vector sum */
+  int vlen;         /*approx length of vecsum */
+  TPOINT vec;       /*accumulated vector */
+  EDGEPT *maxpoint; /*worst point */
+  int maxperp;      /*max deviation */
+  int perp;         /*perp distance */
+  int ptcount;      /*no of points */
+  int squaresum;    /*sum of perps */
+
+  edge = first; /*start of line */
+  // Adjacent points: there is nothing in between to approximate.
+  if (edge->next == last) {
+    return; /*simple line */
+  }
+
+  /*vector sum */
+  vecsum.x = last->pos.x - edge->pos.x;
+  vecsum.y = last->pos.y - edge->pos.y;
+  // first and last coincide: use the reversed vector of the edge leading
+  // into first as the reference direction instead of a zero chord.
+  if (vecsum.x == 0 && vecsum.y == 0) {
+    /*special case */
+    vecsum.x = -edge->prev->vec.x;
+    vecsum.y = -edge->prev->vec.y;
+  }
+  // vlen = max(|vecsum.x|, |vecsum.y|).
+  /*absolute value */
+  vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
+  if (vecsum.y > vlen) {
+    vlen = vecsum.y; /*maximum */
+  } else if (-vecsum.y > vlen) {
+    vlen = -vecsum.y; /*absolute value */
+  }
+
+  vec.x = edge->vec.x; /*accumulated vector */
+  vec.y = edge->vec.y;
+  maxperp = 0; /*none yet */
+  squaresum = ptcount = 0;
+  edge = edge->next; /*move to actual point */
+  maxpoint = edge;   /*in case there isn't one */
+  // For every intermediate point, vec is its displacement from first.
+  do {
+    perp = vec.cross(vecsum); // get perp distance
+    if (perp != 0) {
+      perp *= perp; /*squared deviation */
+    }
+    squaresum += perp; /*sum squares */
+    ptcount++;         /*count points */
+    if (poly_debug) {
+      tprintf("Cutline:Final perp=%d\n", perp);
+    }
+    if (perp > maxperp) {
+      maxperp = perp;
+      maxpoint = edge; /*find greatest deviation */
+    }
+    vec.x += edge->vec.x; /*accumulate vectors */
+    vec.y += edge->vec.y;
+    edge = edge->next;
+  } while (edge != last); /*test all line */
+
+  // From here on perp holds the normalising length of the chord.
+  // NOTE(review): presumably TPOINT::length() is the squared length, which
+  // would match the squared cross products above -- confirm in blobs.h.
+  perp = vecsum.length();
+  ASSERT_HOST(perp != 0);
+
+  // Scale by 256 for extra integer precision; the order of shift and divide
+  // is chosen so that the shift cannot overflow.
+  if (maxperp < 256 * INT16_MAX) {
+    maxperp <<= 8;
+    maxperp /= perp; /*true max perp */
+  } else {
+    maxperp /= perp;
+    maxperp <<= 8; /*avoid overflow */
+  }
+  // perp is now reused for the mean squared deviation, also scaled by 256.
+  if (squaresum < 256 * INT16_MAX) {
+    /*mean squared perp */
+    perp = (squaresum << 8) / (perp * ptcount);
+  } else {
+    /*avoid overflow */
+    perp = (squaresum / perp << 8) / ptcount;
+  }
+
+  if (poly_debug) {
+    tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area, maxperp / 256.0,
+            maxperp * 200.0 / area, perp / 256.0, perp * 300.0 / area);
+  }
+  // Split at the worst point if either deviation is too large for this
+  // area, or if the chord spans 126 or more in x or y.
+  if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
+    maxpoint->fixed = true;
+    /*partitions */
+    cutline(first, maxpoint, area);
+    cutline(maxpoint, last, area);
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyaprx.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyaprx.h
@ -0,0 +1,49 @@
+/**********************************************************************
+ * File:        polyaprx.h  (Formerly polygon.h)
+ * Description: Code for polygonal approximation from old edgeprog.
+ * Author:      Ray Smith
+ * Created:     Thu Nov 25 11:42:04 GMT 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef POLYAPRX_H
+#define POLYAPRX_H
+
+namespace tesseract {
+
+class C_OUTLINE;
+struct EDGEPT;
+struct TESSLINE;
+
+// Convert a chain-coded input to the old OUTLINE approximation.
+// When allow_detailed_fx is true the resulting points also record the
+// source outline and the step range they were derived from.
+TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline);
+// Fill edgepts[] with one EDGEPT per run of equal-direction steps of
+// c_outline, linked into a closed ring. Returns &edgepts[0].
+EDGEPT *edgesteps_to_edgepts( // convert outline
+    C_OUTLINE *c_outline,     // input
+    EDGEPT edgepts[]          // output is array
+);
+// First approximation: mark the points of the ring that are to be kept.
+void fix2(         // polygonal approx
+    EDGEPT *start, /*loop to approximate */
+    int area);
+// Second approximation: refine between the fixed points and relink the ring
+// so that it holds only fixed points. Returns a point on that ring.
+EDGEPT *poly2(       // second poly
+    EDGEPT *startpt, /*start of loop */
+    int area         /*area of blob box */
+);
+// Recursively fix the worst-fitting point between first and last until the
+// stretch is close enough to a straight line.
+void cutline(              // recursive refine
+    EDGEPT *first,         /*ends of line */
+    EDGEPT *last, int area /*area of object */
+);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyblk.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyblk.cpp
@ -0,0 +1,422 @@
+/**********************************************************************
+ * File:        polyblk.cpp  (Formerly poly_block.c)
+ * Description: Polygonal blocks
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "polyblk.h"
+
+#include "elst.h"
+
+#include <cctype>
+#include <cinttypes> // PRId32
+#include <cmath>
+#include <cstdio>
+#include <memory> // std::unique_ptr
+
+namespace tesseract {
+
+// Sentinel returned by POLY_BLOCK::winding_number when the query point lies
+// on the outline itself, so no winding count can be given.
+#define INTERSECTING INT16_MAX
+
+// Comparator handed to the list sort in PB_LINE_IT::get_line; orders
+// ICOORDELTs by ascending x. Defined further down in this file.
+int lessthan(const void *first, const void *second);
+
+// Initialize from a list of vertices. The elements of points are moved into
+// this block's own vertex list and the bounding box is computed from them.
+POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  type = t;
+  vertices.clear();
+  vertex_it.move_to_first();
+  vertex_it.add_list_before(points);
+  compute_bb();
+}
+
+// Initialize from box coordinates: the four corners of tbox become the
+// vertices, in the order top-left, bottom-left, bottom-right, top-right.
+POLY_BLOCK::POLY_BLOCK(const TBOX &tbox, PolyBlockType t) {
+  type = t;
+  vertices.clear();
+
+  ICOORDELT_IT corner_it(&vertices);
+  corner_it.move_to_first();
+  corner_it.add_to_end(new ICOORDELT(tbox.left(), tbox.top()));
+  corner_it.add_to_end(new ICOORDELT(tbox.left(), tbox.bottom()));
+  corner_it.add_to_end(new ICOORDELT(tbox.right(), tbox.bottom()));
+  corner_it.add_to_end(new ICOORDELT(tbox.right(), tbox.top()));
+
+  compute_bb();
+}
+
+/**
+ * @name POLY_BLOCK::compute_bb
+ *
+ * Recompute the bounding box as the smallest axis-aligned box that
+ * encloses all outline points, and store it in box.
+ */
+
+void POLY_BLOCK::compute_bb() {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  // Seed every extreme with the first vertex, then widen as needed.
+  auto min_x = vertex_it.data()->x();
+  auto min_y = vertex_it.data()->y();
+  auto max_x = min_x;
+  auto max_y = min_y;
+
+  do {
+    ICOORDELT *vertex = vertex_it.data();
+    if (vertex->x() < min_x) {
+      min_x = vertex->x();
+    }
+    if (vertex->y() < min_y) {
+      min_y = vertex->y();
+    }
+    if (vertex->x() > max_x) {
+      max_x = vertex->x();
+    }
+    if (vertex->y() > max_y) {
+      max_y = vertex->y();
+    }
+    vertex_it.forward();
+  } while (!vertex_it.at_first());
+
+  box = TBOX(ICOORD(min_x, min_y), ICOORD(max_x, max_y));
+}
+
+/**
+ * @name POLY_BLOCK::winding_number
+ *
+ * Return the winding number of the outline around the given point, or
+ * INTERSECTING if the point lies on the outline.
+ * @param point point to wind around
+ */
+
+int16_t POLY_BLOCK::winding_number(const ICOORD &point) {
+  int16_t winding = 0;
+  ICOORDELT_IT it(&vertices);
+
+  do {
+    ICOORD pt = *it.data();
+    ICOORD to_pt = pt - point;                 // point -> current vertex
+    ICOORD edge = *it.data_relative(1) - pt;   // current vertex -> next vertex
+    const auto start_y = to_pt.y();
+    const auto end_y = to_pt.y() + edge.y();
+
+    if (start_y <= 0 && end_y > 0) {
+      // Edge crosses the horizontal through point going upwards.
+      const int32_t cross = to_pt * edge; // cross product
+      if (cross > 0) {
+        winding++; // crossing right half
+      } else if (cross == 0) {
+        return INTERSECTING; // going through point
+      }
+    } else if (start_y > 0 && end_y <= 0) {
+      // Edge crosses the horizontal through point going downwards.
+      const int32_t cross = to_pt * edge;
+      if (cross < 0) {
+        winding--; // crossing back
+      } else if (cross == 0) {
+        return INTERSECTING; // illegal
+      }
+    } else if (start_y == 0 && to_pt.x() == 0) {
+      // The vertex coincides with the point.
+      return INTERSECTING;
+    }
+    it.forward();
+  } while (!it.at_first());
+
+  return winding;
+}
+
+/// @return true if other is inside this.
+/// Vertices lying exactly on the opposite outline (winding number
+/// INTERSECTING) are ignored by both checks.
+bool POLY_BLOCK::contains(POLY_BLOCK *other) {
+  // Disjoint bounding boxes rule containment out immediately.
+  if (!box.overlap(*(other->bounding_box()))) {
+    return false;
+  }
+
+  ICOORDELT_IT it(&vertices);
+
+  // No vertex of this may lie inside other.
+  do {
+    const int16_t winding = other->winding_number(*it.data());
+    if (winding != INTERSECTING && winding != 0) {
+      return false;
+    }
+    it.forward();
+  } while (!it.at_first());
+
+  // Every vertex of other must lie inside this.
+  it.set_to_list(other->points());
+  do {
+    const int16_t winding = winding_number(*it.data());
+    if (winding == 0) {
+      return false;
+    }
+    it.forward();
+  } while (!it.at_first());
+
+  return true;
+}
+
+/**
+ * @name POLY_BLOCK::rotate
+ *
+ * Rotate every vertex of the POLY_BLOCK about the origin, rounding the
+ * result to the nearest integer coordinates, then refresh the bounding box.
+ * @param rotation cos, sin of angle
+ */
+
+void POLY_BLOCK::rotate(FCOORD rotation) {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  do {
+    ICOORDELT *vertex = vertex_it.data();
+    // Rotate in floating point, then round half up back to integers.
+    FCOORD rotated(vertex->x(), vertex->y());
+    rotated.rotate(rotation);
+    vertex->set_x(static_cast<int16_t>(floor(rotated.x() + 0.5)));
+    vertex->set_y(static_cast<int16_t>(floor(rotated.y() + 0.5)));
+    vertex_it.forward();
+  } while (!vertex_it.at_first());
+
+  compute_bb();
+}
+
+/**
+ * @name POLY_BLOCK::reflect_in_y_axis
+ *
+ * Mirror the polygon in the y-axis by negating the x coordinate of every
+ * vertex, then refresh the bounding box.
+ */
+
+void POLY_BLOCK::reflect_in_y_axis() {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  do {
+    ICOORDELT *vertex = vertex_it.data();
+    vertex->set_x(-vertex->x());
+    vertex_it.forward();
+  } while (!vertex_it.at_first());
+
+  compute_bb();
+}
+
+/**
+ * POLY_BLOCK::move
+ *
+ * Translate the POLY_BLOCK by adding shift to every vertex, then refresh
+ * the bounding box.
+ * @param shift x,y translation vector
+ */
+
+void POLY_BLOCK::move(ICOORD shift) {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  do {
+    *vertex_it.data() += shift;
+    vertex_it.forward();
+  } while (!vertex_it.at_first());
+
+  compute_bb();
+}
+
+#ifndef GRAPHICS_DISABLED
+// Draw the outline of the block in the colour associated with its type.
+// If num is positive it is also written as a label at the first vertex.
+void POLY_BLOCK::plot(ScrollView *window, int32_t num) {
+  ICOORDELT_IT vertex_it(&vertices);
+
+  window->Pen(ColorForPolyBlockType(type));
+  vertex_it.move_to_first();
+  ICOORDELT *const first_vertex = vertex_it.data();
+
+  if (num > 0) {
+    window->TextAttributes("Times", 80, false, false, false);
+    char label[34];
+#  if !defined(_WIN32) || defined(__MINGW32__)
+    snprintf(label, sizeof(label), "%" PRId32, num);
+#  else
+    _ltoa(num, label, 10);
+#  endif
+    window->Text(first_vertex->x(), first_vertex->y(), label);
+  }
+
+  // Trace every vertex in turn, starting from the first one ...
+  window->SetCursor(first_vertex->x(), first_vertex->y());
+  vertex_it.mark_cycle_pt();
+  while (!vertex_it.cycled_list()) {
+    window->DrawTo(vertex_it.data()->x(), vertex_it.data()->y());
+    vertex_it.forward();
+  }
+  // ... and close the polygon by returning to it.
+  window->DrawTo(first_vertex->x(), first_vertex->y());
+}
+
+// Fill the interior of the block with the given colour by drawing, for
+// every row of the bounding box, the horizontal segments returned by
+// PB_LINE_IT::get_line.
+void POLY_BLOCK::fill(ScrollView *window, ScrollView::Color colour) {
+  // Automatic storage: no manual delete, and nothing leaks if a call
+  // below throws.
+  PB_LINE_IT lines(this);
+  window->Pen(colour);
+
+  // Iterate with an int so that the loop still terminates when top() is
+  // INT16_MAX; an int16_t counter could never exceed that bound.
+  const int bottom = this->bounding_box()->bottom();
+  const int top = this->bounding_box()->top();
+  for (int y = bottom; y <= top; ++y) {
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
+        lines.get_line(static_cast<int16_t>(y)));
+    if (segments->empty()) {
+      continue;
+    }
+    ICOORDELT_IT s_it(segments.get());
+    for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
+      // Note different use of ICOORDELT, x coord is x coord of pixel
+      // at the start of line segment, y coord is length of line segment
+      // Last pixel is start pixel + length.
+      const int16_t width = s_it.data()->y();
+      window->SetCursor(s_it.data()->x(), y);
+      window->DrawTo(s_it.data()->x() + static_cast<float>(width), y);
+    }
+  }
+}
+#endif
+
+/// @return true if the polygons of other and this overlap.
+/// Overlap is detected by finding a vertex of one polygon strictly inside
+/// the other; vertices lying on the other outline are ignored.
+bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
+  // Disjoint bounding boxes cannot overlap.
+  if (!box.overlap(*(other->bounding_box()))) {
+    return false;
+  }
+
+  ICOORDELT_IT it(&vertices);
+
+  // Is any vertex of this inside other?
+  do {
+    const int16_t winding = other->winding_number(*it.data());
+    if (winding != INTERSECTING && winding != 0) {
+      return true;
+    }
+    it.forward();
+  } while (!it.at_first());
+
+  // Is any vertex of other inside this?
+  it.set_to_list(other->points());
+  do {
+    const int16_t winding = winding_number(*it.data());
+    if (winding != INTERSECTING && winding != 0) {
+      return true;
+    }
+    it.forward();
+  } while (!it.at_first());
+
+  return false;
+}
+
+// Return the horizontal segments of row y that lie inside the block.
+// The result is a newly allocated list owned by the caller. Each element
+// uses ICOORDELT unconventionally: x is the x coordinate where a segment
+// starts and y is the length of that segment.
+ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) {
+  ICOORDELT_IT v, r;
+  auto *result = new ICOORDELT_LIST();
+  r.set_to_list(result);
+  v.set_to_list(block->points());
+
+  // Sample the row through the pixel centres.
+  const float fy = y + 0.5f;
+
+  // Collect the x position of every polygon edge that crosses the row.
+  for (v.mark_cycle_pt(); !v.cycled_list(); v.forward()) {
+    ICOORDELT *previous = v.data_relative(-1);
+    ICOORDELT *current = v.data();
+    const bool crosses_row = ((previous->y() > y) && (current->y() <= y)) ||
+                             ((previous->y() <= y) && (current->y() > y));
+    if (!crosses_row) {
+      continue;
+    }
+    // The two y values differ here, so the divisor is never zero.
+    float fx =
+        0.5f + previous->x() +
+        (current->x() - previous->x()) * (fy - previous->y()) / (current->y() - previous->y());
+    r.add_to_end(new ICOORDELT(static_cast<int16_t>(fx), 0));
+  }
+
+  if (!r.empty()) {
+    r.sort(lessthan);
+    // Crossings come in enter/leave pairs once sorted by x: turn each pair
+    // into one element (start x, length) and discard the second of the pair.
+    for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
+      r.data()->set_y(r.data_relative(1)->x() - r.data()->x());
+      r.forward();
+      delete (r.extract());
+    }
+  }
+
+  return result;
+}
+
+// qsort-style comparator: first and second each point to an
+// ICOORDELT pointer. Orders by ascending x, returning -1, 0 or 1.
+int lessthan(const void *first, const void *second) {
+  const ICOORDELT *lhs = *reinterpret_cast<const ICOORDELT *const *>(first);
+  const ICOORDELT *rhs = *reinterpret_cast<const ICOORDELT *const *>(second);
+
+  const auto lhs_x = lhs->x();
+  const auto rhs_x = rhs->x();
+  if (lhs_x == rhs_x) {
+    return 0;
+  }
+  return lhs_x < rhs_x ? -1 : 1;
+}
+
+#ifndef GRAPHICS_DISABLED
+/// Returns a color to draw the given type.
+/// Types at or beyond PT_COUNT are drawn in white.
+ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) {
+  // Keep kPBColors in sync with PolyBlockType.
+  const ScrollView::Color kPBColors[PT_COUNT] = {
+      ScrollView::WHITE,       // Type is not yet known. Keep as the 1st element.
+      ScrollView::BLUE,        // Text that lives inside a column.
+      ScrollView::CYAN,        // Text that spans more than one column.
+      ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out
+                               // region.
+      ScrollView::AQUAMARINE,  // Partition belonging to an equation region.
+      ScrollView::SKY_BLUE,    // Partition belonging to an inline equation
+                               // region.
+      ScrollView::MAGENTA,     // Partition belonging to a table region.
+      ScrollView::GREEN,       // Text-line runs vertically.
+      ScrollView::LIGHT_BLUE,  // Text that belongs to an image.
+      ScrollView::RED,         // Image that lives inside a column.
+      ScrollView::YELLOW,      // Image that spans more than one column.
+      ScrollView::ORANGE,      // Image in a cross-column pull-out region.
+      ScrollView::BROWN,       // Horizontal Line.
+      ScrollView::DARK_GREEN,  // Vertical Line.
+      ScrollView::GREY         // Lies outside of any column.
+  };
+  if (!(type < PT_COUNT)) {
+    return ScrollView::WHITE;
+  }
+  return kPBColors[type];
+}
+#endif // !GRAPHICS_DISABLED
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyblk.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/polyblk.h
@ -0,0 +1,117 @@
+/**********************************************************************
+ * File:        polyblk.h  (Formerly poly_block.h)
+ * Description: Polygonal blocks
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef POLYBLK_H
+#define POLYBLK_H
+
+#include "elst.h"
+#include "points.h"
+#include "rect.h"
+#include "scrollview.h"
+
+#include <tesseract/publictypes.h>
+
+namespace tesseract {
+
+// A polygonal region: a list of vertices, its bounding box and the
+// PolyBlockType describing what kind of region it is.
+class TESS_API POLY_BLOCK {
+public:
+  // Creates an empty polygon. The type is value-initialized (see the member
+  // declaration below) so that isA()/IsText() never read an indeterminate
+  // value on a default-constructed object.
+  POLY_BLOCK() = default;
+  // Initialize from box coordinates.
+  POLY_BLOCK(const TBOX &tbox, PolyBlockType type);
+  // Initialize from a list of points and a type.
+  POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type);
+  ~POLY_BLOCK() = default;
+
+  // Returns a pointer to the stored bounding box.
+  TBOX *bounding_box() { // access function
+    return &box;
+  }
+
+  // Returns a pointer to the stored vertex list.
+  ICOORDELT_LIST *points() { // access function
+    return &vertices;
+  }
+
+  // NOTE(review): presumably recomputes box from vertices; defined out of line.
+  void compute_bb();
+
+  // Returns the type of this region.
+  PolyBlockType isA() const {
+    return type;
+  }
+
+  // Returns the result of PTIsTextType() for the type of this region.
+  bool IsText() const {
+    return PTIsTextType(type);
+  }
+
+  // Rotate about the origin by the given rotation. (Analogous to
+  // multiplying by a complex number.)
+  void rotate(FCOORD rotation);
+  // Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)
+  void reflect_in_y_axis();
+  // Move by adding shift to all coordinates.
+  void move(ICOORD shift);
+
+#ifndef GRAPHICS_DISABLED
+
+  void plot(ScrollView *window, int32_t num);
+
+  void fill(ScrollView *window, ScrollView::Color colour);
+#endif // !GRAPHICS_DISABLED
+
+  // Returns true if other is inside this.
+  bool contains(POLY_BLOCK *other);
+
+  // Returns true if the polygons of other and this overlap.
+  bool overlap(POLY_BLOCK *other);
+
+  // Returns the winding number of this around the test_pt.
+  // Positive for anticlockwise, negative for clockwise, and zero for
+  // test_pt outside this.
+  int16_t winding_number(const ICOORD &test_pt);
+
+#ifndef GRAPHICS_DISABLED
+  // Static utility functions to handle the PolyBlockType.
+  // Returns a color to draw the given type.
+  static ScrollView::Color ColorForPolyBlockType(PolyBlockType type);
+#endif // !GRAPHICS_DISABLED
+
+private:
+  ICOORDELT_LIST vertices; // vertices
+  TBOX box;                // bounding box
+  // Type of this region. Value-initialized to the zero enumerator.
+  // NOTE(review): zero is expected to be the "unknown" type - confirm against
+  // the PolyBlockType declaration.
+  PolyBlockType type{};
+};
+
+// Scanline iterator for a POLY_BLOCK. Holds a non-owning pointer to the block
+// it currently reads from.
+class PB_LINE_IT {
+public:
+  // Starts iterating over blkptr.
+  PB_LINE_IT(POLY_BLOCK *blkptr) : block(blkptr) {}
+
+  // Switches the iterator to a different block.
+  void set_to_block(POLY_BLOCK *blkptr) {
+    block = blkptr;
+  }
+
+  // Returns a list of runs of pixels for the given y coord.
+  // Each element of the returned list holds the start of a run in its x
+  // field and the extent of the run in its y field.
+  // The caller must delete the returned list after use.
+  ICOORDELT_LIST *get_line(int16_t y);
+
+private:
+  POLY_BLOCK *block; // block being scanned
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadlsq.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadlsq.cpp
@ -0,0 +1,144 @@
+/**********************************************************************
+ * File:        quadlsq.cpp  (Formerly qlsq.c)
+ * Description: Code for least squares approximation of quadratics.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "quadlsq.h"
+
+#include "tprintf.h"
+
+#include <cmath>
+#include <cstdio>
+
+namespace tesseract {
+
+// Smallest variance the least-squares fit accepts before it falls back to a
+// polynomial of lower degree.
+constexpr long double kMinVariance = 1.0L / 1024;
+
+/**********************************************************************
+ * QLSQ::clear
+ *
+ * Reset the fitted coefficients, the element count and every running
+ * sum to zero.
+ **********************************************************************/
+
+void QLSQ::clear() {
+  // Fitted result.
+  a = b = c = 0.0;
+  // No elements accumulated yet.
+  n = 0;
+  // Running sums kept in double precision.
+  sigx = sigy = 0.0;
+  sigxx = sigxy = sigyy = 0.0;
+  // Higher order running sums kept in long double precision.
+  sigxxx = 0.0;
+  sigxxy = 0.0;
+  sigxxxx = 0.0;
+}
+
+/**********************************************************************
+ * QLSQ::add
+ *
+ * Accumulate the point (x, y) into the running sums.
+ **********************************************************************/
+
+void QLSQ::add(double x, double y) {
+  ++n; // One more element.
+
+  // Sums accumulated from double precision products.
+  sigx += x;
+  sigy += y;
+  sigxx += x * x;
+  sigxy += x * y;
+  sigyy += y * y;
+
+  // Higher powers are multiplied out in long double precision.
+  const long double x_squared = static_cast<long double>(x) * x;
+  sigxxx += x_squared * x;
+  sigxxy += x_squared * y;
+  sigxxxx += x_squared * x * x;
+}
+
+/**********************************************************************
+ * QLSQ::remove
+ *
+ * Take the point (x, y) back out of the running sums. Prints a message
+ * and does nothing if the accumulator holds no elements.
+ **********************************************************************/
+
+void QLSQ::remove(double x, double y) {
+  if (n <= 0) {
+    tprintf("Can't remove an element from an empty QLSQ accumulator!\n");
+    return;
+  }
+  --n; // One element fewer.
+
+  // Sums accumulated from double precision products.
+  sigx -= x;
+  sigy -= y;
+  sigxx -= x * x;
+  sigxy -= x * y;
+  sigyy -= y * y;
+
+  // Higher powers are multiplied out in long double precision.
+  const long double x_squared = static_cast<long double>(x) * x;
+  sigxxx -= x_squared * x;
+  sigxxy -= x_squared * y;
+  sigxxxx -= x_squared * x * x;
+}
+
+/**********************************************************************
+ * QLSQ::fit
+ *
+ * Fit a polynomial of at most the given degree to the accumulated points
+ * and store the coefficients of axx + bx + c in a, b and c.
+ * Coefficients above the degree actually fitted are set to zero.
+ **********************************************************************/
+
+void QLSQ::fit(int degree) {
+  // Note: for computational efficiency the variance, covariance and cube
+  // variance are left un-normalized, since the same factors appear in both
+  // numerators and denominators. The range checks below therefore scale
+  // kMinVariance by the matching power of n.
+  const long double x_variance =
+      static_cast<long double>(sigxx) * n - static_cast<long double>(sigx) * sigx;
+
+  const bool slope_unreliable = x_variance < kMinVariance * n * n || degree < 1 || n < 2;
+  if (slope_unreliable) {
+    // b cannot be calculated reliably, so drop a and b and use only c.
+    a = b = 0.0;
+    c = (n >= 1 && degree >= 0) ? sigy / n : 0.0;
+    return;
+  }
+
+  const long double cubevar = sigxxx * n - static_cast<long double>(sigxx) * sigx;
+  const long double covariance =
+      static_cast<long double>(sigxy) * n - static_cast<long double>(sigx) * sigy;
+
+  long double numerator = 0.0;   // Accurate top of the expression for a.
+  long double denominator = 0.0; // Accurate bottom of the expression for a.
+  if (n >= 4 && degree >= 2) {
+    numerator = cubevar * covariance;
+    numerator += x_variance * (static_cast<long double>(sigxx) * sigy - sigxxy * n);
+
+    denominator = cubevar * cubevar;
+    denominator -= x_variance * (sigxxxx * n - static_cast<long double>(sigxx) * sigxx);
+  }
+
+  if (denominator >= kMinVariance * n * n * n * n) {
+    // The denominator is large enough: fit the quadratic term too.
+    a = numerator / denominator;
+    const long double slope_top = covariance - cubevar * a;
+    b = slope_top / x_variance;
+  } else {
+    // Give up on a and fit a straight line.
+    a = 0.0;
+    b = covariance / x_variance;
+  }
+  c = (sigy - a * sigxx - b * sigx) / n;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadlsq.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadlsq.h
@ -0,0 +1,70 @@
+/**********************************************************************
+ * File:        quadlsq.h  (Formerly qlsq.h)
+ * Description: Code for least squares approximation of quadratics.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef QUADLSQ_H
+#define QUADLSQ_H
+
+#include "points.h"
+
+namespace tesseract {
+
+// Accumulator for a least-squares fit of a polynomial a*x*x + b*x + c.
+// Points are added or removed one at a time; fit() stores the coefficients,
+// which are then read with get_a(), get_b() and get_c().
+class QLSQ {
+public:
+  QLSQ() {   // constructor
+    clear(); // set to zeros
+  }
+  void clear(); // reset sums and results
+
+  void add(     // add element
+      double x, // coords to add
+      double y);
+  void remove(  // delete element
+      double x, // coords to delete
+      double y);
+  // Const-qualified so the count can be read through a const QLSQ.
+  int32_t count() const { // no of elements
+    return n;
+  }
+
+  void fit(        // fit polynomial of at most
+      int degree); // the given degree
+  double get_a() const { // get x squared coefficient
+    return a;
+  }
+  double get_b() const { // get x coefficient
+    return b;
+  }
+  double get_c() const { // get constant term
+    return c;
+  }
+
+private:
+  int32_t n;           // no of elements
+  double a, b, c;      // result
+  double sigx;         // sum of x
+  double sigy;         // sum of y
+  double sigxx;        // sum x squared
+  double sigxy;        // sum of xy
+  double sigyy;        // sum y squared
+  long double sigxxx;  // sum x cubed
+  long double sigxxy;  // sum xsquared y
+  long double sigxxxx; // sum x fourth
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadratc.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quadratc.h
@ -0,0 +1,65 @@
+/**********************************************************************
+ * File:        quadratc.h  (Formerly quadrtic.h)
+ * Description: Code for the QUAD_COEFFS class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 17:24:40 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef QUADRATC_H
+#define QUADRATC_H
+
+#include "points.h"
+
+namespace tesseract {
+
+// Coefficients of the quadratic y = a*x*x + b*x + c.
+class QUAD_COEFFS {
+public:
+  // Leaves the coefficients uninitialized.
+  QUAD_COEFFS() = default;
+  // Builds the quadratic xsq*x*x + x*x + constant from its coefficients.
+  QUAD_COEFFS(double xsq, float x, float constant) : a(xsq), b(x), c(constant) {}
+
+  // Evaluates the quadratic at x using Horner's scheme.
+  float y(float x) const {
+    return static_cast<float>((a * x + b) * x + c);
+  }
+
+  // Translates the curve by vec = (p, q), i.e. rewrites the coefficients so
+  // that the new curve satisfies y - q = a(x - p)^2 + b(x - p) + c:
+  //   y - q = ax^2 - 2apx + ap^2 + bx - bp + c
+  //   y     = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q)
+  void move(ICOORD vec) {
+    const int16_t shift_x = vec.x();
+    const int16_t shift_y = vec.y();
+
+    // c must be updated first because it uses the old value of b.
+    c = static_cast<float>(c - b * shift_x + a * shift_x * shift_x + shift_y);
+    b = static_cast<float>(b - 2 * a * shift_x);
+  }
+
+  double a; // x squared
+  float b;  // x
+  float c;  // constant
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quspline.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quspline.cpp
@ -0,0 +1,402 @@
+/**********************************************************************
+ * File:        quspline.cpp  (Formerly qspline.c)
+ * Description: Code for the QSPLINE class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "quspline.h"
+
+#include "points.h"   // for ICOORD
+#include "quadlsq.h"  // for QLSQ
+#include "quadratc.h" // for QUAD_COEFFS
+
+#include <allheaders.h> // for pixRenderPolyline, pixGetDepth, pixGetHeight
+#include "pix.h"        // for L_CLEAR_PIXELS, L_SET_PIXELS, Pix (ptr only)
+
+namespace tesseract {
+
+// Number of straight-line steps each spline segment is divided into when it
+// is plotted.
+#define QSPLINE_PRECISION 16 // no of steps to draw
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Build a QSPLINE from count segments. xstarts supplies count + 1
+ * boundaries and coeffs supplies three coefficients (a, b, c) per segment.
+ * Both arrays are copied.
+ **********************************************************************/
+
+QSPLINE::QSPLINE(int32_t count, int32_t *xstarts, double *coeffs)
+    : segments(count), xcoords(new int32_t[count + 1]), quadratics(new QUAD_COEFFS[count]) {
+  int32_t seg = 0;               // segment index
+  const double *triple = coeffs; // a, b, c of the current segment
+  for (; seg < segments; ++seg, triple += 3) {
+    xcoords[seg] = xstarts[seg];
+    quadratics[seg] = QUAD_COEFFS(triple[0], triple[1], triple[2]);
+  }
+  // The right edge of the last segment.
+  xcoords[seg] = xstarts[seg];
+}
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Constructor to build a QSPLINE by approximation of points.
+ * xstarts holds segcount + 1 segment boundaries; xpts/ypts hold pointcount
+ * points. Each segment is fitted by least squares with a polynomial of at
+ * most the given degree. Where possible an extra point, linearly
+ * interpolated between the neighbouring points, is added at each segment
+ * boundary.
+ **********************************************************************/
+
+QSPLINE::QSPLINE(               // constructor
+    int xstarts[],              // spline boundaries
+    int segcount,               // no of segments
+    int xpts[],                 // points to fit
+    int ypts[], int pointcount, // no of pts
+    int degree                  // fit required
+) {
+  int pointindex;    /*no along text line */
+  int segment;       /*segment no */
+  int32_t *ptcounts; // cumulative point count at each boundary
+  QLSQ qlsq;         /*accumulator */
+
+  segments = segcount;
+  xcoords = new int32_t[segcount + 1];
+  ptcounts = new int32_t[segcount + 1];
+  quadratics = new QUAD_COEFFS[segcount];
+  memmove(xcoords, xstarts, (segcount + 1) * sizeof(int32_t));
+  ptcounts[0] = 0; /*none in any yet */
+  for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) {
+    while (segment < segcount && xpts[pointindex] >= xstarts[segment]) {
+      segment++; /*try next segment */
+                 /*cumulative counts */
+      ptcounts[segment] = ptcounts[segment - 1];
+    }
+    ptcounts[segment]++; /*no in previous partition */
+  }
+  while (segment < segcount) {
+    segment++;
+    /*zero the rest */
+    ptcounts[segment] = ptcounts[segment - 1];
+  }
+
+  // y (in integer arithmetic) at x on the straight line through the points
+  // index - 1 and index. Only valid when those two points differ in x.
+  auto interpolate = [xpts, ypts](int x, int index) {
+    return ypts[index - 1] +
+           (ypts[index] - ypts[index - 1]) * (x - xpts[index - 1]) /
+               (xpts[index] - xpts[index - 1]);
+  };
+  // True if points index - 1 and index both exist and have distinct x, i.e.
+  // interpolate() can be used without reading past the end of the arrays or
+  // dividing by zero.
+  auto can_interpolate = [xpts, pointcount](int index) {
+    return index > 0 && index < pointcount && xpts[index] != xpts[index - 1];
+  };
+
+  for (segment = 0; segment < segcount; segment++) {
+    qlsq.clear();
+    /*first blob */
+    pointindex = ptcounts[segment];
+    // pointindex equals pointcount when the segment starts after the last
+    // point, so it has to be range-checked before xpts is indexed.
+    if (can_interpolate(pointindex) && xpts[pointindex] != xstarts[segment]) {
+      qlsq.add(xstarts[segment], interpolate(xstarts[segment], pointindex));
+    }
+    for (; pointindex < ptcounts[segment + 1]; pointindex++) {
+      qlsq.add(xpts[pointindex], ypts[pointindex]);
+    }
+    // Two adjacent points with the same x would make the interpolation
+    // divide by zero, so the boundary point is skipped in that case.
+    if (can_interpolate(pointindex) && xpts[pointindex] != xstarts[segment + 1]) {
+      qlsq.add(xstarts[segment + 1], interpolate(xstarts[segment + 1], pointindex));
+    }
+    qlsq.fit(degree);
+    quadratics[segment].a = qlsq.get_a();
+    quadratics[segment].b = qlsq.get_b();
+    quadratics[segment].c = qlsq.get_c();
+  }
+  delete[] ptcounts;
+}
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Copy constructor. Starts from the empty state and then delegates the
+ * copy to the assignment operator.
+ **********************************************************************/
+
+QSPLINE::QSPLINE(const QSPLINE &src) : segments(0), xcoords(nullptr), quadratics(nullptr) {
+  operator=(src);
+}
+
+/**********************************************************************
+ * QSPLINE::~QSPLINE
+ *
+ * Release the boundary and coefficient arrays owned by the spline.
+ **********************************************************************/
+
+QSPLINE::~QSPLINE() {
+  delete[] quadratics;
+  delete[] xcoords;
+}
+
+/**********************************************************************
+ * QSPLINE::operator=
+ *
+ * Copy a QSPLINE. The boundary and coefficient arrays are deep-copied.
+ * Self-assignment is a no-op, and assigning from an empty spline (one
+ * without arrays, such as a default-constructed one) leaves this empty.
+ **********************************************************************/
+
+QSPLINE &QSPLINE::operator=( // assignment
+    const QSPLINE &source) {
+  if (this == &source) {
+    // Freeing the arrays below would otherwise destroy the data to copy.
+    return *this;
+  }
+
+  // Build the copies before releasing the old arrays, so that the source is
+  // never read after anything has been freed.
+  int32_t new_segments = 0;
+  int32_t *new_xcoords = nullptr;
+  QUAD_COEFFS *new_quadratics = nullptr;
+  if (source.xcoords != nullptr && source.quadratics != nullptr) {
+    new_segments = source.segments;
+    new_xcoords = new int32_t[new_segments + 1];
+    new_quadratics = new QUAD_COEFFS[new_segments];
+    memmove(new_xcoords, source.xcoords, (new_segments + 1) * sizeof(int32_t));
+    memmove(new_quadratics, source.quadratics, new_segments * sizeof(QUAD_COEFFS));
+  }
+
+  delete[] xcoords;
+  delete[] quadratics;
+  segments = new_segments;
+  xcoords = new_xcoords;
+  quadratics = new_quadratics;
+  return *this;
+}
+
+/**********************************************************************
+ * QSPLINE::step
+ *
+ * Sum the jumps in y at every segment boundary crossed when going from
+ * the segment containing x1 to the segment containing x2. The jump at a
+ * boundary is the value of the right-hand quadratic minus the value of
+ * the left-hand quadratic there.
+ **********************************************************************/
+
+double QSPLINE::step(double x1, double x2) {
+  int seg = spline_index(x1);             // segment holding x1
+  const int last_seg = spline_index(x2);  // segment holding x2
+
+  double total = 0;
+  for (; seg < last_seg; ++seg) {
+    const auto boundary = static_cast<float>(xcoords[seg + 1]);
+    total += static_cast<double>(quadratics[seg + 1].y(boundary));
+    total -= static_cast<double>(quadratics[seg].y(boundary));
+  }
+  return total;
+}
+
+/**********************************************************************
+ * QSPLINE::y
+ *
+ * Evaluate the spline at x, using the quadratic of the segment that
+ * spline_index() selects for x.
+ **********************************************************************/
+
+double QSPLINE::y(double x) const {
+  const QUAD_COEFFS &piece = quadratics[spline_index(x)];
+  return piece.y(x);
+}
+
+/**********************************************************************
+ * QSPLINE::spline_index
+ *
+ * Binary search for the segment to use at x: the index of the largest
+ * xcoord not greater than x. The search starts from the range
+ * [0, segments), so x values left of the first boundary give 0.
+ **********************************************************************/
+
+int32_t QSPLINE::spline_index(double x) const {
+  int32_t low = 0;         // candidate answer
+  int32_t high = segments; // one past the last candidate
+
+  while (high - low > 1) {
+    const int32_t middle = (high + low) / 2;
+    if (x >= xcoords[middle]) {
+      low = middle;
+    } else {
+      high = middle;
+    }
+  }
+  return low;
+}
+
+/**********************************************************************
+ * QSPLINE::move
+ *
+ * Translate the spline by vec: every boundary is shifted by the x
+ * component and every quadratic is moved by the whole vector.
+ **********************************************************************/
+
+void QSPLINE::move(ICOORD vec) {
+  const int16_t dx = vec.x();
+
+  int32_t seg = 0;
+  while (seg < segments) {
+    xcoords[seg] += dx;
+    quadratics[seg].move(vec);
+    ++seg;
+  }
+  // The right edge of the last segment has no quadratic of its own.
+  xcoords[seg] += dx;
+}
+
+/**********************************************************************
+ * QSPLINE::overlap
+ *
+ * Compare the inner x range of spline2 (its second boundary to its last
+ * but one) with the inner x range of this. Return true if spline2 has at
+ * least 3 segments and its inner range falls short of the inner range of
+ * this by no more than fraction of that range at either end.
+ **********************************************************************/
+
+bool QSPLINE::overlap(QSPLINE *spline2, double fraction) {
+  const int leftlimit = xcoords[1];             // inner left edge of this
+  const int rightlimit = xcoords[segments - 1]; // inner right edge of this
+
+  if (spline2->segments < 3) {
+    return false;
+  }
+  // Largest shortfall tolerated at each end.
+  const double slack = fraction * (rightlimit - leftlimit);
+  if (spline2->xcoords[1] > leftlimit + slack) {
+    return false; // spline2 starts too far to the right
+  }
+  if (spline2->xcoords[spline2->segments - 1] < rightlimit - slack) {
+    return false; // spline2 ends too far to the left
+  }
+  return true;
+}
+
+/**********************************************************************
+ * QSPLINE::extrapolate
+ *
+ * Extend the spline so that it covers xmin to xmax. A straight-line
+ * segment with the given gradient is added at each end that needs it,
+ * positioned to meet the existing spline at the old end point. Nothing
+ * happens if the spline already covers the range.
+ **********************************************************************/
+
+void QSPLINE::extrapolate(double gradient, int xmin, int xmax) {
+  const bool extend_left = xmin < xcoords[0];
+  const bool extend_right = xmax > xcoords[segments];
+  if (!extend_left && !extend_right) {
+    return;
+  }
+  const int extra = (extend_left ? 1 : 0) + (extend_right ? 1 : 0);
+
+  int32_t *new_xcoords = new int32_t[segments + 1 + extra];
+  QUAD_COEFFS *new_quads = new QUAD_COEFFS[segments + extra];
+  int dest = 0; // next free slot in the new arrays
+
+  if (extend_left) {
+    QUAD_COEFFS &lead = new_quads[dest];
+    lead.a = 0;
+    lead.b = gradient;
+    // Choose c so that the line passes through the old left end point.
+    lead.c = y(xcoords[0]) - lead.b * xcoords[0];
+    new_xcoords[dest] = xmin;
+    dest++;
+  }
+
+  // Copy the existing segments unchanged.
+  int src = 0;
+  for (; src < segments; src++, dest++) {
+    new_xcoords[dest] = xcoords[src];
+    new_quads[dest] = quadratics[src];
+  }
+  new_xcoords[dest] = xcoords[src];
+
+  if (extend_right) {
+    QUAD_COEFFS &tail = new_quads[dest];
+    tail.a = 0;
+    tail.b = gradient;
+    // Choose c so that the line passes through the old right end point.
+    tail.c = y(xcoords[segments]) - tail.b * xcoords[segments];
+    dest++;
+    new_xcoords[dest] = xmax + 1;
+  }
+
+  // Swap in the new arrays.
+  segments = dest;
+  delete[] xcoords;
+  delete[] quadratics;
+  xcoords = new_xcoords;
+  quadratics = new_quads;
+}
+
+/**********************************************************************
+ * QSPLINE::plot
+ *
+ * Draw the QSPLINE in the given colour as one connected polyline, with
+ * each segment drawn in QSPLINE_PRECISION steps.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void QSPLINE::plot(ScrollView *window, ScrollView::Color colour) const {
+  window->Pen(colour);
+
+  bool first_point = true; // the very first point only positions the cursor
+  for (int32_t seg = 0; seg < segments; seg++) {
+    const double x_step =
+        static_cast<double>(xcoords[seg + 1] - xcoords[seg]) / QSPLINE_PRECISION;
+    double x = xcoords[seg];
+    for (int16_t piece = 0; piece <= QSPLINE_PRECISION; piece++, x += x_step) {
+      if (first_point) {
+        window->SetCursor(x, quadratics[seg].y(x));
+        first_point = false;
+      } else {
+        window->DrawTo(x, quadratics[seg].y(x));
+      }
+    }
+  }
+}
+#endif
+
+// Renders the spline into pix as a polyline kLineWidth pixels wide.
+// y values are subtracted from the image height before drawing. The pixels
+// are set in a 1 bit image, drawn with colour (255, 0, 0) in a 32 bit image
+// and cleared for any other depth. Does nothing if pix is null.
+void QSPLINE::plot(Image pix) const {
+  if (pix == nullptr) {
+    return;
+  }
+
+  const int kLineWidth = 5;
+  const auto height = static_cast<double>(pixGetHeight(pix));
+  Pta *points = ptaCreate(QSPLINE_PRECISION * segments);
+
+  // Sample every segment at QSPLINE_PRECISION + 1 evenly spaced x values.
+  for (int32_t seg = 0; seg < segments; seg++) {
+    const double x_step =
+        static_cast<double>((xcoords[seg + 1] - xcoords[seg])) / QSPLINE_PRECISION;
+    double x = xcoords[seg];
+    for (int16_t piece = 0; piece <= QSPLINE_PRECISION; piece++, x += x_step) {
+      const double flipped_y = height - quadratics[seg].y(x);
+      ptaAddPt(points, x, flipped_y);
+    }
+  }
+
+  const auto depth = pixGetDepth(pix);
+  if (depth == 1) {
+    pixRenderPolyline(pix, points, kLineWidth, L_SET_PIXELS, 1);
+  } else if (depth == 32) {
+    pixRenderPolylineArb(pix, points, kLineWidth, 255, 0, 0, 1);
+  } else {
+    pixRenderPolyline(pix, points, kLineWidth, L_CLEAR_PIXELS, 1);
+  }
+  ptaDestroy(&points);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quspline.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/quspline.h
@ -0,0 +1,99 @@
+/**********************************************************************
+ * File:        quspline.h  (Formerly qspline.h)
+ * Description: Code for the QSPLINE class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 17:16:12 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef QUSPLINE_H
+#define QUSPLINE_H
+
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+
+#include <cstdint> // for int32_t
+
+struct Pix;
+
+namespace tesseract {
+
+class ICOORD;
+class QUAD_COEFFS;
+class ROW;
+class TBOX;
+
+// A piecewise quadratic curve. Segment i spans xcoords[i] to xcoords[i + 1]
+// and is described by quadratics[i]. The class owns both arrays.
+class TESS_API QSPLINE {
+  friend void make_first_baseline(TBOX *, int, int *, int *, QSPLINE *, QSPLINE *, float);
+  friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float);
+  friend void tweak_row_baseline(ROW *, double, double);
+
+public:
+  // Creates an empty spline: no segments and no arrays.
+  QSPLINE() : segments(0), xcoords(nullptr), quadratics(nullptr) {}
+  // Copy constructor.
+  QSPLINE(const QSPLINE &src);
+  // Builds a spline from count segments, count + 1 segment boundaries in
+  // xstarts and three coefficients per segment in coeffs.
+  QSPLINE(int32_t count, int32_t *xstarts, double *coeffs);
+  ~QSPLINE();
+  // Builds a spline by a least squares fit of the blobcount points given by
+  // xcoords/ycoords, with segcount segments bounded by xstarts and
+  // polynomials of at most the given degree.
+  QSPLINE(int xstarts[], int segcount, int xcoords[], int ycoords[], int blobcount,
+          int degree);
+
+  // Returns the total step change between the segments holding x1 and x2.
+  double step(double x1, double x2);
+  // Evaluates the spline at x.
+  double y(double x) const;
+
+  // Repositions the spline by the vector vec.
+  void move(ICOORD vec);
+  // Tests overlap with spline2; spline2 may not be smaller than this by more
+  // than fraction.
+  bool overlap(QSPLINE *spline2, double fraction);
+  // Extends the spline linearly with the given gradient so that it reaches
+  // the new left and right edges.
+  void extrapolate(double gradient, int left, int right);
+
+#ifndef GRAPHICS_DISABLED
+  // Draws the spline in window using colour.
+  void plot(ScrollView *window, ScrollView::Color colour) const;
+#endif
+
+  // Paint the baseline over pix. If pix has depth of 32, then the line will
+  // be painted in red. Otherwise it will be painted in black.
+  void plot(Image pix) const;
+
+  // Copy assignment from source.
+  QSPLINE &operator=(const QSPLINE &source);
+
+private:
+  // Binary search for the index of the segment to use at x.
+  int32_t spline_index(double x) const;
+
+  int32_t segments;        // no of segments
+  int32_t *xcoords;        // segment boundaries (segments + 1 entries)
+  QUAD_COEFFS *quadratics; // spline pieces, one per segment
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ratngs.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ratngs.cpp
@ -0,0 +1,829 @@
+/**********************************************************************
+ * File: ratngs.cpp  (Formerly ratings.c)
+ * Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "ratngs.h"
+
+#include "blobs.h"
+#include "matrix.h"
+#include "normalis.h" // kBlnBaselineOffset.
+#include "unicharset.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+namespace tesseract {
+
+// Out-of-class definition of the static WERD_CHOICE::kBadRating member.
+// NOTE(review): presumably the rating given to a word that has been marked
+// bad (see make_bad()) - confirm against ratngs.h.
+const float WERD_CHOICE::kBadRating = 100000.0;
+// Min offset in baseline-normalized coords to make a character a subscript.
+// ScriptPositionOf() subtracts it from the character's expected min top and
+// from kBlnBaselineOffset to obtain the two subscript thresholds.
+const int kMinSubscriptOffset = 20;
+// Min offset in baseline-normalized coords to make a character a superscript.
+// ScriptPositionOf() adds it to the character's expected max bottom.
+const int kMinSuperscriptOffset = 20;
+// Max y of bottom of a drop-cap blob: ScriptPositionOf() reports SP_DROPCAP
+// for any blob whose bottom is at or below this value.
+const int kMaxDropCapBottom = -128;
+// Max fraction of x-height to use as denominator in measuring x-height overlap.
+const double kMaxOverlapDenominator = 0.125;
+// Min fraction of x-height range that should be in agreement for matching
+// x-heights.
+const double kMinXHeightMatch = 0.5;
+// Max tolerance on baseline position as a fraction of x-height for matching
+// baselines.
+const double kMaxBaselineDrift = 0.0625;
+
+// Human-readable names for the permuter values, as returned by
+// WERD_CHOICE::permuter_name().
+static const char kPermuterTypeNoPerm[] = "None";
+static const char kPermuterTypePuncPerm[] = "Punctuation";
+static const char kPermuterTypeTopPerm[] = "Top Choice";
+static const char kPermuterTypeLowerPerm[] = "Top Lower Case";
+static const char kPermuterTypeUpperPerm[] = "Top Upper Case";
+static const char kPermuterTypeNgramPerm[] = "Ngram";
+static const char kPermuterTypeNumberPerm[] = "Number";
+static const char kPermuterTypeUserPatPerm[] = "User Pattern";
+static const char kPermuterTypeSysDawgPerm[] = "System Dictionary";
+static const char kPermuterTypeDocDawgPerm[] = "Document Dictionary";
+static const char kPermuterTypeUserDawgPerm[] = "User Dictionary";
+static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary";
+static const char kPermuterTypeCompoundPerm[] = "Compound";
+
+// Lookup table from permuter value to name. The table is indexed directly by
+// the permuter value, so the row order must match the numeric value noted
+// beside each entry.
+// NOTE(review): presumably these values mirror the PermuterType enum - keep
+// the two in sync when adding a permuter.
+static const char *const kPermuterTypeNames[] = {
+    kPermuterTypeNoPerm,       // 0
+    kPermuterTypePuncPerm,     // 1
+    kPermuterTypeTopPerm,      // 2
+    kPermuterTypeLowerPerm,    // 3
+    kPermuterTypeUpperPerm,    // 4
+    kPermuterTypeNgramPerm,    // 5
+    kPermuterTypeNumberPerm,   // 6
+    kPermuterTypeUserPatPerm,  // 7
+    kPermuterTypeSysDawgPerm,  // 8
+    kPermuterTypeDocDawgPerm,  // 9
+    kPermuterTypeUserDawgPerm, // 10
+    kPermuterTypeFreqDawgPerm, // 11
+    kPermuterTypeCompoundPerm  // 12
+};
+
+/**
+ * BLOB_CHOICE::BLOB_CHOICE
+ *
+ * Builds a BLOB_CHOICE from its individual fields.
+ *
+ * @param src_unichar_id character id
+ * @param src_rating rating
+ * @param src_cert certainty
+ * @param src_script_id script
+ * @param min_xheight min xheight allowed
+ * @param max_xheight max xheight by this char
+ * @param yshift yshift out of position
+ * @param c adapted match or other
+ *
+ * Both font info ids are set to -1.
+ */
+BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, float src_rating, float src_cert,
+                         int src_script_id, float min_xheight, float max_xheight, float yshift,
+                         BlobChoiceClassifier c) {
+  // What was recognized and which classifier produced it.
+  unichar_id_ = src_unichar_id;
+  script_id_ = src_script_id;
+  classifier_ = c;
+  // How good the match was.
+  rating_ = src_rating;
+  certainty_ = src_cert;
+  // Size and position constraints.
+  min_xheight_ = min_xheight;
+  max_xheight_ = max_xheight;
+  yshift_ = yshift;
+  // No font information is supplied to this constructor.
+  fontinfo_id_ = -1;
+  fontinfo_id2_ = -1;
+}
+
+/**
+ * BLOB_CHOICE::BLOB_CHOICE
+ *
+ * Copy constructor: copy-constructs the ELIST_LINK base from other and then
+ * duplicates every data field of other.
+ */
+BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) {
+  // Identity of the choice.
+  unichar_id_ = other.unichar_id();
+  script_id_ = other.script_id();
+  classifier_ = other.classifier_;
+  matrix_cell_ = other.matrix_cell_;
+  // Scores.
+  rating_ = other.rating();
+  certainty_ = other.certainty();
+  // Geometry.
+  min_xheight_ = other.min_xheight_;
+  max_xheight_ = other.max_xheight_;
+  yshift_ = other.yshift();
+  // Font information.
+  fontinfo_id_ = other.fontinfo_id();
+  fontinfo_id2_ = other.fontinfo_id2();
+#ifndef DISABLED_LEGACY_ENGINE
+  fonts_ = other.fonts_;
+#endif // ndef DISABLED_LEGACY_ENGINE
+}
+
+// Copy assignment operator: assigns the ELIST_LINK base from other, then
+// copies every data field of other into *this.
+BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) {
+  ELIST_LINK::operator=(other);
+  // Identity of the choice.
+  unichar_id_ = other.unichar_id();
+  script_id_ = other.script_id();
+  classifier_ = other.classifier_;
+  matrix_cell_ = other.matrix_cell_;
+  // Scores.
+  rating_ = other.rating();
+  certainty_ = other.certainty();
+  // Geometry.
+  min_xheight_ = other.min_xheight_;
+  max_xheight_ = other.max_xheight_;
+  yshift_ = other.yshift();
+  // Font information.
+  fontinfo_id_ = other.fontinfo_id();
+  fontinfo_id2_ = other.fontinfo_id2();
+#ifndef DISABLED_LEGACY_ENGINE
+  fonts_ = other.fonts_;
+#endif // ndef DISABLED_LEGACY_ENGINE
+  return *this;
+}
+
+// Returns true if *this and other agree on the baseline and x-height
+// to within some tolerance based on a given estimate of the x-height.
+// Baseline: the yshift values may differ by at most
+// kMaxBaselineDrift * x_height.
+// X-height: the overlap of the two [min_xheight, max_xheight] ranges is
+// divided by the narrower range (clipped to
+// [1.0, kMaxOverlapDenominator * x_height]) and the quotient must reach
+// kMinXHeightMatch.
+// If debug is true the measurements are printed.
+bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const {
+  // Baseline test first, so a mismatch exits before any x-height work.
+  const double baseline_diff = fabs(yshift() - other.yshift());
+  const double max_baseline_diff = kMaxBaselineDrift * x_height;
+  if (baseline_diff > max_baseline_diff) {
+    if (debug) {
+      tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, other.unichar_id_);
+    }
+    return false;
+  }
+
+  // X-height test: normalized overlap of the two allowed x-height ranges.
+  const double this_range = max_xheight() - min_xheight();
+  const double other_range = other.max_xheight() - other.min_xheight();
+  const double narrower_range = std::min(this_range, other_range);
+  const double denominator = ClipToRange(narrower_range, 1.0, kMaxOverlapDenominator * x_height);
+  const auto overlap_top = std::min(max_xheight(), other.max_xheight());
+  const auto overlap_bottom = std::max(min_xheight(), other.min_xheight());
+  const double overlap = (overlap_top - overlap_bottom) / denominator;
+  if (debug) {
+    tprintf("PosAndSize for %d v %d: bl diff = %g, ranges %g, %g / %g ->%g\n", unichar_id_,
+            other.unichar_id_, baseline_diff, this_range, other_range, denominator, overlap);
+  }
+
+  return overlap >= kMinXHeightMatch;
+}
+
+// Scans bc_list from its first element and returns the first BLOB_CHOICE
+// whose unichar_id equals char_id, or nullptr if the list has no such entry.
+BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list) {
+  BLOB_CHOICE_IT it(bc_list);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    BLOB_CHOICE *candidate = it.data();
+    if (candidate->unichar_id() == char_id) {
+      return candidate;
+    }
+    it.forward();
+  }
+  return nullptr;
+}
+
+// Returns the human-readable name of the given permuter value, looked up in
+// kPermuterTypeNames. Values that do not index an entry of the table yield
+// "Unknown" instead of reading past the end of the array.
+const char *WERD_CHOICE::permuter_name(uint8_t permuter) {
+  constexpr size_t kNumPermuterNames = sizeof(kPermuterTypeNames) / sizeof(kPermuterTypeNames[0]);
+  if (permuter >= kNumPermuterNames) {
+    return "Unknown";
+  }
+  return kPermuterTypeNames[permuter];
+}
+
+// Returns a short constant label for the given script position, or
+// "SP_UNKNOWN" if the value is not one of the four known positions.
+const char *ScriptPosToString(enum ScriptPos script_pos) {
+  if (script_pos == SP_NORMAL) {
+    return "NORM";
+  }
+  if (script_pos == SP_SUBSCRIPT) {
+    return "SUB";
+  }
+  if (script_pos == SP_SUPERSCRIPT) {
+    return "SUPER";
+  }
+  if (script_pos == SP_DROPCAP) {
+    return "DROPC";
+  }
+  return "SP_UNKNOWN";
+}
+
+/**
+ * WERD_CHOICE::WERD_CHOICE
+ *
+ * Builds a WERD_CHOICE from src_string, which must not be nullptr.
+ * The string is passed through unicharset.CleanupString() and then encoded.
+ * If encoding succeeds the word is initialized from the cleaned string with
+ * zero rating and certainty and NO_PERM; otherwise the word is given the
+ * default reservation of 8 and marked bad.
+ */
+WERD_CHOICE::WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset)
+    : unicharset_(&unicharset) {
+  const std::string cleaned = unicharset.CleanupString(src_string);
+  std::vector<UNICHAR_ID> encoding;
+  std::vector<char> lengths;
+  const bool encoded =
+      unicharset.encode_string(cleaned.c_str(), true, &encoding, &lengths, nullptr);
+  if (!encoded) {
+    // There must have been an invalid unichar in the string.
+    this->init(8);
+    this->make_bad();
+    return;
+  }
+  // Terminate the per-unichar byte lengths so they can be used as a C string.
+  lengths.push_back('\0');
+  const std::string src_lengths(lengths.data());
+  this->init(cleaned.c_str(), src_lengths.c_str(), 0.0, 0.0, NO_PERM);
+}
+
+/**
+ * WERD_CHOICE::init
+ *
+ * Fills this WERD_CHOICE from a string, optional per-unichar byte lengths,
+ * a rating, a certainty and a permuter.
+ *
+ * src_string must not be nullptr. If src_lengths is nullptr every unichar
+ * in src_string is taken to be one byte long. An empty src_string only
+ * reserves the default of 8 entries. Every unichar gets a state of 1 and
+ * the word certainty as its own certainty.
+ */
+void WERD_CHOICE::init(const char *src_string, const char *src_lengths, float src_rating,
+                       float src_certainty, uint8_t src_permuter) {
+  const int src_string_len = strlen(src_string);
+  if (src_string_len != 0) {
+    // One entry per unichar: the count comes from src_lengths when given.
+    this->init(src_lengths ? strlen(src_lengths) : src_string_len);
+    length_ = reserved_;
+    const char *next_unichar = src_string;
+    for (int i = 0; i < length_; ++i) {
+      const int unichar_length = src_lengths ? src_lengths[i] : 1;
+      unichar_ids_[i] = unicharset_->unichar_to_id(next_unichar, unichar_length);
+      state_[i] = 1;
+      certainties_[i] = src_certainty;
+      next_unichar += unichar_length;
+    }
+  } else {
+    this->init(8);
+  }
+  // Word-level fields are set on both paths.
+  rating_ = src_rating;
+  certainty_ = src_certainty;
+  permuter_ = src_permuter;
+  adjust_factor_ = 1.0f;
+  dangerous_ambig_found_ = false;
+}
+
+/**
+ * WERD_CHOICE::~WERD_CHOICE
+ *
+ * Defaulted destructor, defined out of line in this file.
+ */
+WERD_CHOICE::~WERD_CHOICE() = default;
+
+// Returns the human-readable name of this word's permuter, looked up in
+// kPermuterTypeNames. If permuter_ does not index an entry of the table,
+// "Unknown" is returned instead of reading past the end of the array.
+const char *WERD_CHOICE::permuter_name() const {
+  constexpr size_t kNumPermuterNames = sizeof(kPermuterTypeNames) / sizeof(kPermuterTypeNames[0]);
+  if (permuter_ >= kNumPermuterNames) {
+    return "Unknown";
+  }
+  return kPermuterTypeNames[permuter_];
+}
+
+// Returns the BLOB_CHOICE_LIST stored in the ratings MATRIX cell that
+// corresponds to the given index in the word. If the cell is empty, a new
+// empty list is created, stored in the cell and returned.
+// Borrowed pointer, so do not delete.
+BLOB_CHOICE_LIST *WERD_CHOICE::blob_choices(int index, MATRIX *ratings) const {
+  const MATRIX_COORD cell = MatrixCoord(index);
+  if (BLOB_CHOICE_LIST *existing = ratings->get(cell.col, cell.row)) {
+    return existing;
+  }
+  auto *created = new BLOB_CHOICE_LIST;
+  ratings->put(cell.col, cell.row, created);
+  return created;
+}
+
+// Returns the MATRIX_COORD of the ratings MATRIX cell for the given index
+// into the word. The column is the sum of the states before index, and the
+// row is the column plus the state at index, minus one.
+MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const {
+  int col = 0;
+  int i = 0;
+  while (i < index) {
+    col += state_[i];
+    ++i;
+  }
+  return MATRIX_COORD(col, col + state_[index] - 1);
+}
+
+// Overwrites the entries at the given index using blob_choice: the unichar
+// id and certainty come from blob_choice, the state becomes blob_count and
+// the script position is reset to SP_NORMAL.
+void WERD_CHOICE::set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice) {
+  state_[index] = blob_count;
+  script_pos_[index] = tesseract::SP_NORMAL;
+  unichar_ids_[index] = blob_choice->unichar_id();
+  certainties_[index] = blob_choice->certainty();
+}
+
+/**
+ * contains_unichar_id
+ *
+ * Linear search of the first length_ entries of unichar_ids_.
+ * Returns true as soon as one equals the given unichar_id, false if none does.
+ */
+bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const {
+  bool found = false;
+  for (int i = 0; i < length_ && !found; ++i) {
+    found = (unichar_ids_[i] == unichar_id);
+  }
+  return found;
+}
+
+/**
+ * remove_unichar_ids
+ *
+ * Removes num unichar ids starting at index start, shifting the later
+ * unichar ids, script positions, states and certainties down and reducing
+ * length_ by num. The states of the removed entries are added to the entry
+ * just before start, or, when start is 0, to the first entry after the
+ * removed range (if there is one).
+ * Note: this function does not modify rating_ and certainty_.
+ */
+void WERD_CHOICE::remove_unichar_ids(int start, int num) {
+  ASSERT_HOST(start >= 0 && start + num <= length_);
+  if (num > 0) {
+    // Total number of blobs covered by the entries being removed.
+    int merged_blobs = 0;
+    for (int i = start; i < start + num; ++i) {
+      merged_blobs += state_[i];
+    }
+    // Hand those blobs to a neighbour so the total of states is preserved.
+    if (start > 0) {
+      state_[start - 1] += merged_blobs;
+    } else if (start + num < length_) {
+      state_[start + num] += merged_blobs;
+    }
+  }
+  // Close the gap by moving the tail down over the removed entries.
+  for (int dst = start, src = start + num; src < length_; ++dst, ++src) {
+    unichar_ids_[dst] = unichar_ids_[src];
+    script_pos_[dst] = script_pos_[src];
+    state_[dst] = state_[src];
+    certainties_[dst] = certainties_[src];
+  }
+  length_ -= num;
+}
+
+/**
+ * reverse_and_mirror_unichar_ids
+ *
+ * Reverses the order of the unichar ids in place and replaces each one by
+ * its mirror as given by the unicharset. Only unichar_ids_ is changed.
+ */
+void WERD_CHOICE::reverse_and_mirror_unichar_ids() {
+  int left = 0;
+  int right = length_ - 1;
+  // Swap mirrored pairs, working inwards from both ends.
+  for (; left < right; ++left, --right) {
+    const UNICHAR_ID old_left = unichar_ids_[left];
+    unichar_ids_[left] = unicharset_->get_mirror(unichar_ids_[right]);
+    unichar_ids_[right] = unicharset_->get_mirror(old_left);
+  }
+  // An odd-length word has a middle unichar that stays put but is mirrored.
+  if (left == right) {
+    unichar_ids_[left] = unicharset_->get_mirror(unichar_ids_[left]);
+  }
+}
+
+/**
+ * punct_stripped
+ *
+ * Computes the half-open interval of unichar_id indices [start, end) that
+ * encloses the core portion of this word -- the part left after stripping
+ * punctuation from the left and right.
+ * For a word made up only of punctuation, *start is set to length() and
+ * *end to 0.
+ */
+void WERD_CHOICE::punct_stripped(int *start, int *end) const {
+  const int word_length = length();
+  // Skip leading punctuation.
+  int first = 0;
+  while (first < word_length && unicharset()->get_ispunctuation(unichar_id(first))) {
+    ++first;
+  }
+  // Skip trailing punctuation.
+  int last = word_length - 1;
+  while (last > -1 && unicharset()->get_ispunctuation(unichar_id(last))) {
+    --last;
+  }
+  *start = first;
+  *end = last + 1;
+}
+
+// Sets [*pstart, *pend) to the span of the word that remains after skipping
+// trailing and then leading unichars that are both digits and positioned as
+// superscripts.
+void WERD_CHOICE::GetNonSuperscriptSpan(int *pstart, int *pend) const {
+  int end = length();
+  for (; end > 0; --end) {
+    const int last = end - 1;
+    const bool superscript_digit = unicharset_->get_isdigit(unichar_ids_[last]) &&
+                                   BlobPosition(last) == tesseract::SP_SUPERSCRIPT;
+    if (!superscript_digit) {
+      break;
+    }
+  }
+  int start = 0;
+  for (; start < end; ++start) {
+    const bool superscript_digit = unicharset_->get_isdigit(unichar_ids_[start]) &&
+                                   BlobPosition(start) == tesseract::SP_SUPERSCRIPT;
+    if (!superscript_digit) {
+      break;
+    }
+  }
+  *pstart = start;
+  *pend = end;
+}
+
+// Returns a new WERD_CHOICE holding the unichar ids, states and certainties
+// of the entries in [start, end), each appended with a rating of 0.
+// Both bounds must lie in [0, length_]; an end below start gives an empty
+// result.
+WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const {
+  ASSERT_HOST(start >= 0 && start <= length_);
+  ASSERT_HOST(end >= 0 && end <= length_);
+  const int stop = std::max(start, end);
+  WERD_CHOICE retval(unicharset_, stop - start);
+  for (int src = start; src < stop; ++src) {
+    retval.append_unichar_id_space_allocated(unichar_ids_[src], state_[src], 0.0f,
+                                             certainties_[src]);
+  }
+  return retval;
+}
+
+/**
+ * has_rtl_unichar_id
+ *
+ * Returns true if at least one unichar in the word has a direction of
+ * U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC (a "strongly" RTL unichar).
+ */
+bool WERD_CHOICE::has_rtl_unichar_id() const {
+  for (int i = 0; i < length_; ++i) {
+    switch (unicharset_->get_direction(unichar_ids_[i])) {
+      case UNICHARSET::U_RIGHT_TO_LEFT:
+      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
+        return true;
+      default:
+        break;
+    }
+  }
+  return false;
+}
+
+/**
+ * string_and_lengths
+ *
+ * Replaces the contents of word_str with the concatenated unichars of this
+ * word. If word_lengths_str is not nullptr, it is replaced with one char
+ * per unichar holding that unichar's length in bytes.
+ */
+void WERD_CHOICE::string_and_lengths(std::string *word_str, std::string *word_lengths_str) const {
+  const bool want_lengths = word_lengths_str != nullptr;
+  word_str->clear();
+  if (want_lengths) {
+    word_lengths_str->clear();
+  }
+  for (int i = 0; i < length_; ++i) {
+    const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
+    word_str->append(ch);
+    if (want_lengths) {
+      word_lengths_str->push_back(static_cast<char>(strlen(ch)));
+    }
+  }
+}
+
+/**
+ * append_unichar_id
+ *
+ * Appends one unichar id to the word. If the word is already full
+ * (length_ equals reserved_) the storage is doubled first; the actual append
+ * is done by append_unichar_id_space_allocated().
+ */
+void WERD_CHOICE::append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating,
+                                    float certainty) {
+  const bool is_full = (length_ == reserved_);
+  if (is_full) {
+    double_the_size();
+  }
+  append_unichar_id_space_allocated(unichar_id, blob_count, rating, certainty);
+}
+
+/**
+ * WERD_CHOICE::operator+=
+ *
+ * Appends a second word to the end of this one. Both words must use the
+ * same unicharset.
+ * The ratings are added, the certainty is the min and the adjust factor is
+ * the max of the two. The dangerous-ambiguity flag is set if either word
+ * has it set.
+ * If this word has NO_PERM it takes the permuter of second; if the two
+ * permuters differ and neither is NO_PERM the permuter is set to
+ * COMPOUND_PERM.
+ */
+WERD_CHOICE &WERD_CHOICE::operator+=(const WERD_CHOICE &second) {
+  ASSERT_HOST(unicharset_ == second.unicharset_);
+  const auto extra_length = second.length();
+  // Grow until the combined word fits.
+  while (reserved_ < length_ + extra_length) {
+    this->double_the_size();
+  }
+  // Copy the per-unichar data of second behind the existing entries.
+  const std::vector<UNICHAR_ID> &second_ids = second.unichar_ids();
+  for (int i = 0; i < extra_length; ++i) {
+    const int dst = length_ + i;
+    unichar_ids_[dst] = second_ids[i];
+    state_[dst] = second.state_[i];
+    certainties_[dst] = second.certainties_[i];
+    script_pos_[dst] = second.BlobPosition(i);
+  }
+  length_ += extra_length;
+
+  // Merge the word-level fields.
+  adjust_factor_ = std::max(adjust_factor_, second.adjust_factor_);
+  rating_ += second.rating(); // add ratings
+  if (second.certainty() < certainty_) { // take min
+    certainty_ = second.certainty();
+  }
+  dangerous_ambig_found_ = dangerous_ambig_found_ || second.dangerous_ambig_found_;
+
+  const auto second_permuter = second.permuter();
+  if (permuter_ == NO_PERM) {
+    permuter_ = second_permuter;
+  } else if (second_permuter != NO_PERM && second_permuter != permuter_) {
+    permuter_ = COMPOUND_PERM;
+  }
+  return *this;
+}
+
+/**
+ * WERD_CHOICE::operator=
+ *
+ * Grows this word until it can hold source, then copies the unicharset
+ * pointer, the per-unichar data (ids, states, certainties, script
+ * positions) and the word-level fields from source.
+ */
+WERD_CHOICE &WERD_CHOICE::operator=(const WERD_CHOICE &source) {
+  const auto source_length = source.length();
+  while (reserved_ < source_length) {
+    this->double_the_size();
+  }
+
+  unicharset_ = source.unicharset_;
+  // Per-unichar data.
+  const std::vector<UNICHAR_ID> &source_ids = source.unichar_ids();
+  for (int i = 0; i < source_length; ++i) {
+    unichar_ids_[i] = source_ids[i];
+    state_[i] = source.state_[i];
+    certainties_[i] = source.certainties_[i];
+    script_pos_[i] = source.BlobPosition(i);
+  }
+  length_ = source_length;
+  // Word-level data.
+  rating_ = source.rating();
+  certainty_ = source.certainty();
+  adjust_factor_ = source.adjust_factor_;
+  min_x_height_ = source.min_x_height();
+  max_x_height_ = source.max_x_height();
+  permuter_ = source.permuter();
+  dangerous_ambig_found_ = source.dangerous_ambig_found_;
+  return *this;
+}
+
+// Sets up the script_pos_ member using the blobs of word to get the bln
+// bounding boxes, *this to get the unichars, and this->unicharset
+// to get the target positions. If small_caps is true, sub/super are not
+// considered, but dropcaps are.
+// All positions are first reset to SP_NORMAL, and are left that way if word
+// has no blobs or its blob count differs from TotalOfStates().
+// debug >= 1 prints the per-character detail when any position is not
+// normal; debug >= 2 always prints it.
+// NOTE: word should be the chopped_word. (Fully segmented.)
+void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
+  // Initialize to normal.
+  for (int i = 0; i < length_; ++i) {
+    script_pos_[i] = tesseract::SP_NORMAL;
+  }
+  if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
+    return;
+  }
+
+  // Number of unichars found at each position; indexed by ScriptPos value.
+  int position_counts[4] = {0, 0, 0, 0};
+
+  // chunk_index walks the blobs of word while blob_index walks the unichars;
+  // a unichar with state n consumes n consecutive blobs.
+  int chunk_index = 0;
+  for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) {
+    TBLOB *tblob = word->blobs[chunk_index];
+    int uni_id = unichar_id(blob_index);
+    TBOX blob_box = tblob->bounding_box();
+    if (!state_.empty()) {
+      // Merge in the boxes of the remaining blobs of this unichar.
+      for (int i = 1; i < state_[blob_index]; ++i) {
+        ++chunk_index;
+        tblob = word->blobs[chunk_index];
+        blob_box += tblob->bounding_box();
+      }
+    }
+    script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box, uni_id);
+    // With small caps only drop caps are kept; everything else is normal.
+    if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) {
+      script_pos_[blob_index] = tesseract::SP_NORMAL;
+    }
+    position_counts[script_pos_[blob_index]]++;
+  }
+  // If almost everything looks like a superscript or subscript,
+  // we most likely just got the baseline wrong.
+  if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ ||
+      position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) {
+    if (debug >= 2) {
+      tprintf(
+          "Most characters of %s are subscript or superscript.\n"
+          "That seems wrong, so I'll assume we got the baseline wrong\n",
+          unichar_string().c_str());
+    }
+    // Demote every sub/superscript to normal, keeping the counts in step.
+    for (int i = 0; i < length_; i++) {
+      ScriptPos sp = script_pos_[i];
+      if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) {
+        position_counts[sp]--;
+        position_counts[tesseract::SP_NORMAL]++;
+        script_pos_[i] = tesseract::SP_NORMAL;
+      }
+    }
+  }
+
+  if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) || debug >= 2) {
+    tprintf("SetScriptPosition on %s\n", unichar_string().c_str());
+    int chunk_index = 0;
+    for (int blob_index = 0; blob_index < length_; ++blob_index) {
+      if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) {
+        // Called only for its debug print; the return value is discarded.
+        // NOTE(review): this uses the box of the first blob of the unichar
+        // only, whereas the pass above merges the boxes of all its blobs, so
+        // the printed position may differ from the stored one - confirm
+        // whether that is intended.
+        TBLOB *tblob = word->blobs[chunk_index];
+        ScriptPositionOf(true, *unicharset_, tblob->bounding_box(), unichar_id(blob_index));
+      }
+      chunk_index += state_.empty() ? 1 :  state_[blob_index];
+    }
+  }
+}
+
+// Assigns the given position to the script_pos_ entry of every unichar in
+// the word.
+void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) {
+  int i = 0;
+  while (i < length_) {
+    script_pos_[i] = position;
+    ++i;
+  }
+}
+
+/* static */
+// Classifies blob_box, the box of a blob recognized as unichar_id, as a drop
+// cap, subscript, superscript or normal character, using the top/bottom
+// ranges that the unicharset holds for unichar_id. The tests are applied in
+// that order and the first match wins. If print_debug is true the inputs,
+// thresholds and result are printed.
+ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
+                                        const TBOX &blob_box, UNICHAR_ID unichar_id) {
+  const int top = blob_box.top();
+  const int bottom = blob_box.bottom();
+  int min_bottom, max_bottom, min_top, max_top;
+  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
+
+  // Thresholds derived from the expected ranges of the character.
+  const int sub_thresh_top = min_top - kMinSubscriptOffset;
+  const int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset;
+  const int sup_thresh_bot = max_bottom + kMinSuperscriptOffset;
+
+  ScriptPos retval;
+  if (bottom <= kMaxDropCapBottom) {
+    retval = tesseract::SP_DROPCAP;
+  } else if (top < sub_thresh_top && bottom < sub_thresh_bot) {
+    retval = tesseract::SP_SUBSCRIPT;
+  } else if (bottom > sup_thresh_bot) {
+    retval = tesseract::SP_SUPERSCRIPT;
+  } else {
+    retval = tesseract::SP_NORMAL;
+  }
+
+  if (print_debug) {
+    const char *pos = ScriptPosToString(retval);
+    tprintf(
+        "%s Character %s[bot:%d top: %d]  "
+        "bot_range[%d,%d]  top_range[%d, %d] "
+        "sub_thresh[bot:%d top:%d]  sup_thresh_bot %d\n",
+        pos, unicharset.id_to_unichar(unichar_id), bottom, top, min_bottom, max_bottom, min_top,
+        max_top, sub_thresh_bot, sub_thresh_top, sup_thresh_bot);
+  }
+  return retval;
+}
+
+// Returns the script-id (eg Han) of the dominant script in the word.
+// When the unicharset has a Han script, the Hiragana and Katakana counts are
+// folded into the Han count. If the winning script covers fewer than
+// length_ / 2 (integer division) of the unichars, the unicharset's null
+// script id is returned instead.
+int WERD_CHOICE::GetTopScriptID() const {
+  const int max_script = unicharset_->get_script_table_size();
+  // Occurrence count per script id, zero-initialized. The vector owns the
+  // storage, so no manual cleanup is needed on any exit path.
+  std::vector<int> sid(max_script, 0);
+  for (int x = 0; x < length_; ++x) {
+    const int script_id = unicharset_->get_script(unichar_id(x));
+    sid[script_id]++;
+  }
+  if (unicharset_->han_sid() != unicharset_->null_sid()) {
+    // Add the Hiragana & Katakana counts to Han and zero them out.
+    if (unicharset_->hiragana_sid() != unicharset_->null_sid()) {
+      sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()];
+      sid[unicharset_->hiragana_sid()] = 0;
+    }
+    if (unicharset_->katakana_sid() != unicharset_->null_sid()) {
+      sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()];
+      sid[unicharset_->katakana_sid()] = 0;
+    }
+  }
+  // Note that high script ID overrides lower one on a tie, thus biasing
+  // towards non-Common script (if sorted that way in unicharset file).
+  int max_sid = 0;
+  for (int x = 1; x < max_script; x++) {
+    if (sid[x] >= sid[max_sid]) {
+      max_sid = x;
+    }
+  }
+  if (sid[max_sid] < length_ / 2) {
+    max_sid = unicharset_->null_sid();
+  }
+  return max_sid;
+}
+
+// Fixes the state_ for a chop at the given blob_position: the state of the
+// first unichar whose cumulative blob count exceeds blob_position is
+// incremented by one. Nothing changes if there is no such unichar.
+void WERD_CHOICE::UpdateStateForSplit(int blob_position) {
+  int chunks_so_far = 0;
+  for (int i = 0; i < length_; ++i) {
+    chunks_so_far += state_[i];
+    if (chunks_so_far <= blob_position) {
+      continue;
+    }
+    ++state_[i];
+    break;
+  }
+}
+
+// Adds up the state of every unichar in the word and returns the sum, which
+// is the total number of blobs.
+int WERD_CHOICE::TotalOfStates() const {
+  int blob_total = 0;
+  int i = 0;
+  while (i < length_) {
+    blob_total += state_[i];
+    ++i;
+  }
+  return blob_total;
+}
+
+/**
+ * WERD_CHOICE::print
+ *
+ * Prints the WERD_CHOICE via tprintf: first a summary line with msg, the
+ * word text and the word-level fields, then one tab-separated row each for
+ * the script positions, the unichars, the states and the certainties.
+ */
+void WERD_CHOICE::print(const char *msg) const {
+  // Summary line.
+  tprintf("%s : ", msg);
+  for (int pos = 0; pos < length_; ++pos) {
+    tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[pos]));
+  }
+  tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", rating_, certainty_,
+          adjust_factor_, permuter_, min_x_height_, max_x_height_, dangerous_ambig_found_);
+
+  // Script position of each unichar.
+  tprintf("pos");
+  for (int pos = 0; pos < length_; ++pos) {
+    tprintf("\t%s", ScriptPosToString(script_pos_[pos]));
+  }
+
+  // Text of each unichar.
+  tprintf("\nstr");
+  for (int pos = 0; pos < length_; ++pos) {
+    tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[pos]));
+  }
+
+  // State of each unichar.
+  tprintf("\nstate:");
+  for (int pos = 0; pos < length_; ++pos) {
+    tprintf("\t%d ", state_[pos]);
+  }
+
+  // Certainty of each unichar.
+  tprintf("\nC");
+  for (int pos = 0; pos < length_; ++pos) {
+    tprintf("\t%.3f", certainties_[pos]);
+  }
+  tprintf("\n");
+}
+
+// Prints msg followed by the state of every unichar, each preceded by a
+// space, and ends the line.
+void WERD_CHOICE::print_state(const char *msg) const {
+  tprintf("%s", msg);
+  int i = 0;
+  while (i < length_) {
+    tprintf(" %d", state_[i]);
+    ++i;
+  }
+  tprintf("\n");
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Displays the segmentation state of *this (if not the same as the last
+// one displayed) and waits for a click in the window.
+// The window and the last drawn state are function-local statics, so they
+// are shared by every WERD_CHOICE and persist between calls.
+// Nothing is drawn if word has no blobs.
+void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
+  // Number of different colors to draw with.
+  const int kNumColors = 6;
+  static ScrollView *segm_window = nullptr;
+  // Check the state against the static prev_drawn_state.
+  static std::vector<int> prev_drawn_state;
+  // A different number of entries already means the state has changed.
+  bool already_done = prev_drawn_state.size() == length_;
+  if (!already_done) {
+    prev_drawn_state.resize(length_);
+  }
+  // Compare entry by entry, recording the current state as we go.
+  for (int i = 0; i < length_; ++i) {
+    if (prev_drawn_state[i] != state_[i]) {
+      already_done = false;
+    }
+    prev_drawn_state[i] = state_[i];
+  }
+  if (already_done || word->blobs.empty()) {
+    return;
+  }
+
+  // Create the window if needed.
+  if (segm_window == nullptr) {
+    segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true);
+  } else {
+    segm_window->Clear();
+  }
+
+  // Plot the blobs of each unichar in one color, cycling through kNumColors
+  // colors, and accumulate the bounding box of everything drawn.
+  TBOX bbox;
+  int blob_index = 0;
+  for (int c = 0; c < length_; ++c) {
+    auto color = static_cast<ScrollView::Color>(c % kNumColors + 3);
+    for (int i = 0; i < state_[c]; ++i, ++blob_index) {
+      TBLOB *blob = word->blobs[blob_index];
+      bbox += blob->bounding_box();
+      blob->plot(segm_window, color, color);
+    }
+  }
+  segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom());
+  segm_window->Update();
+  segm_window->Wait();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Returns true if word1 and word2 use the same unicharset and, after
+// punct_stripped() has removed leading and trailing punctuation from each,
+// their remaining spans have the same length and the same unichars when
+// compared via to_lower().
+bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2) {
+  const UNICHARSET *uchset = word1.unicharset();
+  if (uchset != word2.unicharset()) {
+    return false;
+  }
+  int w1start, w1end;
+  int w2start, w2end;
+  word1.punct_stripped(&w1start, &w1end);
+  word2.punct_stripped(&w2start, &w2end);
+  const int core_length = w1end - w1start;
+  if (core_length != w2end - w2start) {
+    return false;
+  }
+  for (int offset = 0; offset < core_length; ++offset) {
+    const auto lower1 = uchset->to_lower(word1.unichar_id(w1start + offset));
+    const auto lower2 = uchset->to_lower(word2.unichar_id(w2start + offset));
+    if (lower1 != lower2) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * print_ratings_list
+ *
+ * Prints every BLOB_CHOICE in ratings via tprintf, one per line, and then
+ * flushes stdout. An empty list prints only the message followed by
+ * ":<none>".
+ *
+ * @param msg intro message; printed on its own line unless it is empty
+ * @param ratings list of ratings
+ * @param current_unicharset unicharset that can be used
+ * for id-to-unichar conversion
+ */
+void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings,
+                        const UNICHARSET &current_unicharset) {
+  if (ratings->empty()) {
+    tprintf("%s:<none>\n", msg);
+    return;
+  }
+  if (msg[0] != '\0') {
+    tprintf("%s\n", msg);
+  }
+  BLOB_CHOICE_IT c_it;
+  c_it.set_to_list(ratings);
+  c_it.mark_cycle_pt();
+  while (!c_it.cycled_list()) {
+    c_it.data()->print(&current_unicharset);
+    // Separate consecutive choices; the final newline is printed below.
+    if (!c_it.at_last()) {
+      tprintf("\n");
+    }
+    c_it.forward();
+  }
+  tprintf("\n");
+  fflush(stdout);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ratngs.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/ratngs.h
@ -0,0 +1,638 @@
+/**********************************************************************
+ * File:        ratngs.h  (Formerly ratings.h)
+ * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef RATNGS_H
+#define RATNGS_H
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
+#endif
+
+#include "clst.h"
+#include "elst.h"
+#ifndef DISABLED_LEGACY_ENGINE
+#  include "fontinfo.h"
+#endif // undef DISABLED_LEGACY_ENGINE
+#include "matrix.h"
+#include "unicharset.h"
+#include "werd.h"
+
+#include <tesseract/unichar.h>
+
+#include <cassert>
+#include <cfloat> // for FLT_MAX
+
+namespace tesseract {
+
+class MATRIX;
+struct TBLOB;
+struct TWERD;
+
+// Enum to describe the source of a BLOB_CHOICE to make it possible to determine
+// whether a blob has been classified by inspecting the BLOB_CHOICEs.
+// BLOB_CHOICE::IsClassified() treats the static, adapted and speckle values
+// as "classified"; BCC_AMBIG and BCC_FAKE are not.
+enum BlobChoiceClassifier {
+  BCC_STATIC_CLASSIFIER,  // From the char_norm classifier.
+  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
+  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
+  BCC_AMBIG,              // Generated by ambiguity detection.
+  BCC_FAKE,               // From some other process.
+};
+
+// One classification result: a unichar id with its rating, certainty, script,
+// font guesses and x-height constraints, plus the ratings-matrix cell it came
+// from. Derives from ELIST_LINK so that it can live in a BLOB_CHOICE_LIST.
+class BLOB_CHOICE : public ELIST_LINK {
+public:
+  // Builds a placeholder choice: UNICHAR_SPACE with rating 10, certainty -1,
+  // no script or font information, marked as BCC_FAKE.
+  BLOB_CHOICE() {
+    unichar_id_ = UNICHAR_SPACE;
+    fontinfo_id_ = -1;
+    fontinfo_id2_ = -1;
+    rating_ = 10.0;
+    certainty_ = -1.0;
+    script_id_ = -1;
+    min_xheight_ = 0.0f;
+    max_xheight_ = 0.0f;
+    yshift_ = 0.0f;
+    classifier_ = BCC_FAKE;
+  }
+  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
+              float src_rating,          // rating
+              float src_cert,            // certainty
+              int script_id,             // script
+              float min_xheight,         // min xheight in image pixel units
+              float max_xheight,         // max xheight allowed by this char
+              float yshift,              // the larger of y shift (top or bottom)
+              BlobChoiceClassifier c);   // adapted match or other
+  BLOB_CHOICE(const BLOB_CHOICE &other);
+  ~BLOB_CHOICE() = default;
+
+  UNICHAR_ID unichar_id() const {
+    return unichar_id_;
+  }
+  float rating() const {
+    return rating_;
+  }
+  float certainty() const {
+    return certainty_;
+  }
+  // Id of the best-scoring font, or -1 if none has been set.
+  int16_t fontinfo_id() const {
+    return fontinfo_id_;
+  }
+  // Id of the second-best-scoring font, or -1 if none has been set.
+  int16_t fontinfo_id2() const {
+    return fontinfo_id2_;
+  }
+#ifndef DISABLED_LEGACY_ENGINE
+  const std::vector<ScoredFont> &fonts() const {
+    return fonts_;
+  }
+  // Stores a copy of fonts and recomputes fontinfo_id_ / fontinfo_id2_ as the
+  // ids of the highest and second-highest scoring entries. Only entries with
+  // a score greater than 0 can be selected; otherwise the ids remain -1.
+  void set_fonts(const std::vector<ScoredFont> &fonts) {
+    fonts_ = fonts;
+    int score1 = 0, score2 = 0;
+    fontinfo_id_ = -1;
+    fontinfo_id2_ = -1;
+    for (const auto &f : fonts_) {
+      if (f.score > score1) {
+        // New best: the previous best becomes the runner-up.
+        score2 = score1;
+        fontinfo_id2_ = fontinfo_id_;
+        score1 = f.score;
+        fontinfo_id_ = f.fontinfo_id;
+      } else if (f.score > score2) {
+        score2 = f.score;
+        fontinfo_id2_ = f.fontinfo_id;
+      }
+    }
+  }
+#endif // ndef DISABLED_LEGACY_ENGINE
+  int script_id() const {
+    return script_id_;
+  }
+  // Read-only access to the ratings-matrix cell of this choice.
+  const MATRIX_COORD &matrix_cell() const {
+    return matrix_cell_;
+  }
+  float min_xheight() const {
+    return min_xheight_;
+  }
+  float max_xheight() const {
+    return max_xheight_;
+  }
+  float yshift() const {
+    return yshift_;
+  }
+  BlobChoiceClassifier classifier() const {
+    return classifier_;
+  }
+  bool IsAdapted() const {
+    return classifier_ == BCC_ADAPTED_CLASSIFIER;
+  }
+  // True if this choice came from one of the real classifiers (static,
+  // adapted or speckle), as opposed to BCC_AMBIG or BCC_FAKE.
+  bool IsClassified() const {
+    return classifier_ == BCC_STATIC_CLASSIFIER || classifier_ == BCC_ADAPTED_CLASSIFIER ||
+           classifier_ == BCC_SPECKLE_CLASSIFIER;
+  }
+
+  void set_unichar_id(UNICHAR_ID newunichar_id) {
+    unichar_id_ = newunichar_id;
+  }
+  void set_rating(float newrat) {
+    rating_ = newrat;
+  }
+  void set_certainty(float newrat) {
+    certainty_ = newrat;
+  }
+  void set_script(int newscript_id) {
+    script_id_ = newscript_id;
+  }
+  void set_matrix_cell(int col, int row) {
+    matrix_cell_.col = col;
+    matrix_cell_.row = row;
+  }
+  void set_classifier(BlobChoiceClassifier classifier) {
+    classifier_ = classifier;
+  }
+  // Returns a newly allocated copy of *src, made with the private assignment
+  // operator. The caller owns the returned object.
+  static BLOB_CHOICE *deep_copy(const BLOB_CHOICE *src) {
+    auto *choice = new BLOB_CHOICE;
+    *choice = *src;
+    return choice;
+  }
+  // Returns true if *this and other agree on the baseline and x-height
+  // to within some tolerance based on a given estimate of the x-height.
+  bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;
+
+  // Prints rating, certainty, x-height range and unichar id on one line
+  // (no trailing newline). If unicharset is non-null the debug string of the
+  // unichar is appended.
+  void print(const UNICHARSET *unicharset) const {
+    tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
+            unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
+  }
+  // As print(nullptr), followed by script, fonts, yshift and classifier and
+  // a trailing newline.
+  void print_full() const {
+    print(nullptr);
+    tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
+            fontinfo_id2_, yshift_, classifier_);
+  }
+  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
+  // p1 and p2 each point at a BLOB_CHOICE pointer (qsort-style comparator).
+  // Returns -1, 0 or 1. Equal ratings must compare as 0: a comparator that
+  // reports "greater" for both (a, b) and (b, a) is inconsistent and breaks
+  // the contract qsort-style sorts rely on.
+  static int SortByRating(const void *p1, const void *p2) {
+    const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
+    const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
+    if (bc1->rating_ < bc2->rating_) {
+      return -1;
+    }
+    return (bc2->rating_ < bc1->rating_) ? 1 : 0;
+  }
+
+private:
+  // Copy assignment operator. Private: external code copies via deep_copy()
+  // or the copy constructor.
+  BLOB_CHOICE &operator=(const BLOB_CHOICE &other);
+
+  UNICHAR_ID unichar_id_; // unichar id
+#ifndef DISABLED_LEGACY_ENGINE
+  // Fonts and scores. Allowed to be empty.
+  std::vector<ScoredFont> fonts_;
+#endif                   // ndef DISABLED_LEGACY_ENGINE
+  int16_t fontinfo_id_;  // char font information
+  int16_t fontinfo_id2_; // 2nd choice font information
+  // Rating is the classifier distance weighted by the length of the outline
+  // in the blob. In terms of probability, classifier distance is -klog p such
+  // that the resulting distance is in the range [0, 1] and then
+  // rating = w (-k log p) where w is the weight for the length of the outline.
+  // Sums of ratings may be compared meaningfully for words of different
+  // segmentation.
+  float rating_; // size related
+  // Certainty is a number in [-20, 0] indicating the classifier certainty
+  // of the choice. In terms of probability, certainty = 20 (k log p) where
+  // k is defined as above to normalize -klog p to the range [0, 1].
+  float certainty_; // absolute
+  int script_id_;
+  // Holds the position of this choice in the ratings matrix.
+  // Used to locate the position in the matrix during path backtracking.
+  MATRIX_COORD matrix_cell_;
+  // X-height range (in image pixels) that this classification supports.
+  float min_xheight_;
+  float max_xheight_;
+  // yshift_ - The vertical distance (in image pixels) the character is
+  //           shifted (up or down) from an acceptable y position.
+  float yshift_;
+  BlobChoiceClassifier classifier_; // What generated *this.
+};
+
+// Make BLOB_CHOICE listable.
+ELISTIZEH(BLOB_CHOICE)
+
+// Return the BLOB_CHOICE in bc_list matching a given unichar_id,
+// or nullptr if there is no match.
+BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
+
+// Permuter codes used in WERD_CHOICEs.
+// WERD_CHOICE stores and passes these codes as uint8_t. The trailing numeric
+// comments give each enumerator's implicit value.
+enum PermuterType {
+  NO_PERM,           // 0
+  PUNC_PERM,         // 1
+  TOP_CHOICE_PERM,   // 2
+  LOWER_CASE_PERM,   // 3
+  UPPER_CASE_PERM,   // 4
+  NGRAM_PERM,        // 5
+  NUMBER_PERM,       // 6
+  USER_PATTERN_PERM, // 7
+  SYSTEM_DAWG_PERM,  // 8
+  DOC_DAWG_PERM,     // 9
+  USER_DAWG_PERM,    // 10
+  FREQ_DAWG_PERM,    // 11
+  COMPOUND_PERM,     // 12
+
+  // Number of permuter codes above; keep this entry last.
+  NUM_PERMUTER_TYPES
+};
+
+// ScriptPos tells whether a character is normal, subscript, superscript or a
+// drop cap.
+enum ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP };
+
+const char *ScriptPosToString(ScriptPos script_pos);
+
+// A candidate reading of a word: parallel per-unichar arrays (unichar id,
+// script position, blob count and certainty) plus word-level rating,
+// certainty, x-height range and permuter code. Derives from ELIST_LINK so
+// that it can live in a WERD_CHOICE_LIST.
+class TESS_API WERD_CHOICE : public ELIST_LINK {
+public:
+  static const float kBadRating;
+  static const char *permuter_name(uint8_t permuter);
+
+  // Creates an empty word with room reserved for 8 unichars.
+  WERD_CHOICE(const UNICHARSET *unicharset) : unicharset_(unicharset) {
+    this->init(8);
+  }
+  // Creates an empty word with room reserved for the given number of unichars.
+  WERD_CHOICE(const UNICHARSET *unicharset, int reserved) : unicharset_(unicharset) {
+    this->init(reserved);
+  }
+  WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating,
+              float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
+      : unicharset_(&unicharset) {
+    this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
+  }
+  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
+  // Copy constructor: sizes the arrays for word.length() entries and then
+  // delegates to operator=.
+  WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
+    this->init(word.length());
+    this->operator=(word);
+  }
+  ~WERD_CHOICE();
+
+  const UNICHARSET *unicharset() const {
+    return unicharset_;
+  }
+  bool empty() const {
+    return length_ == 0;
+  }
+  inline int length() const {
+    return length_;
+  }
+  float adjust_factor() const {
+    return adjust_factor_;
+  }
+  void set_adjust_factor(float factor) {
+    adjust_factor_ = factor;
+  }
+  // Returns the whole backing vector. Its size is the reserved size, which
+  // may exceed length().
+  inline const std::vector<UNICHAR_ID> &unichar_ids() const {
+    return unichar_ids_;
+  }
+  // Returns the unichar id at index. Only the upper bound is asserted.
+  inline UNICHAR_ID unichar_id(int index) const {
+    assert(index < length_);
+    return unichar_ids_[index];
+  }
+  // Number of blobs that make up the unichar at index (no bounds check).
+  inline int state(int index) const {
+    return state_[index];
+  }
+  // Returns the script position at index, or SP_NORMAL if index is outside
+  // [0, length_).
+  ScriptPos BlobPosition(int index) const {
+    if (index < 0 || index >= length_) {
+      return SP_NORMAL;
+    }
+    return script_pos_[index];
+  }
+  inline float rating() const {
+    return rating_;
+  }
+  inline float certainty() const {
+    return certainty_;
+  }
+  // Certainty of the unichar at index (no bounds check).
+  inline float certainty(int index) const {
+    return certainties_[index];
+  }
+  inline float min_x_height() const {
+    return min_x_height_;
+  }
+  inline float max_x_height() const {
+    return max_x_height_;
+  }
+  inline void set_x_heights(float min_height, float max_height) {
+    min_x_height_ = min_height;
+    max_x_height_ = max_height;
+  }
+  inline uint8_t permuter() const {
+    return permuter_;
+  }
+  const char *permuter_name() const;
+  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
+  // taken from the appropriate cell in the ratings MATRIX.
+  // Borrowed pointer, so do not delete.
+  BLOB_CHOICE_LIST *blob_choices(int index, MATRIX *ratings) const;
+
+  // Returns the MATRIX_COORD corresponding to the location in the ratings
+  // MATRIX for the given index into the word.
+  MATRIX_COORD MatrixCoord(int index) const;
+
+  // Overwrites only the unichar id at index; rating, certainty and state are
+  // left untouched. Only the upper bound is asserted.
+  inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
+    assert(index < length_);
+    unichar_ids_[index] = unichar_id;
+  }
+  bool dangerous_ambig_found() const {
+    return dangerous_ambig_found_;
+  }
+  void set_dangerous_ambig_found_(bool value) {
+    dangerous_ambig_found_ = value;
+  }
+  inline void set_rating(float new_val) {
+    rating_ = new_val;
+  }
+  inline void set_certainty(float new_val) {
+    certainty_ = new_val;
+  }
+  inline void set_permuter(uint8_t perm) {
+    permuter_ = perm;
+  }
+  // Note: this function should only be used if all the fields
+  // are populated manually with set_* functions (rather than
+  // (copy) constructors and append_* functions).
+  // Aborts via ASSERT_HOST if len exceeds the reserved size.
+  inline void set_length(int len) {
+    ASSERT_HOST(reserved_ >= len);
+    length_ = len;
+  }
+
+  /// Make more space in the per-unichar arrays (unichar_ids_, script_pos_,
+  /// state_ and certainties_): doubles reserved_, or sets it to 1 if it was
+  /// not positive.
+  inline void double_the_size() {
+    if (reserved_ > 0) {
+      reserved_ *= 2;
+    } else {
+      reserved_ = 1;
+    }
+    unichar_ids_.resize(reserved_);
+    script_pos_.resize(reserved_);
+    state_.resize(reserved_);
+    certainties_.resize(reserved_);
+  }
+
+  /// Initializes WERD_CHOICE - sizes the per-unichar arrays (unichar_ids_,
+  /// script_pos_, state_ and certainties_) to hold reserved entries, or
+  /// clears them if reserved is not positive. Sets other values to default
+  /// (blank) values.
+  inline void init(int reserved) {
+    reserved_ = reserved;
+    if (reserved > 0) {
+      unichar_ids_.resize(reserved);
+      script_pos_.resize(reserved);
+      state_.resize(reserved);
+      certainties_.resize(reserved);
+    } else {
+      unichar_ids_.clear();
+      script_pos_.clear();
+      state_.clear();
+      certainties_.clear();
+    }
+    length_ = 0;
+    adjust_factor_ = 1.0f;
+    rating_ = 0.0;
+    // Starts at FLT_MAX so that the first appended unichar lowers it.
+    certainty_ = FLT_MAX;
+    min_x_height_ = 0.0f;
+    max_x_height_ = FLT_MAX;
+    permuter_ = NO_PERM;
+    unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
+    dangerous_ambig_found_ = false;
+  }
+
+  /// Helper function to build a WERD_CHOICE from the given string,
+  /// fragment lengths, rating, certainty and permuter.
+  /// The function assumes that src_string is not nullptr.
+  /// src_lengths argument could be nullptr, in which case the unichars
+  /// in src_string are assumed to all be of length 1.
+  void init(const char *src_string, const char *src_lengths, float src_rating, float src_certainty,
+            uint8_t src_permuter);
+
+  /// Set the fields in this choice to be default (bad) values.
+  inline void make_bad() {
+    length_ = 0;
+    rating_ = kBadRating;
+    certainty_ = -FLT_MAX;
+  }
+
+  /// This function assumes that there is enough space reserved
+  /// in the WERD_CHOICE for adding another unichar.
+  /// This is an efficient alternative to append_unichar_id().
+  inline void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating,
+                                                float certainty) {
+    assert(reserved_ > length_);
+    length_++;
+    this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
+  }
+
+  void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty);
+
+  // Sets every per-unichar field at index (script position is reset to
+  // SP_NORMAL), adds rating to the word rating and lowers the word certainty
+  // to certainty if that is worse. Note that rating is accumulated, not
+  // replaced, so calling this twice for the same index adds both ratings.
+  inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty,
+                             int index) {
+    assert(index < length_);
+    unichar_ids_[index] = unichar_id;
+    state_[index] = blob_count;
+    certainties_[index] = certainty;
+    script_pos_[index] = SP_NORMAL;
+    rating_ += rating;
+    if (certainty < certainty_) {
+      certainty_ = certainty;
+    }
+  }
+  // Sets the entries for the given index from the BLOB_CHOICE, assuming
+  // unit fragment lengths, but setting the state for this index to blob_count.
+  void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice);
+
+  bool contains_unichar_id(UNICHAR_ID unichar_id) const;
+  void remove_unichar_ids(int index, int num);
+  // Drops the last unichar by shortening the word. There is no check that the
+  // word is non-empty, and rating/certainty are not adjusted.
+  inline void remove_last_unichar_id() {
+    --length_;
+  }
+  inline void remove_unichar_id(int index) {
+    this->remove_unichar_ids(index, 1);
+  }
+  bool has_rtl_unichar_id() const;
+  void reverse_and_mirror_unichar_ids();
+
+  // Returns the half-open interval of unichar_id indices [start, end) which
+  // enclose the core portion of this word -- the part after stripping
+  // punctuation from the left and right.
+  void punct_stripped(int *start_core, int *end_core) const;
+
+  // Returns the indices [start, end) containing the core of the word, stripped
+  // of any superscript digits on either side. (i.e., the non-footnote part
+  // of the word). There is no guarantee that the output range is non-empty.
+  void GetNonSuperscriptSpan(int *start, int *end) const;
+
+  // Return a copy of this WERD_CHOICE with the choices [start, end).
+  // The result is useful only for checking against a dictionary.
+  WERD_CHOICE shallow_copy(int start, int end) const;
+
+  void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const;
+  // Returns the debug strings of all unichars in the word, each followed by
+  // a single space.
+  std::string debug_string() const {
+    std::string word_str;
+    for (int i = 0; i < length_; ++i) {
+      word_str += unicharset_->debug_str(unichar_ids_[i]);
+      word_str += " ";
+    }
+    return word_str;
+  }
+  // Returns true if any unichar_id in the word is a non-space-delimited char.
+  bool ContainsAnyNonSpaceDelimited() const {
+    for (int i = 0; i < length_; ++i) {
+      if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Returns true if the word is all spaces (also true for an empty word).
+  bool IsAllSpaces() const {
+    for (int i = 0; i < length_; ++i) {
+      if (unichar_ids_[i] != UNICHAR_SPACE) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Call this to override the default (strict left to right graphemes)
+  // with the fact that some engine produces a "reading order" set of
+  // Graphemes for each word.
+  // Returns the value that was just stored.
+  bool set_unichars_in_script_order(bool in_script_order) {
+    return unichars_in_script_order_ = in_script_order;
+  }
+
+  bool unichars_in_script_order() const {
+    return unichars_in_script_order_;
+  }
+
+  // Returns a UTF-8 string equivalent to the current choice
+  // of UNICHAR IDs.
+  // The cached string is regenerated on every call; the returned reference
+  // refers to that member.
+  std::string &unichar_string() {
+    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
+    return unichar_string_;
+  }
+
+  // Returns a UTF-8 string equivalent to the current choice
+  // of UNICHAR IDs.
+  // Const overload: still regenerates the (mutable) cached strings on every
+  // call.
+  const std::string &unichar_string() const {
+    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
+    return unichar_string_;
+  }
+
+  // Returns the lengths, one byte each, representing the number of bytes
+  // required in the unichar_string for each UNICHAR_ID.
+  // Regenerates both cached strings on every call.
+  const std::string &unichar_lengths() const {
+    this->string_and_lengths(&unichar_string_, &unichar_lengths_);
+    return unichar_lengths_;
+  }
+
+  // Sets up the script_pos_ member using the blobs_list to get the bln
+  // bounding boxes, *this to get the unichars, and this->unicharset
+  // to get the target positions. If small_caps is true, sub/super are not
+  // considered, but dropcaps are.
+  // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
+  void SetScriptPositions(bool small_caps, TWERD *word, int debug = 0);
+  // Sets all the script_pos_ positions to the given position.
+  void SetAllScriptPositions(ScriptPos position);
+
+  static ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
+                                    const TBOX &blob_box, UNICHAR_ID unichar_id);
+
+  // Returns the "dominant" script ID for the word.  By "dominant", the script
+  // must account for at least half the characters.  Otherwise, it returns 0.
+  // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
+  int GetTopScriptID() const;
+
+  // Fixes the state_ for a chop at the given blob_position.
+  void UpdateStateForSplit(int blob_position);
+
+  // Returns the sum of all the state elements, being the total number of blobs.
+  int TotalOfStates() const;
+
+  // Prints the word with an empty intro message.
+  void print() const {
+    this->print("");
+  }
+  void print(const char *msg) const;
+  // Prints the segmentation state with an introductory message.
+  void print_state(const char *msg) const;
+
+  // Displays the segmentation state of *this (if not the same as the last
+  // one displayed) and waits for a click in the window.
+  void DisplaySegmentation(TWERD *word);
+
+  WERD_CHOICE &operator+=(        // concatenate
+      const WERD_CHOICE &second); // second on first
+
+  WERD_CHOICE &operator=(const WERD_CHOICE &source);
+
+private:
+  const UNICHARSET *unicharset_;
+  // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
+  // unichar_ids_ is an array of classifier "results" that make up a word.
+  // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
+  // of each unichar_id.
+  // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
+  // were put together to make the classification results in the ith position
+  // in unichar_ids_, and certainties_[i] is the certainty of the choice that
+  // was used in this word.
+  // == Change from before ==
+  // Previously there was fragment_lengths_ that allowed a word to be
+  // artificially composed of multiple fragment results. Since the new
+  // segmentation search doesn't do fragments, treatment of fragments has
+  // been moved to a lower level, augmenting the ratings matrix with the
+  // combined fragments, and allowing the language-model/segmentation-search
+  // to deal with only the combined unichar_ids.
+  std::vector<UNICHAR_ID> unichar_ids_; // unichar ids that represent the text of the word
+  std::vector<ScriptPos> script_pos_;   // Normal/Sub/Superscript of each unichar.
+  std::vector<int> state_;              // Number of blobs in each unichar.
+  std::vector<float> certainties_;      // Certainty of each unichar.
+  int reserved_;            // size of the above arrays
+  int length_;              // word length
+  // Factor that was used to adjust the rating.
+  float adjust_factor_;
+  // Rating is the sum of the ratings of the individual blobs in the word.
+  float rating_; // size related
+  // certainty is the min (worst) certainty of the individual blobs in the word.
+  float certainty_; // absolute
+  // xheight computed from the result, or 0 if inconsistent.
+  float min_x_height_;
+  float max_x_height_;
+  uint8_t permuter_; // permuter code
+
+  // Normally, the ratings_ matrix represents the recognition results in order
+  // from left-to-right.  However, some engines (say Cube) may return
+  // recognition results in the order of the script's major reading direction
+  // (for Arabic, that is right-to-left).
+  bool unichars_in_script_order_;
+  // True if NoDangerousAmbig found an ambiguity.
+  bool dangerous_ambig_found_;
+
+  // The following variables are populated and passed by reference any
+  // time unichar_string() or unichar_lengths() are called.
+  mutable std::string unichar_string_;
+  mutable std::string unichar_lengths_;
+};
+
+// Make WERD_CHOICE listable.
+ELISTIZEH(WERD_CHOICE)
+using BLOB_CHOICE_LIST_VECTOR = std::vector<BLOB_CHOICE_LIST *>;
+
+// Utilities for comparing WERD_CHOICEs
+
+bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2);
+
+// Utilities for debug printing.
+void print_ratings_list(const char *msg,                     // intro message
+                        BLOB_CHOICE_LIST *ratings,           // list of results
+                        const UNICHARSET &current_unicharset // unicharset that can be used
+                                                             // for id-to-unichar conversion
+);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rect.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rect.cpp
@ -0,0 +1,277 @@
+/**********************************************************************
+ * File:        rect.cpp  (Formerly box.c)
+ * Description: Bounding box class definition.
+ * Author:      Phil Cheatle
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "rect.h"
+
+#include "serialis.h" // for TFile
+
+namespace tesseract {
+
+/**********************************************************************
+ * TBOX::TBOX()  Constructor from 2 ICOORDS
+ *
+ * The two points may be any pair of opposite corners, in either order:
+ * the bottom-left corner takes the smaller x and y, the top-right corner
+ * takes the larger x and y.
+ **********************************************************************/
+
+TBOX::TBOX(           // constructor
+    const ICOORD pt1, // one corner
+    const ICOORD pt2  // the other corner
+    )
+    : bot_left(std::min(pt1.x(), pt2.x()), std::min(pt1.y(), pt2.y()))
+    , top_right(std::max(pt1.x(), pt2.x()), std::max(pt1.y(), pt2.y())) {}
+
+// Reads both corners from the TFile, bottom-left first.
+// Returns false as soon as either read fails.
+bool TBOX::DeSerialize(TFile *f) {
+  if (!bot_left.DeSerialize(f)) {
+    return false;
+  }
+  return top_right.DeSerialize(f);
+}
+
+// Writes both corners to the TFile, bottom-left first.
+// Returns false as soon as either write fails.
+bool TBOX::Serialize(TFile *f) const {
+  if (!bot_left.Serialize(f)) {
+    return false;
+  }
+  return top_right.Serialize(f);
+}
+
+// rotate_large constructs the containing bounding box of all 4
+// corners after rotating them. It therefore guarantees that all
+// original content is contained within, but also slightly enlarges the box.
+void TBOX::rotate_large(const FCOORD &vec) {
+  // The other two corners must be taken from the box before it is rotated.
+  ICOORD upper_left(bot_left.x(), top_right.y());
+  ICOORD lower_right(top_right.x(), bot_left.y());
+  upper_left.rotate(vec);
+  lower_right.rotate(vec);
+  rotate(vec);
+  // Grow the rotated box to also cover the rotated other two corners.
+  *this += TBOX(upper_left, lower_right);
+}
+
+/**********************************************************************
+ * TBOX::intersection()  Build the largest box contained in both boxes
+ *
+ * If the boxes do not overlap, the result has its bottom-left at
+ * (INT16_MAX, INT16_MAX) and its top-right at (-INT16_MAX, -INT16_MAX).
+ **********************************************************************/
+
+TBOX TBOX::intersection( // shared area box
+    const TBOX &box) const {
+  if (!overlap(box)) {
+    return TBOX(INT16_MAX, INT16_MAX, -INT16_MAX, -INT16_MAX);
+  }
+  // The shared area starts at the larger of the two lower bounds and ends
+  // at the smaller of the two upper bounds, on each axis.
+  const int16_t left = std::max(box.bot_left.x(), bot_left.x());
+  const int16_t right = std::min(box.top_right.x(), top_right.x());
+  const int16_t bottom = std::max(box.bot_left.y(), bot_left.y());
+  const int16_t top = std::min(box.top_right.y(), top_right.y());
+  return TBOX(left, bottom, right, top);
+}
+
+/**********************************************************************
+ * TBOX::bounding_union()  Build the smallest box containing both boxes
+ *
+ * Neither *this nor box is modified.
+ **********************************************************************/
+
+TBOX TBOX::bounding_union( // box enclosing both
+    const TBOX &box) const {
+  // Bottom left: the smaller lower bound on each axis.
+  const ICOORD bl(std::min(box.bot_left.x(), bot_left.x()),
+                  std::min(box.bot_left.y(), bot_left.y()));
+  // Top right: the larger upper bound on each axis.
+  const ICOORD tr(std::max(box.top_right.x(), top_right.x()),
+                  std::max(box.top_right.y(), top_right.y()));
+  return TBOX(bl, tr);
+}
+
+/**********************************************************************
+ * TBOX::plot()  Paint a box using specified settings
+ *
+ * Selects the brush (fill) and pen (border) colours on the window and
+ * then draws the box with the single-argument plot() overload.
+ * Only compiled when graphics are enabled.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void TBOX::plot(                    // paint box
+    ScrollView *fd,                 // where to paint
+    ScrollView::Color fill_colour,  // colour for inside
+    ScrollView::Color border_colour // colour for border
+    ) const {
+  fd->Brush(fill_colour);
+  fd->Pen(border_colour);
+  plot(fd);
+}
+#endif
+
+// Appends the bounding box to str in the form "(left,bottom)->(right,top)".
+// Existing contents of str are kept.
+void TBOX::print_to_str(std::string &str) const {
+  str += '(';
+  str += std::to_string(left());
+  str += ',';
+  str += std::to_string(bottom());
+  str += ")->(";
+  str += std::to_string(right());
+  str += ',';
+  str += std::to_string(top());
+  str += ')';
+}
+
+// Writes to the given file. Returns false in case of error.
+// The bottom-left corner is written first; the top-right corner is only
+// written if that succeeds.
+bool TBOX::Serialize(FILE *fp) const {
+  return bot_left.Serialize(fp) && top_right.Serialize(fp);
+}
+// Reads from the given file. Returns false in case of error.
+// If swap is true, assumes a big/little-endian swap is needed.
+// The bottom-left corner is read first; the top-right corner is only read
+// if that succeeds.
+bool TBOX::DeSerialize(bool swap, FILE *fp) {
+  return bot_left.DeSerialize(swap, fp) && top_right.DeSerialize(swap, fp);
+}
+
+/**********************************************************************
+ * operator+=
+ *
+ * Extend one box to include the other  (In place union)
+ * Returns a reference to the extended box (op1).
+ **********************************************************************/
+
+TBOX &operator+=( // bounding box
+    TBOX &op1,    // operands
+    const TBOX &op2) {
+  // Lower bounds move down/left to the smaller value, upper bounds move
+  // up/right to the larger value.
+  op1.bot_left.set_x(std::min(op1.bot_left.x(), op2.bot_left.x()));
+  op1.top_right.set_x(std::max(op1.top_right.x(), op2.top_right.x()));
+  op1.bot_left.set_y(std::min(op1.bot_left.y(), op2.bot_left.y()));
+  op1.top_right.set_y(std::max(op1.top_right.y(), op2.top_right.y()));
+  return op1;
+}
+
+/**********************************************************************
+ * operator&=
+ *
+ * Reduce one box to intersection with the other  (In place intersection)
+ * If the boxes do not overlap, op1 gets its bottom-left set to
+ * (INT16_MAX, INT16_MAX) and its top-right to (-INT16_MAX, -INT16_MAX).
+ **********************************************************************/
+
+TBOX &operator&=(TBOX &op1, const TBOX &op2) {
+  if (!op1.overlap(op2)) {
+    op1.bot_left.set_x(INT16_MAX);
+    op1.bot_left.set_y(INT16_MAX);
+    op1.top_right.set_x(-INT16_MAX);
+    op1.top_right.set_y(-INT16_MAX);
+    return op1;
+  }
+  // Lower bounds move to the larger value, upper bounds to the smaller one.
+  op1.bot_left.set_x(std::max(op1.bot_left.x(), op2.bot_left.x()));
+  op1.top_right.set_x(std::min(op1.top_right.x(), op2.top_right.x()));
+  op1.bot_left.set_y(std::max(op1.bot_left.y(), op2.bot_left.y()));
+  op1.top_right.set_y(std::min(op1.top_right.y(), op2.top_right.y()));
+  return op1;
+}
+
+// Returns true if the left edges and the right edges of the two boxes each
+// differ by at most tolerance.
+bool TBOX::x_almost_equal(const TBOX &box, int tolerance) const {
+  const int left_diff = abs(left() - box.left());
+  const int right_diff = abs(right() - box.right());
+  return left_diff <= tolerance && right_diff <= tolerance;
+}
+
+// Returns true if all four edges of the two boxes differ by at most
+// tolerance.
+bool TBOX::almost_equal(const TBOX &box, int tolerance) const {
+  // Left and right edges first, then top and bottom.
+  if (!x_almost_equal(box, tolerance)) {
+    return false;
+  }
+  const int top_diff = abs(top() - box.top());
+  const int bottom_diff = abs(bottom() - box.bottom());
+  return top_diff <= tolerance && bottom_diff <= tolerance;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rect.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rect.h
@ -0,0 +1,503 @@
+/**********************************************************************
+ * File:        rect.h  (Formerly box.h)
+ * Description: Bounding box class definition.
+ * Author:      Phil Cheatle
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef RECT_H
+#define RECT_H
+
+#include "points.h"     // for ICOORD, FCOORD
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+#include "tprintf.h"    // for tprintf
+
+#include <tesseract/export.h> // for DLLSYM
+
+#include <algorithm> // for std::max, std::min
+#include <cmath>     // for std::ceil, std::floor
+#include <cstdint>   // for INT16_MAX
+#include <cstdio>    // for FILE
+#include <string>    // for std::string
+
+namespace tesseract {
+
+// Axis-aligned bounding box, stored as its bottom-left and top-right corners.
+// The edge accessors expose the coordinates as int16_t.
+class TESS_API TBOX { // bounding box
+public:
+  // Default constructor: makes a null box with bottom-left at
+  // (INT16_MAX, INT16_MAX) and top-right at (-INT16_MAX, -INT16_MAX).
+  TBOX()
+      : // empty constructor making a null box
+      bot_left(INT16_MAX, INT16_MAX)
+      , top_right(-INT16_MAX, -INT16_MAX) {}
+
+  // Constructs a box from two corner points (defined out of line).
+  TBOX(                  // constructor
+      const ICOORD pt1,  // one corner
+      const ICOORD pt2); // the other corner
+
+  //*********************************************************************
+  // TBOX::TBOX()  Constructor from 4 integer values.
+  //  Note: It is caller's responsibility to provide values
+  //        in the right order.
+  //*********************************************************************
+  TBOX( // constructor
+      int16_t left, int16_t bottom, int16_t right, int16_t top)
+      : bot_left(left, bottom), top_right(right, top) {}
+
+  // Constructs the box spanning floor(pt) to ceil(pt) on each axis.
+  TBOX( // box around FCOORD
+      const FCOORD pt);
+
+  // Returns true if the box has no positive extent on at least one axis,
+  // i.e. left >= right or top <= bottom. Zero-width and zero-height boxes
+  // therefore count as null.
+  bool null_box() const { // Is box null
+    return ((left() >= right()) || (top() <= bottom()));
+  }
+
+  // Two boxes are equal when both corners compare equal.
+  bool operator==(const TBOX &other) const {
+    return bot_left == other.bot_left && top_right == other.top_right;
+  }
+
+  int16_t top() const { // coord of top
+    return top_right.y();
+  }
+  void set_top(int y) { // overwrite the top coord
+    top_right.set_y(y);
+  }
+
+  int16_t bottom() const { // coord of bottom
+    return bot_left.y();
+  }
+  void set_bottom(int y) { // overwrite the bottom coord
+    bot_left.set_y(y);
+  }
+
+  int16_t left() const { // coord of left
+    return bot_left.x();
+  }
+  void set_left(int x) { // overwrite the left coord
+    bot_left.set_x(x);
+  }
+
+  int16_t right() const { // coord of right
+    return top_right.x();
+  }
+  void set_right(int x) { // overwrite the right coord
+    top_right.set_x(x);
+  }
+  // Midpoint of the left and right edges (integer division).
+  int x_middle() const {
+    return (bot_left.x() + top_right.x()) / 2;
+  }
+  // Midpoint of the bottom and top edges (integer division).
+  int y_middle() const {
+    return (bot_left.y() + top_right.y()) / 2;
+  }
+
+  const ICOORD &botleft() const { // access function (stored corner)
+    return bot_left;
+  }
+
+  ICOORD botright() const { // corner built on demand, returned by value
+    return ICOORD(top_right.x(), bot_left.y());
+  }
+
+  ICOORD topleft() const { // corner built on demand, returned by value
+    return ICOORD(bot_left.x(), top_right.y());
+  }
+
+  const ICOORD &topright() const { // access function (stored corner)
+    return top_right;
+  }
+
+  // Height of the box; 0 for a null box.
+  int16_t height() const { // how high is it?
+    if (!null_box()) {
+      return top_right.y() - bot_left.y();
+    } else {
+      return 0;
+    }
+  }
+
+  // Width of the box; 0 for a null box.
+  int16_t width() const { // how wide is it?
+    if (!null_box()) {
+      return top_right.x() - bot_left.x();
+    } else {
+      return 0;
+    }
+  }
+
+  // Area (width * height) of the box; 0 for a null box.
+  int32_t area() const { // what is the area?
+    if (!null_box()) {
+      return width() * height();
+    } else {
+      return 0;
+    }
+  }
+
+  // Pads the box on either side by the supplied x,y pad amounts.
+  // NO checks for exceeding any bounds like 0 or an image size.
+  void pad(int xpad, int ypad) {
+    // Note: this local deliberately has the same name as the method.
+    ICOORD pad(xpad, ypad);
+    bot_left -= pad;
+    top_right += pad;
+  }
+
+  // The four move_*_edge functions shift a single edge by a signed offset
+  // and leave the other three edges untouched.
+  void move_bottom_edge( // move one edge
+      const int16_t y) { // by +/- y
+    bot_left += ICOORD(0, y);
+  }
+
+  void move_left_edge(   // move one edge
+      const int16_t x) { // by +/- x
+    bot_left += ICOORD(x, 0);
+  }
+
+  void move_right_edge(  // move one edge
+      const int16_t x) { // by +/- x
+    top_right += ICOORD(x, 0);
+  }
+
+  void move_top_edge(    // move one edge
+      const int16_t y) { // by +/- y
+    top_right += ICOORD(0, y);
+  }
+
+  // Translates both corners by an integer vector.
+  void move(              // move box
+      const ICOORD vec) { // by vector
+    bot_left += vec;
+    top_right += vec;
+  }
+
+  // Translates the box by a float vector. The bottom-left corner is rounded
+  // with floor and the top-right corner with ceil.
+  void move(              // move box
+      const FCOORD vec) { // by float vector
+    bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() + vec.x())));
+    // round left
+    bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() + vec.y())));
+    // round down
+    top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() + vec.x())));
+    // round right
+    top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() + vec.y())));
+    // round up
+  }
+
+  // Multiplies all four coordinates by f, using floor for the bottom-left
+  // corner and ceil for the top-right corner.
+  void scale(          // scale box
+      const float f) { // by multiplier
+    // round left
+    bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() * f)));
+    // round down
+    bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() * f)));
+    // round right
+    top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() * f)));
+    // round up
+    top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() * f)));
+  }
+  // As scale(float), but with separate x and y multipliers taken from vec.
+  void scale(             // scale box
+      const FCOORD vec) { // by float vector
+    bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() * vec.x())));
+    bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() * vec.y())));
+    top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() * vec.x())));
+    top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() * vec.y())));
+  }
+
+  // rotate doesn't enlarge the box - it just rotates the bottom-left
+  // and top-right corners. Use rotate_large if you want to guarantee
+  // that all content is contained within the rotated box.
+  void rotate(const FCOORD &vec) { // by vector
+    bot_left.rotate(vec);
+    top_right.rotate(vec);
+    // Rebuild through the two-corner constructor after rotating the corners.
+    *this = TBOX(bot_left, top_right);
+  }
+  // rotate_large constructs the containing bounding box of all 4
+  // corners after rotating them. It therefore guarantees that all
+  // original content is contained within, but also slightly enlarges the box.
+  void rotate_large(const FCOORD &vec);
+
+  bool contains( // is pt inside box (edges inclusive)
+      const FCOORD pt) const;
+
+  bool contains( // is box inside box
+      const TBOX &box) const;
+
+  bool overlap( // do boxes overlap
+      const TBOX &box) const;
+
+  bool major_overlap( // do boxes overlap more than half
+      const TBOX &box) const;
+
+  // Do boxes overlap on x axis.
+  bool x_overlap(const TBOX &box) const;
+
+  // Return the horizontal gap between the boxes. If the boxes
+  // overlap horizontally then the return value is negative, indicating
+  // the amount of the overlap.
+  int x_gap(const TBOX &box) const {
+    return std::max(bot_left.x(), box.bot_left.x()) - std::min(top_right.x(), box.top_right.x());
+  }
+
+  // Return the vertical gap between the boxes. If the boxes
+  // overlap vertically then the return value is negative, indicating
+  // the amount of the overlap.
+  int y_gap(const TBOX &box) const {
+    return std::max(bot_left.y(), box.bot_left.y()) - std::min(top_right.y(), box.top_right.y());
+  }
+
+  // Do boxes overlap on x axis by more than
+  // half of the width of the narrower box.
+  bool major_x_overlap(const TBOX &box) const;
+
+  // Do boxes overlap on y axis.
+  bool y_overlap(const TBOX &box) const;
+
+  // Do boxes overlap on y axis by more than
+  // half of the height of the shorter box.
+  bool major_y_overlap(const TBOX &box) const;
+
+  // fraction of current box's area covered by other
+  double overlap_fraction(const TBOX &box) const;
+
+  // fraction of the current box's width covered by the other's x extent
+  double x_overlap_fraction(const TBOX &box) const;
+
+  // fraction of the current box's height covered by the other's y extent
+  double y_overlap_fraction(const TBOX &box) const;
+
+  // Returns true if the boxes are almost equal on x axis.
+  bool x_almost_equal(const TBOX &box, int tolerance) const;
+
+  // Returns true if the boxes are almost equal
+  bool almost_equal(const TBOX &box, int tolerance) const;
+
+  TBOX intersection( // shared area box
+      const TBOX &box) const;
+
+  TBOX bounding_union( // box enclosing both
+      const TBOX &box) const;
+
+  // Sets the box boundaries to the given coordinates.
+  // The values are stored as given; no ordering check is made here.
+  void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) {
+    bot_left.set_x(x_min);
+    bot_left.set_y(y_min);
+    top_right.set_x(x_max);
+    top_right.set_y(y_max);
+  }
+
+  // Prints the box via tprintf as (left,bottom)->(right,top).
+  void print() const { // print
+    tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top());
+  }
+  // Appends the bounding box as (%d,%d)->(%d,%d) to a string.
+  void print_to_str(std::string &str) const;
+
+#ifndef GRAPHICS_DISABLED
+  // Draws the box outline as a rectangle in the given window.
+  void plot(                  // use current settings
+      ScrollView *fd) const { // where to paint
+    fd->Rectangle(bot_left.x(), bot_left.y(), top_right.x(), top_right.y());
+  }
+
+  void plot(                                  // paint box
+      ScrollView *fd,                         // where to paint
+      ScrollView::Color fill_colour,          // colour for inside
+      ScrollView::Color border_colour) const; // colour for border
+#endif
+  // Writes to the given file. Returns false in case of error.
+  bool Serialize(FILE *fp) const;
+  bool Serialize(TFile *fp) const;
+
+  // Reads from the given file. Returns false in case of error.
+  // If swap is true, assumes a big/little-endian swap is needed.
+  bool DeSerialize(bool swap, FILE *fp);
+  bool DeSerialize(TFile *fp);
+
+  friend TBOX &operator+=(TBOX &, const TBOX &);
+  // in place union
+  friend TBOX &operator&=(TBOX &, const TBOX &);
+  // in place intersection
+
+private:
+  ICOORD bot_left;  // bottom left corner
+  ICOORD top_right; // top right corner
+};
+
+/**********************************************************************
+ * TBOX::TBOX()  Constructor from 1 FCOORD
+ *
+ **********************************************************************/
+
+// Builds the smallest integer box around a floating point: the bottom-left
+// corner is floor(pt) and the top-right corner is ceil(pt), per axis.
+inline TBOX::TBOX(const FCOORD pt)
+    : bot_left(static_cast<int16_t>(std::floor(pt.x())),
+               static_cast<int16_t>(std::floor(pt.y())))
+    , top_right(static_cast<int16_t>(std::ceil(pt.x())),
+                static_cast<int16_t>(std::ceil(pt.y()))) {}
+
+/**********************************************************************
+ * TBOX::contains()  Is point within box
+ *
+ **********************************************************************/
+
+// Point-in-box test; points lying exactly on an edge count as inside.
+inline bool TBOX::contains(const FCOORD pt) const {
+  const bool within_x = (pt.x() >= bot_left.x()) && (pt.x() <= top_right.x());
+  if (!within_x) {
+    return false;
+  }
+  return (pt.y() >= bot_left.y()) && (pt.y() <= top_right.y());
+}
+
+/**********************************************************************
+ * TBOX::contains()  Is box within box
+ *
+ **********************************************************************/
+
+// Box-in-box test: true when both stored corners of box pass the point test.
+inline bool TBOX::contains(const TBOX &box) const {
+  if (!contains(box.bot_left)) {
+    return false;
+  }
+  return contains(box.top_right);
+}
+
+/**********************************************************************
+ * TBOX::overlap()  Do two boxes overlap?
+ *
+ **********************************************************************/
+
+// True when the two boxes share at least one point; touching edges count.
+inline bool TBOX::overlap(const TBOX &box) const {
+  const bool shares_x =
+      (box.bot_left.x() <= top_right.x()) && (box.top_right.x() >= bot_left.x());
+  if (!shares_x) {
+    return false;
+  }
+  return (box.bot_left.y() <= top_right.y()) && (box.top_right.y() >= bot_left.y());
+}
+
+/**********************************************************************
+ * TBOX::major_overlap()  Do two boxes overlap by at least half of the smallest?
+ *
+ **********************************************************************/
+
+// True when, on each axis, twice the shared extent is at least the extent of
+// the smaller of the two boxes on that axis.
+inline bool TBOX::major_overlap(const TBOX &box) const {
+  // Shared horizontal extent (negative when the boxes are apart in x).
+  const int shared_x =
+      std::min(box.top_right.x(), top_right.x()) - std::max(box.bot_left.x(), bot_left.x());
+  if (2 * shared_x < std::min(box.width(), width())) {
+    return false;
+  }
+
+  // Shared vertical extent (negative when the boxes are apart in y).
+  const int shared_y =
+      std::min(box.top_right.y(), top_right.y()) - std::max(box.bot_left.y(), bot_left.y());
+  return !(2 * shared_y < std::min(box.height(), height()));
+}
+
+/**********************************************************************
+ * TBOX::overlap_fraction()  Fraction of area covered by the other box
+ *
+ **********************************************************************/
+
+// Fraction of this box's area that is covered by box; 0.0 when this box has
+// zero area.
+inline double TBOX::overlap_fraction(const TBOX &box) const {
+  if (this->area() == 0) {
+    return 0.0;
+  }
+  const int32_t shared_area = this->intersection(box).area();
+  return shared_area * 1.0 / this->area();
+}
+
+/**********************************************************************
+ * TBOX::x_overlap()  Do two boxes overlap on x-axis
+ *
+ **********************************************************************/
+
+// True when the horizontal extents of the two boxes intersect or touch.
+inline bool TBOX::x_overlap(const TBOX &box) const {
+  if (box.bot_left.x() > top_right.x()) {
+    return false; // box starts to the right of this box
+  }
+  return box.top_right.x() >= bot_left.x();
+}
+
+/**********************************************************************
+ * TBOX::major_x_overlap()  Do two boxes overlap by more than half the
+ *                          width of the narrower box on the x-axis
+ *
+ **********************************************************************/
+
+// True when the shared horizontal extent is at least half the width of
+// either box (integer division).
+inline bool TBOX::major_x_overlap(const TBOX &box) const {
+  // Start from the full width of box and trim off what lies outside this box.
+  int16_t shared = box.width();
+  const int left_excess = this->left() - box.left();
+  if (left_excess > 0) {
+    shared -= left_excess;
+  }
+  const int right_excess = box.right() - this->right();
+  if (right_excess > 0) {
+    shared -= right_excess;
+  }
+  if (shared >= box.width() / 2) {
+    return true;
+  }
+  return shared >= this->width() / 2;
+}
+
+/**********************************************************************
+ * TBOX::y_overlap()  Do two boxes overlap on y-axis
+ *
+ **********************************************************************/
+
+// True when the vertical extents of the two boxes intersect or touch.
+inline bool TBOX::y_overlap(const TBOX &box) const {
+  if (box.bot_left.y() > top_right.y()) {
+    return false; // box starts above this box
+  }
+  return box.top_right.y() >= bot_left.y();
+}
+
+/**********************************************************************
+ * TBOX::major_y_overlap()  Do two boxes overlap by more than half the
+ *                          height of the shorter box on the y-axis
+ *
+ **********************************************************************/
+
+// True when the shared vertical extent is at least half the height of
+// either box (integer division).
+inline bool TBOX::major_y_overlap(const TBOX &box) const {
+  // Start from the full height of box and trim off what lies outside this box.
+  int16_t shared = box.height();
+  const int bottom_excess = this->bottom() - box.bottom();
+  if (bottom_excess > 0) {
+    shared -= bottom_excess;
+  }
+  const int top_excess = box.top() - this->top();
+  if (top_excess > 0) {
+    shared -= top_excess;
+  }
+  if (shared >= box.height() / 2) {
+    return true;
+  }
+  return shared >= this->height() / 2;
+}
+
+/**********************************************************************
+ * TBOX::x_overlap_fraction() Calculates the horizontal overlap of the
+ *                            given boxes as a fraction of this boxes
+ *                            width.
+ *
+ **********************************************************************/
+
+// Horizontal overlap with other as a fraction of this box's own width,
+// never negative. A zero-width box yields 1.0 if its x position lies within
+// other's horizontal extent and 0.0 otherwise.
+inline double TBOX::x_overlap_fraction(const TBOX &other) const {
+  const int own_span = right() - left();
+  if (own_span == 0) {
+    const int x = left();
+    const bool inside = other.left() <= x && x <= other.right();
+    return inside ? 1.0 : 0.0;
+  }
+  const int shared_low = std::max(left(), other.left());
+  const int shared_high = std::min(right(), other.right());
+  return std::max(0.0, static_cast<double>(shared_high - shared_low) / own_span);
+}
+
+/**********************************************************************
+ * TBOX::y_overlap_fraction() Calculates the vertical overlap of the
+ *                            given boxes as a fraction of this boxes
+ *                            height.
+ *
+ **********************************************************************/
+
+// Vertical overlap with other as a fraction of this box's own height,
+// never negative. A zero-height box yields 1.0 if its y position lies within
+// other's vertical extent and 0.0 otherwise.
+inline double TBOX::y_overlap_fraction(const TBOX &other) const {
+  const int own_span = top() - bottom();
+  if (own_span == 0) {
+    const int y = bottom();
+    const bool inside = other.bottom() <= y && y <= other.top();
+    return inside ? 1.0 : 0.0;
+  }
+  const int shared_low = std::max(bottom(), other.bottom());
+  const int shared_high = std::min(top(), other.top());
+  return std::max(0.0, static_cast<double>(shared_high - shared_low) / own_span);
+}
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rejctmap.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rejctmap.cpp
@ -0,0 +1,251 @@
+/**********************************************************************
+ * File:        rejctmap.cpp  (Formerly rejmap.c)
+ * Description: REJ and REJMAP class functions.
+ * Author:      Phil Cheatle
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "rejctmap.h"
+
+#include <memory>
+
+#include "params.h"
+
+namespace tesseract {
+
+// Writes one "NAME: T|F" line per reject/accept flag to fp, in a fixed order.
+void REJ::full_print(FILE *fp) const {
+  // Renders the state of a single flag as the text that gets printed.
+  auto tf = [this](REJ_FLAGS which) -> const char * {
+    return flag(which) ? "T" : "F";
+  };
+
+  fprintf(fp, "R_TESS_FAILURE: %s\n", tf(R_TESS_FAILURE));
+  fprintf(fp, "R_SMALL_XHT: %s\n", tf(R_SMALL_XHT));
+  fprintf(fp, "R_EDGE_CHAR: %s\n", tf(R_EDGE_CHAR));
+  fprintf(fp, "R_1IL_CONFLICT: %s\n", tf(R_1IL_CONFLICT));
+  fprintf(fp, "R_POSTNN_1IL: %s\n", tf(R_POSTNN_1IL));
+  fprintf(fp, "R_REJ_CBLOB: %s\n", tf(R_REJ_CBLOB));
+  fprintf(fp, "R_MM_REJECT: %s\n", tf(R_MM_REJECT));
+  fprintf(fp, "R_BAD_REPETITION: %s\n", tf(R_BAD_REPETITION));
+  fprintf(fp, "R_POOR_MATCH: %s\n", tf(R_POOR_MATCH));
+  fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n", tf(R_NOT_TESS_ACCEPTED));
+  fprintf(fp, "R_CONTAINS_BLANKS: %s\n", tf(R_CONTAINS_BLANKS));
+  fprintf(fp, "R_BAD_PERMUTER: %s\n", tf(R_BAD_PERMUTER));
+  fprintf(fp, "R_HYPHEN: %s\n", tf(R_HYPHEN));
+  fprintf(fp, "R_DUBIOUS: %s\n", tf(R_DUBIOUS));
+  fprintf(fp, "R_NO_ALPHANUMS: %s\n", tf(R_NO_ALPHANUMS));
+  fprintf(fp, "R_MOSTLY_REJ: %s\n", tf(R_MOSTLY_REJ));
+  fprintf(fp, "R_XHT_FIXUP: %s\n", tf(R_XHT_FIXUP));
+  fprintf(fp, "R_BAD_QUALITY: %s\n", tf(R_BAD_QUALITY));
+  fprintf(fp, "R_DOC_REJ: %s\n", tf(R_DOC_REJ));
+  fprintf(fp, "R_BLOCK_REJ: %s\n", tf(R_BLOCK_REJ));
+  fprintf(fp, "R_ROW_REJ: %s\n", tf(R_ROW_REJ));
+  fprintf(fp, "R_UNLV_REJ: %s\n", tf(R_UNLV_REJ));
+  fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", tf(R_HYPHEN_ACCEPT));
+  fprintf(fp, "R_NN_ACCEPT: %s\n", tf(R_NN_ACCEPT));
+  fprintf(fp, "R_MM_ACCEPT: %s\n", tf(R_MM_ACCEPT));
+  fprintf(fp, "R_QUALITY_ACCEPT: %s\n", tf(R_QUALITY_ACCEPT));
+  fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n", tf(R_MINIMAL_REJ_ACCEPT));
+}
+
+// Copy assignment: resizes this map to source's length and copies every
+// entry across. Returns *this.
+REJMAP &REJMAP::operator=(const REJMAP &source) {
+  // Self-assignment must be a no-op. initialise() replaces ptr with a freshly
+  // default-constructed array, so without this guard `map = map` would read
+  // that new array back as the source and silently clear every flag.
+  if (this != &source) {
+    initialise(source.len);
+    for (int i = 0; i < len; i++) {
+      ptr[i] = source.ptr[i];
+    }
+  }
+  return *this;
+}
+
+// Replaces the current contents with a new array of `length`
+// default-constructed entries and records the new length.
+void REJMAP::initialise(int16_t length) {
+  std::unique_ptr<REJ[]> fresh = std::make_unique<REJ[]>(length);
+  ptr = std::move(fresh);
+  len = length;
+}
+
+// Counts the entries of the map that are currently accepted.
+int16_t REJMAP::accept_count() const {
+  int16_t accepted_total = 0;
+  for (int idx = 0; idx < len; ++idx) {
+    if (!ptr[idx].accepted()) {
+      continue;
+    }
+    ++accepted_total;
+  }
+  return accepted_total;
+}
+
+// True if at least one entry is rejected but not permanently rejected.
+bool REJMAP::recoverable_rejects() const {
+  bool found = false;
+  // Stop scanning as soon as the first recoverable entry is seen.
+  for (int idx = 0; idx < len && !found; ++idx) {
+    found = ptr[idx].recoverable();
+  }
+  return found;
+}
+
+// True if at least one entry reports accept_if_good_quality().
+bool REJMAP::quality_recoverable_rejects() const {
+  bool found = false;
+  // Stop scanning as soon as the first such entry is seen.
+  for (int idx = 0; idx < len && !found; ++idx) {
+    found = ptr[idx].accept_if_good_quality();
+  }
+  return found;
+}
+
+// Removes the entry at index pos, closing the gap by moving every later entry
+// one slot towards the front; the map becomes one entry shorter.
+// pos must satisfy 0 <= pos < len (enforced with ASSERT_HOST).
+void REJMAP::remove_pos(int16_t pos) {
+  ASSERT_HOST(pos >= 0);
+  ASSERT_HOST(pos < len);
+  ASSERT_HOST(len > 0);
+
+  const int new_len = len - 1;
+  for (int dst = pos; dst < new_len; ++dst) {
+    ptr[dst] = ptr[dst + 1];
+  }
+  --len;
+}
+
+// Writes the map to fp as a double-quoted string holding one display
+// character per entry (see REJ::display_char()).
+//
+// The characters are streamed straight to fp rather than staged in a
+// fixed-size stack buffer: len is an int16_t and may exceed any small fixed
+// capacity, which previously overflowed a 512-byte buffer for long maps.
+void REJMAP::print(FILE *fp) const {
+  fputc('"', fp);
+  for (int i = 0; i < len; i++) {
+    fputc(ptr[i].display_char(), fp);
+  }
+  fputc('"', fp);
+}
+
+// Dumps the full flag listing of every entry to fp, with a blank line after
+// each entry.
+void REJMAP::full_print(FILE *fp) const {
+  for (int idx = 0; idx < len; ++idx) {
+    const REJ &entry = ptr[idx];
+    entry.full_print(fp);
+    fprintf(fp, "\n");
+  }
+}
+
+// Rejects the whole word: sets the small-xht flag on every entry.
+void REJMAP::rej_word_small_xht() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    entry.setrej_small_xht();
+  }
+}
+
+// Rejects the whole word: sets the tess-failure flag on every entry.
+void REJMAP::rej_word_tess_failure() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    entry.setrej_tess_failure();
+  }
+}
+
+// Rejects the whole word: sets the not-tess-accepted flag on every entry that
+// is still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_not_tess_accepted() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_not_tess_accepted();
+  }
+}
+
+// Rejects the whole word: sets the contains-blanks flag on every entry that
+// is still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_contains_blanks() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_contains_blanks();
+  }
+}
+
+// Rejects the whole word: sets the bad-permuter flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_bad_permuter() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_bad_permuter();
+  }
+}
+
+// Rejects the whole word: sets the xht-fixup flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_xht_fixup() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_xht_fixup();
+  }
+}
+
+// Rejects the whole word: sets the no-alphanums flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_no_alphanums() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_no_alphanums();
+  }
+}
+
+// Rejects the whole word: sets the mostly-rejected flag on every entry that
+// is still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_mostly_rej() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_mostly_rej();
+  }
+}
+
+// Rejects the whole word: sets the bad-quality flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_bad_quality() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_bad_quality();
+  }
+}
+
+// Rejects the whole word: sets the document-reject flag on every entry that
+// is still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_doc_rej() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_doc_rej();
+  }
+}
+
+// Rejects the whole word: sets the block-reject flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_block_rej() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_block_rej();
+  }
+}
+
+// Rejects the whole word: sets the row-reject flag on every entry that is
+// still accepted; already rejected entries are left untouched.
+void REJMAP::rej_word_row_rej() {
+  for (int idx = 0; idx < len; ++idx) {
+    REJ &entry = ptr[idx];
+    if (!entry.accepted()) {
+      continue;
+    }
+    entry.setrej_row_rej();
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rejctmap.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/rejctmap.h
@ -0,0 +1,376 @@
+/**********************************************************************
+ * File:        rejctmap.h  (Formerly rejmap.h)
+ * Description: REJ and REJMAP class functions.
+ * Author:    Phil Cheatle
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+
+This module may look unnecessarily verbose, but here's the philosophy...
+
+ALL processing of the reject map is done in this module. There are lots of
+separate calls to set reject/accept flags. These have DELIBERATELY been kept
+distinct so that this module can decide what to do.
+
+Basically, there is a flag for each sort of rejection or acceptance. This
+provides a history of what has happened to EACH character.
+
+Determining whether a character is CURRENTLY rejected depends on implicit
+understanding of the SEQUENCE of possible calls. The flags are defined and
+grouped in the REJ_FLAGS enum. These groupings are used in determining a
+character's CURRENT rejection status. Basically, a character is ACCEPTED if
+
+    none of the permanent rej flags are set
+  AND (    the character has never been rejected
+      OR an accept flag is set which is LATER than the latest reject flag )
+
+IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
+OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
+**********************************************************************/
+
+#ifndef REJCTMAP_H
+#define REJCTMAP_H
+
+#include "errcode.h"
+#include "params.h"
+
+#include <bitset>
+#include <memory>
+
+namespace tesseract {
+
+// One enumerator per reject/accept reason. The values are used as bit
+// positions in REJ's std::bitset<32>, so the total number of enumerators
+// must stay at or below 32.
+enum REJ_FLAGS {
+  /* Reject modes which are NEVER overridden */
+  R_TESS_FAILURE,   // PERM Tess didn't classify
+  R_SMALL_XHT,      // PERM Xht too small
+  R_EDGE_CHAR,      // PERM Too close to edge of image
+  R_1IL_CONFLICT,   // PERM 1Il confusion
+  R_POSTNN_1IL,     // PERM 1Il unrejected by NN
+  R_REJ_CBLOB,      // PERM Odd blob
+  R_MM_REJECT,      // PERM Matrix match rejection (m's)
+  R_BAD_REPETITION, // Repeated char which doesn't match trend. Historically
+                    // tagged TEMP, but REJ::perm_rejected() tests this flag,
+                    // so it behaves as a PERM reject.
+
+  /* Initial reject modes (pre NN_ACCEPT) */
+  R_POOR_MATCH,        // TEMP Ray's original heuristic (Not used)
+  R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
+  R_CONTAINS_BLANKS,   // TEMP Tess failed on other chs in WERD
+  R_BAD_PERMUTER,      // POTENTIAL Bad permuter for WERD
+
+  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
+  R_HYPHEN,       // TEMP Post NN dodgy hyphen or full stop
+  R_DUBIOUS,      // TEMP Post NN dodgy chars
+  R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
+  R_MOSTLY_REJ,   // TEMP Most of word rejected so rej the rest
+  R_XHT_FIXUP,    // TEMP Xht tests unsure
+
+  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
+  R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
+
+  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
+  R_DOC_REJ,   // TEMP Document rejection
+  R_BLOCK_REJ, // TEMP Block rejection
+  R_ROW_REJ,   // TEMP Row rejection
+  R_UNLV_REJ,  // TEMP ~ turned to - or ^ turned to space
+
+  /* Accept modes which occur between the above rejection groups */
+  R_NN_ACCEPT,         // NN acceptance
+  R_HYPHEN_ACCEPT,     // Hyphen acceptance
+  R_MM_ACCEPT,         // Matrix match acceptance
+  R_QUALITY_ACCEPT,    // Accept word in good quality doc
+  R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
+};
+
+/* REJECT MAP VALUES */
+
+// Characters returned by REJ::display_char() to summarise one entry.
+#define MAP_ACCEPT '1'           // entry is currently accepted
+#define MAP_REJECT_PERM '0'      // entry is permanently rejected
+#define MAP_REJECT_TEMP '2'      // entry is rejected, but not permanently
+#define MAP_REJECT_POTENTIAL '3' // entry reports accept_if_good_quality()
+
+// Reject/accept history of a single character: one bit per REJ_FLAGS value.
+// Flags are only ever set, never cleared; whether the character is currently
+// rejected is derived from the combination of flags by rejected().
+class REJ {
+  std::bitset<32> flags; // bit i corresponds to the REJ_FLAGS value i
+
+  // Records that the given reject/accept event has happened.
+  void set_flag(REJ_FLAGS rej_flag) {
+    flags.set(rej_flag);
+  }
+
+public:
+  REJ() = default;
+
+  REJ( // classwise copy
+      const REJ &source) {
+    flags = source.flags;
+  }
+
+  REJ &operator=( // assign REJ
+      const REJ &source) = default;
+
+  // Returns true if the given flag has been set.
+  bool flag(REJ_FLAGS rej_flag) const {
+    return flags[rej_flag];
+  }
+
+  // Summarises the current state as one of the MAP_* characters. The checks
+  // are ordered: permanent reject, then potential reject, then any other
+  // reject, otherwise accepted.
+  char display_char() const {
+    if (perm_rejected()) {
+      return MAP_REJECT_PERM;
+    } else if (accept_if_good_quality()) {
+      return MAP_REJECT_POTENTIAL;
+    } else if (rejected()) {
+      return MAP_REJECT_TEMP;
+    } else {
+      return MAP_ACCEPT;
+    }
+  }
+
+  // True if any of the flags from the "NEVER overridden" group is set.
+  bool perm_rejected() const { // Is char perm reject?
+    return (flag(R_TESS_FAILURE) || flag(R_SMALL_XHT) || flag(R_EDGE_CHAR) ||
+            flag(R_1IL_CONFLICT) || flag(R_POSTNN_1IL) || flag(R_REJ_CBLOB) ||
+            flag(R_BAD_REPETITION) || flag(R_MM_REJECT));
+  }
+
+private:
+  // Any reject flag from the group raised before NN_ACCEPT.
+  bool rej_before_nn_accept() const {
+    return flag(R_POOR_MATCH) || flag(R_NOT_TESS_ACCEPTED) ||
+           flag(R_CONTAINS_BLANKS) || flag(R_BAD_PERMUTER);
+  }
+
+  // Any reject flag from the group raised between NN_ACCEPT and MM_ACCEPT.
+  bool rej_between_nn_and_mm() const {
+    return flag(R_HYPHEN) || flag(R_DUBIOUS) || flag(R_NO_ALPHANUMS) ||
+           flag(R_MOSTLY_REJ) || flag(R_XHT_FIXUP);
+  }
+
+  // The reject flag raised between MM_ACCEPT and QUALITY_ACCEPT.
+  bool rej_between_mm_and_quality_accept() const {
+    return flag(R_BAD_QUALITY);
+  }
+
+  // Any reject flag raised between QUALITY_ACCEPT and MINIMAL_REJ_ACCEPT.
+  bool rej_between_quality_and_minimal_rej_accept() const {
+    return flag(R_DOC_REJ) || flag(R_BLOCK_REJ) || flag(R_ROW_REJ) ||
+           flag(R_UNLV_REJ);
+  }
+
+  // Still rejected at the MM_ACCEPT stage: either a post-NN reject is set, or
+  // a pre-NN reject is set and neither NN_ACCEPT nor HYPHEN_ACCEPT cleared it.
+  bool rej_before_mm_accept() const {
+    return rej_between_nn_and_mm() ||
+           (rej_before_nn_accept() && !flag(R_NN_ACCEPT) &&
+            !flag(R_HYPHEN_ACCEPT));
+  }
+
+  // Still rejected at the QUALITY_ACCEPT stage: either R_BAD_QUALITY is set,
+  // or an earlier reject survives because MM_ACCEPT was never set.
+  bool rej_before_quality_accept() const {
+    return rej_between_mm_and_quality_accept() ||
+           (!flag(R_MM_ACCEPT) && rej_before_mm_accept());
+  }
+
+public:
+  // Current rejection status. R_MINIMAL_REJ_ACCEPT overrides everything,
+  // including the permanent flags; otherwise the character is rejected if a
+  // permanent flag is set, a post-quality reject is set, or an earlier reject
+  // survives because QUALITY_ACCEPT was never set.
+  bool rejected() const { // Is char rejected?
+    if (flag(R_MINIMAL_REJ_ACCEPT)) {
+      return false;
+    } else {
+      return (perm_rejected() || rej_between_quality_and_minimal_rej_accept() ||
+              (!flag(R_QUALITY_ACCEPT) && rej_before_quality_accept()));
+    }
+  }
+
+  // True when the character is rejected, not permanently, and the only
+  // reject flag set (outside the permanent group) is R_BAD_PERMUTER.
+  bool accept_if_good_quality() const { // potential rej?
+    return (rejected() && !perm_rejected() && flag(R_BAD_PERMUTER) &&
+            !flag(R_POOR_MATCH) && !flag(R_NOT_TESS_ACCEPTED) &&
+            !flag(R_CONTAINS_BLANKS) &&
+            (!rej_between_nn_and_mm() && !rej_between_mm_and_quality_accept() &&
+             !rej_between_quality_and_minimal_rej_accept()));
+  }
+
+  // The setrej_* functions below each set exactly one flag.
+
+  void setrej_tess_failure() { // Tess generated blank
+    set_flag(R_TESS_FAILURE);
+  }
+
+  void setrej_small_xht() { // Small xht char/wd
+    set_flag(R_SMALL_XHT);
+  }
+
+  void setrej_edge_char() { // Close to image edge
+    set_flag(R_EDGE_CHAR);
+  }
+
+  void setrej_1Il_conflict() { // Initial reject map
+    set_flag(R_1IL_CONFLICT);
+  }
+
+  void setrej_postNN_1Il() { // 1Il after NN
+    set_flag(R_POSTNN_1IL);
+  }
+
+  void setrej_rej_cblob() { // Insert duff blob
+    set_flag(R_REJ_CBLOB);
+  }
+
+  void setrej_mm_reject() { // Matrix matcher
+    set_flag(R_MM_REJECT);
+  }
+
+  void setrej_bad_repetition() { // Odd repeated char
+    set_flag(R_BAD_REPETITION);
+  }
+
+  void setrej_poor_match() { // Failed Rays heuristic
+    set_flag(R_POOR_MATCH);
+  }
+
+  void setrej_not_tess_accepted() {
+    // TEMP reject_word
+    set_flag(R_NOT_TESS_ACCEPTED);
+  }
+
+  void setrej_contains_blanks() {
+    // TEMP reject_word
+    set_flag(R_CONTAINS_BLANKS);
+  }
+
+  void setrej_bad_permuter() { // POTENTIAL reject_word
+    set_flag(R_BAD_PERMUTER);
+  }
+
+  void setrej_hyphen() { // PostNN dubious hyphen or .
+    set_flag(R_HYPHEN);
+  }
+
+  void setrej_dubious() { // PostNN dubious limit
+    set_flag(R_DUBIOUS);
+  }
+
+  void setrej_no_alphanums() { // TEMP reject_word
+    set_flag(R_NO_ALPHANUMS);
+  }
+
+  void setrej_mostly_rej() { // TEMP reject_word
+    set_flag(R_MOSTLY_REJ);
+  }
+
+  void setrej_xht_fixup() { // xht fixup
+    set_flag(R_XHT_FIXUP);
+  }
+
+  void setrej_bad_quality() { // TEMP reject_word
+    set_flag(R_BAD_QUALITY);
+  }
+
+  void setrej_doc_rej() { // TEMP reject_word
+    set_flag(R_DOC_REJ);
+  }
+
+  void setrej_block_rej() { // TEMP reject_word
+    set_flag(R_BLOCK_REJ);
+  }
+
+  void setrej_row_rej() { // TEMP reject_word
+    set_flag(R_ROW_REJ);
+  }
+
+  void setrej_unlv_rej() { // TEMP reject_word
+    set_flag(R_UNLV_REJ);
+  }
+
+  void setrej_hyphen_accept() { // Hyphen acceptance
+    set_flag(R_HYPHEN_ACCEPT);
+  }
+
+  void setrej_nn_accept() { // NN Flipped a char
+    set_flag(R_NN_ACCEPT);
+  }
+
+  void setrej_mm_accept() { // Matrix matcher
+    set_flag(R_MM_ACCEPT);
+  }
+
+  void setrej_quality_accept() { // Quality flip a char
+    set_flag(R_QUALITY_ACCEPT);
+  }
+
+  void setrej_minimal_rej_accept() {
+    // Accept all except blank
+    set_flag(R_MINIMAL_REJ_ACCEPT);
+  }
+
+  bool accepted() const { // Is char accepted?
+    return !rejected();
+  }
+
+  // Rejected, but not by one of the permanent flags.
+  bool recoverable() const {
+    return (rejected() && !perm_rejected());
+  }
+
+  // Writes one "NAME: T|F" line per flag to fp (defined out of line).
+  void full_print(FILE *fp) const;
+};
+
+// Reject map: an owned array of REJ entries (one per char) plus its length.
+class REJMAP {
+  std::unique_ptr<REJ[]> ptr; // ptr to the chars
+  int16_t len;                // Number of chars
+
+public:
+  REJMAP() : len(0) {}
+
+  // Copy constructor. len is given a defined value before delegating to the
+  // assignment operator, so that operator never runs on an object whose
+  // length member is indeterminate.
+  REJMAP(const REJMAP &rejmap) : len(0) {
+    *this = rejmap;
+  }
+
+  REJMAP &operator=(const REJMAP &source);
+
+  // Sets up the ptr array to length, whatever it was before.
+  void initialise(int16_t length);
+
+  // Returns a reference to the entry at index. Both bounds are checked with
+  // ASSERT_HOST: index is signed, so a negative value has to be caught as
+  // well as one past the end.
+  REJ &operator[](         // access function
+      int16_t index) const // map index
+  {
+    ASSERT_HOST(index >= 0);
+    ASSERT_HOST(index < len);
+    return ptr[index];
+  }
+
+  int32_t length() const { // map length
+    return len;
+  }
+
+  int16_t accept_count() const; // How many accepted?
+
+  int16_t reject_count() const { // How many rejects?
+    return len - accept_count();
+  }
+
+  void remove_pos(  // Cut out an element
+      int16_t pos); // element to remove
+
+  // Writes the map as a quoted string of display characters.
+  void print(FILE *fp) const;
+
+  // Writes the full flag listing of every entry.
+  void full_print(FILE *fp) const;
+
+  bool recoverable_rejects() const; // Any non perm rejs?
+
+  bool quality_recoverable_rejects() const; // Any potential rejs?
+
+  // Each function below rejects the whole word by setting one particular
+  // reject flag on the entries of the map.
+  void rej_word_small_xht();
+  void rej_word_tess_failure();
+  void rej_word_not_tess_accepted();
+  void rej_word_contains_blanks();
+  void rej_word_bad_permuter();
+  void rej_word_xht_fixup();
+  void rej_word_no_alphanums();
+  void rej_word_mostly_rej();
+  void rej_word_bad_quality();
+  void rej_word_doc_rej();
+  void rej_word_block_rej();
+  void rej_word_row_rej();
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/seam.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/seam.cpp
@ -0,0 +1,275 @@
+/******************************************************************************
+ *
+ * File:         seam.cpp  (Formerly seam.c)
+ * Author:       Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+/*----------------------------------------------------------------------
+              I n c l u d e s
+----------------------------------------------------------------------*/
+#include "seam.h"
+
+#include "blobs.h"
+#include "tprintf.h"
+
+namespace tesseract {
+
+/*----------------------------------------------------------------------
+        Public Function Code
+----------------------------------------------------------------------*/
+
+// Builds the box that encloses the seam location and every split held by
+// this seam.
+TBOX SEAM::bounding_box() const {
+  // Start from a degenerate box at the seam location, then grow it.
+  TBOX result(location_.x, location_.y, location_.x, location_.y);
+  int split_index = 0;
+  while (split_index < num_splits_) {
+    result += splits_[split_index].bounding_box();
+    ++split_index;
+  }
+  return result;
+}
+
+// Reports whether the splits of this seam look usable: they must not cross
+// any outline and must not chop off a ridiculously small piece.
+bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
+  // A seam without splits has nothing that could be unhealthy.
+  if (num_splits_ == 0) {
+    return true;
+  }
+  // TODO(rays) Try testing all the splits. Duplicating original code for now,
+  // which tested only the first.
+  return splits_[0].IsHealthy(blob, min_points, min_area);
+}
+
+// Recomputes the widthp_/widthn_ range of every existing seam and of *this
+// seam, assuming *this is about to be inserted at insert_index. Any failed
+// computation means the chop is invalid and makes the function return false.
+// The width members are only written when modify is true.
+bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
+                               const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
+  // Seams before the insertion point keep their current blob index.
+  int before = 0;
+  while (before < insert_index) {
+    if (!seams[before]->FindBlobWidth(blobs, before, modify)) {
+      return false;
+    }
+    ++before;
+  }
+  // The new seam itself sits at insert_index.
+  if (!FindBlobWidth(blobs, insert_index, modify)) {
+    return false;
+  }
+  // Seams from the insertion point onwards are shifted up by one blob.
+  unsigned after = insert_index;
+  while (after < seams.size()) {
+    if (!seams[after]->FindBlobWidth(blobs, after + 1, modify)) {
+      return false;
+    }
+    ++after;
+  }
+  return true;
+}
+
+// Locates, for each split, a blob that contains it, starting at blobs[index],
+// then searching to the right and finally to the left. Returns true only if
+// every split was located. widthp_/widthn_ are reset and widened to cover the
+// blobs found, but only when modify is true.
+bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
+  if (modify) {
+    widthp_ = 0;
+    widthn_ = 0;
+  }
+  int located = 0;
+  for (int split_index = 0; split_index < num_splits_; ++split_index) {
+    const SPLIT &current = splits_[split_index];
+    bool located_here = current.ContainedByBlob(*blobs[index]);
+    // Search the blobs to the right until one contains the split.
+    unsigned right = index + 1;
+    while (!located_here && right < blobs.size()) {
+      located_here = current.ContainedByBlob(*blobs[right]);
+      if (located_here && right - index > widthp_ && modify) {
+        widthp_ = right - index;
+      }
+      ++right;
+    }
+    // Search the blobs to the left if the split is still unaccounted for.
+    int left = index - 1;
+    while (!located_here && left >= 0) {
+      located_here = current.ContainedByBlob(*blobs[left]);
+      if (located_here && index - left > widthn_ && modify) {
+        widthn_ = index - left;
+      }
+      --left;
+    }
+    if (located_here) {
+      ++located;
+    }
+  }
+  return located == num_splits_;
+}
+
+// Applies every split of *this SEAM to blob's outlines and then divides the
+// result between blob and other_blob.
+void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
+  const SPLIT *const splits_end = splits_ + num_splits_;
+  for (const SPLIT *split = splits_; split != splits_end; ++split) {
+    split->SplitOutlineList(blob->outlines);
+  }
+  blob->ComputeBoundingBoxes();
+
+  divide_blobs(blob, other_blob, italic_blob, location_);
+
+  // Splitting may have produced duplicate outlines in either half.
+  blob->EliminateDuplicateOutlines();
+  other_blob->EliminateDuplicateOutlines();
+
+  blob->CorrectBlobOrder(other_blob);
+}
+
+// Undoes ApplySeam by removing the seam between these two blobs.
+// Produces one blob as a result, and deletes other_blob.
+//   blob:       receives all outlines of both blobs.
+//   other_blob: its outlines are moved into blob, then it is deleted; the
+//               caller must not use it afterwards.
+void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
+  // If blob has no outlines of its own, it simply takes over other_blob's.
+  if (blob->outlines == nullptr) {
+    blob->outlines = other_blob->outlines;
+    other_blob->outlines = nullptr;
+  }
+
+  // Append other_blob's outlines to the end of blob's list. When neither
+  // blob has any outline there is no tail to append to, so the walk is
+  // skipped instead of dereferencing a null pointer.
+  TESSLINE *outline = blob->outlines;
+  if (outline != nullptr) {
+    while (outline->next) {
+      outline = outline->next;
+    }
+    outline->next = other_blob->outlines;
+  }
+  // Detach the outlines from other_blob before deleting it.
+  other_blob->outlines = nullptr;
+  delete other_blob;
+
+  for (int s = 0; s < num_splits_; ++s) {
+    splits_[s].UnsplitOutlineList(blob);
+  }
+  blob->ComputeBoundingBoxes();
+  blob->EliminateDuplicateOutlines();
+}
+
+// Prints the label, then the priority, location and width range of *this
+// SEAM, followed by each of its splits separated by commas.
+void SEAM::Print(const char *label) const {
+  tprintf("%s", label);
+  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, widthp_, widthn_);
+  for (int split_index = 0; split_index < num_splits_; ++split_index) {
+    // The separator goes between splits, never after the last one.
+    if (split_index > 0) {
+      tprintf(",   ");
+    }
+    splits_[split_index].Print();
+  }
+  tprintf("\n");
+}
+
+// Prints the label followed by one numbered line per SEAM. Prints nothing
+// at all when the collection is empty.
+/* static */
+void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
+  if (seams.empty()) {
+    return;
+  }
+  tprintf("%s\n", label);
+  unsigned seam_number = 0;
+  while (seam_number < seams.size()) {
+    tprintf("%2u:   ", seam_number);
+    seams[seam_number]->Print("");
+    ++seam_number;
+  }
+  tprintf("\n");
+}
+
+#ifndef GRAPHICS_DISABLED
+// Draws every split of this seam in the given window.
+void SEAM::Mark(ScrollView *window) const {
+  const SPLIT *const splits_end = splits_ + num_splits_;
+  for (const SPLIT *split = splits_; split != splits_end; ++split) {
+    split->Mark(window);
+  }
+}
+#endif
+
+// Breaks up the blobs first..last so that their outline lists are independent
+// again. This operation should undo the effect of JoinPieces.
+/* static */
+void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                       int first, int last) {
+  // Make the seams inside the range visible again.
+  int seam_index = first;
+  while (seam_index < last) {
+    seams[seam_index]->Reveal();
+    ++seam_index;
+  }
+
+  // Walk the joined outline chain and cut it wherever the next outline is
+  // the head of the following blob's own list.
+  int upcoming = first + 1;
+  for (TESSLINE *cursor = blobs[first]->outlines; cursor != nullptr && upcoming <= last;) {
+    if (cursor->next != blobs[upcoming]->outlines) {
+      cursor = cursor->next;
+      continue;
+    }
+    cursor->next = nullptr;
+    cursor = blobs[upcoming]->outlines;
+    ++upcoming;
+  }
+}
+
+// Chains the outline lists of blobs first..last into one list hanging off
+// blobs[first], so the group can be classified as a single blob. Seams whose
+// width range lies entirely inside [first, last) are hidden on the way.
+/* static */
+void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                      int first, int last) {
+  TESSLINE *tail = blobs[first]->outlines;
+  if (tail == nullptr) {
+    return;
+  }
+
+  for (int piece = first; piece < last; ++piece) {
+    SEAM *const seam = seams[piece];
+    const bool inside_range = piece - seam->widthn_ >= first && piece + seam->widthp_ < last;
+    if (inside_range) {
+      seam->Hide();
+    }
+    // Advance to the current end of the chain, then hook on the next blob.
+    for (; tail->next; tail = tail->next) {
+    }
+    tail->next = blobs[piece + 1]->outlines;
+  }
+}
+
+// Hides every split of the seam, so the outlines appear not to be cut by it.
+void SEAM::Hide() const {
+  const SPLIT *const splits_end = splits_ + num_splits_;
+  for (const SPLIT *split = splits_; split != splits_end; ++split) {
+    split->Hide();
+  }
+}
+
+// Reveals every split of the seam again, undoing Hide, so the outlines are
+// cut by the seam.
+void SEAM::Reveal() const {
+  const SPLIT *const splits_end = splits_ + num_splits_;
+  for (const SPLIT *split = splits_; split != splits_end; ++split) {
+    split->Reveal();
+  }
+}
+
+// Computes and returns the full priority of *this SEAM without storing it:
+// the stored priority plus the full priority of the first split. A seam
+// without splits has a full priority of zero.
+float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
+                         double center_knob, double width_change_knob) const {
+  if (num_splits_ == 0) {
+    return 0.0f;
+  }
+  // Temporarily apply all splits except the first while it is evaluated.
+  int split_index = 1;
+  while (split_index < num_splits_) {
+    splits_[split_index].SplitOutline();
+    ++split_index;
+  }
+  const float first_split_priority = splits_[0].FullPriority(
+      xmin, xmax, overlap_knob, centered_maxwidth, center_knob, width_change_knob);
+  float full_priority = priority_ + first_split_priority;
+  // Remove the temporary splits again, in reverse order of application.
+  split_index = num_splits_ - 1;
+  while (split_index >= 1) {
+    splits_[split_index].UnsplitOutlines();
+    --split_index;
+  }
+  return full_priority;
+}
+
+/**
+ * @name start_seam_list
+ *
+ * Clears seam_array and fills it with one newly allocated seam per gap
+ * between adjacent blobs of the word. Each seam created here has a zero
+ * priority and location information only (no splits).
+ */
+void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
+  seam_array->clear();
+  TPOINT location;
+
+  for (int prev = 0; prev + 1 < word->NumBlobs(); ++prev) {
+    TBOX left_box = word->blobs[prev]->bounding_box();
+    TBOX right_box = word->blobs[prev + 1]->bounding_box();
+    // x: midway across the gap; y: mean of the four vertical box edges.
+    location.x = (left_box.right() + right_box.left()) / 2;
+    location.y = (left_box.bottom() + left_box.top() + right_box.bottom() + right_box.top()) / 4;
+    seam_array->push_back(new SEAM(0.0f, location));
+  }
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/seam.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/seam.h
@ -0,0 +1,206 @@
+/******************************************************************************
+ *
+ * File:        seam.h
+ * Author:      Mark Seaman, SW Productivity
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+#ifndef SEAM_H
+#define SEAM_H
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "blobs.h"
+#include "split.h"
+
+namespace tesseract {
+
+// Alias for a float priority value.
+// NOTE(review): presumably the same quantity as SEAM's priority, which is
+// also a float - confirm against the users of this alias.
+using PRIORITY = float; /*  PRIORITY  */
+
+// A SEAM groups up to kMaxNumSplits SPLITs together with a priority, a
+// location and the range of blobs that contain the splits.
+class SEAM {
+public:
+  // Creates a seam that carries no splits.
+  SEAM(float priority, const TPOINT &location)
+      : priority_(priority), location_(location), widthp_(0), widthn_(0), num_splits_(0) {}
+  // Creates a seam that carries exactly one split.
+  SEAM(float priority, const TPOINT &location, const SPLIT &split)
+      : priority_(priority), location_(location), widthp_(0), widthn_(0), num_splits_(1) {
+    splits_[0] = split;
+  }
+  // Default copy constructor, operator= and destructor are OK!
+
+  // Accessors.
+  float priority() const {
+    return priority_;
+  }
+  void set_priority(float priority) {
+    priority_ = priority;
+  }
+  bool HasAnySplits() const {
+    return num_splits_ != 0;
+  }
+
+  // Returns the bounding box of all the points in the seam.
+  TBOX bounding_box() const;
+
+  // Returns true if other can be combined into *this: the x distance between
+  // the locations is below max_x_dist, the combined splits fit, the combined
+  // priority is below max_total_priority, and the two seams neither overlap
+  // vertically nor share a position.
+  bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const {
+    const int x_dist = location_.x - other.location_.x;
+    if (!(-max_x_dist < x_dist && x_dist < max_x_dist)) {
+      return false;
+    }
+    if (!(num_splits_ + other.num_splits_ <= kMaxNumSplits)) {
+      return false;
+    }
+    if (!(priority_ + other.priority_ < max_total_priority)) {
+      return false;
+    }
+    if (OverlappingSplits(other)) {
+      return false;
+    }
+    return !SharesPosition(other);
+  }
+
+  // Combines other into *this. Only works if CombineableWith returned true.
+  void CombineWith(const SEAM &other) {
+    priority_ += other.priority_;
+    // The combined seam sits midway between the two locations.
+    location_ += other.location_;
+    location_ /= 2;
+
+    uint8_t source = 0;
+    while (source < other.num_splits_ && num_splits_ < kMaxNumSplits) {
+      splits_[num_splits_++] = other.splits_[source];
+      ++source;
+    }
+  }
+
+  // Returns true if the given blob contains all splits of *this SEAM.
+  bool ContainedByBlob(const TBLOB &blob) const {
+    for (int i = 0; i < num_splits_; ++i) {
+      const bool inside = splits_[i].ContainedByBlob(blob);
+      if (!inside) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Returns true if the given EDGEPT is used by this SEAM, checking only
+  // the EDGEPT pointer, not the coordinates.
+  bool UsesPoint(const EDGEPT *point) const {
+    for (int i = 0; i < num_splits_; ++i) {
+      const bool used = splits_[i].UsesPoint(point);
+      if (used) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Returns true if *this and other share any common point, by coordinates.
+  bool SharesPosition(const SEAM &other) const {
+    for (int mine = 0; mine < num_splits_; ++mine) {
+      const SPLIT &my_split = splits_[mine];
+      for (int theirs = 0; theirs < other.num_splits_; ++theirs) {
+        if (my_split.SharesPosition(other.splits_[theirs])) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+  // Returns true if *this and other have any vertically overlapping splits.
+  bool OverlappingSplits(const SEAM &other) const {
+    for (int mine = 0; mine < num_splits_; ++mine) {
+      TBOX my_box = splits_[mine].bounding_box();
+      for (int theirs = 0; theirs < other.num_splits_; ++theirs) {
+        TBOX their_box = other.splits_[theirs].bounding_box();
+        if (my_box.y_overlap(their_box)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  // Marks the edgepts used by the seam so the segments made by the cut
+  // never get split further by another seam in the future.
+  void Finalize() {
+    for (int i = 0; i < num_splits_; ++i) {
+      const SPLIT &split = splits_[i];
+      split.point1->MarkChop();
+      split.point2->MarkChop();
+    }
+  }
+
+  // Returns true if the splits in *this SEAM appear OK in the sense that they
+  // do not cross any outlines and do not chop off any ridiculously small
+  // pieces.
+  bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
+
+  // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
+  // seam, which is about to be inserted at insert_index. Returns false if
+  // any of the computations fails, as this indicates an invalid chop.
+  // widthn_/widthp_ are only changed if modify is true.
+  bool PrepareToInsertSeam(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                           int insert_index, bool modify);
+  // Computes the widthp_/widthn_ range. Returns false if not all the splits
+  // are accounted for. widthn_/widthp_ are only changed if modify is true.
+  bool FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify);
+
+  // Splits this blob into two blobs by applying the splits included in
+  // *this SEAM
+  void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const;
+  // Undoes ApplySeam by removing the seam between these two blobs.
+  // Produces one blob as a result, and deletes other_blob.
+  void UndoSeam(TBLOB *blob, TBLOB *other_blob) const;
+
+  // Prints everything in *this SEAM.
+  void Print(const char *label) const;
+  // Prints a collection of SEAMs.
+  static void PrintSeams(const char *label, const std::vector<SEAM *> &seams);
+#ifndef GRAPHICS_DISABLED
+  // Draws the seam in the given window.
+  void Mark(ScrollView *window) const;
+#endif
+
+  // Break up the blobs in this chain so that they are all independent.
+  // This operation should undo the effect of JoinPieces.
+  static void BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                          int first, int last);
+  // Join a group of base level pieces into a single blob that can then
+  // be classified.
+  static void JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
+                         int first, int last);
+
+  // Hides the seam so the outlines appear not to be cut by it.
+  void Hide() const;
+  // Undoes hide, so the outlines are cut by the seam.
+  void Reveal() const;
+
+  // Computes and returns, but does not set, the full priority of *this SEAM.
+  // The arguments here are config parameters defined in Wordrec. Add chop_
+  // to the beginning of the name.
+  float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
+                     double center_knob, double width_change_knob) const;
+
+private:
+  // Maximum number of splits that a SEAM can hold.
+  static const uint8_t kMaxNumSplits = 3;
+  // Priority of this split. Lower is better.
+  float priority_;
+  // Position of the middle of the seam.
+  TPOINT location_;
+  // A range such that all splits in *this SEAM are contained within blobs in
+  // the range [index - widthn_,index + widthp_] where index is the index of
+  // this SEAM in the seams vector.
+  int8_t widthp_;
+  int8_t widthn_;
+  // Number of splits_ that are used.
+  uint8_t num_splits_;
+  // Set of pairs of points that are the ends of each split in the SEAM.
+  SPLIT splits_[kMaxNumSplits];
+};
+
+// Clears seam_array and fills it with seams for the blobs of word.
+// NOTE(review): the implementation in seam.cpp allocates each SEAM with
+// new, so ownership presumably passes to the caller - confirm.
+void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/split.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/split.cpp
@ -0,0 +1,324 @@
+/******************************************************************************
+ *
+ * File:         split.cpp  (Formerly split.c)
+ * Author:       Mark Seaman, OCR Technology
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *************************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "split.h"
+
+#include "coutln.h"
+#include "tprintf.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+/*----------------------------------------------------------------------
+              V a r i a b l e s
+----------------------------------------------------------------------*/
+// Limit on the amount of penalty for the chop being off-center.
+// Used as the upper bound of the centering term in SPLIT::FullPriority.
+const int kCenterGradeCap = 25;
+// Ridiculously large priority for splits that are no use.
+// Returned by SPLIT::FullPriority when xmin/xmax lie strictly outside the
+// combined extent of both segment boxes.
+const double kBadPriority = 999.0;
+
+// Boolean parameter with default 0 and description "Display splits".
+BOOL_VAR(wordrec_display_splits, 0, "Display splits");
+
+// Hides the SPLIT so the outlines appear not to be cut by it. Each pass walks
+// forward from one end point, hiding points until it reaches a point at the
+// other end's position or arrives back where it started.
+void SPLIT::Hide() const {
+  EDGEPT *walker = point1;
+  for (;;) {
+    walker->Hide();
+    walker = walker->next;
+    if (walker->EqualPos(*point2) || walker == point1) {
+      break;
+    }
+  }
+  walker = point2;
+  for (;;) {
+    walker->Hide();
+    walker = walker->next;
+    if (walker->EqualPos(*point1) || walker == point2) {
+      break;
+    }
+  }
+}
+
+// Undoes Hide, so the outlines are cut by the SPLIT. Each pass walks forward
+// from one end point, revealing points until it reaches a point at the other
+// end's position or arrives back where it started.
+void SPLIT::Reveal() const {
+  EDGEPT *walker = point1;
+  for (;;) {
+    walker->Reveal();
+    walker = walker->next;
+    if (walker->EqualPos(*point2) || walker == point1) {
+      break;
+    }
+  }
+  walker = point2;
+  for (;;) {
+    walker->Reveal();
+    walker = walker->next;
+    if (walker->EqualPos(*point1) || walker == point2) {
+      break;
+    }
+  }
+}
+
+// Computes a split priority from the bounding boxes of the two parts the
+// split would create. The grade is the sum of an overlap term, a centering
+// term and a width-change term; kBadPriority is returned when xmin/xmax lie
+// strictly outside the combined extent of both boxes.
+// The arguments here are config parameters defined in Wordrec. Add chop_
+// to the beginning of the name.
+float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
+                          double center_knob, double width_change_knob) const {
+  TBOX first_box = Box12();
+  TBOX second_box = Box21();
+  int leftmost = std::min(first_box.left(), second_box.left());
+  int rightmost = std::max(first_box.right(), second_box.right());
+  if (xmin < leftmost && xmax > rightmost) {
+    return kBadPriority;
+  }
+
+  int first_width = first_box.width();
+  int second_width = second_box.width();
+  int narrower = std::min(first_width, second_width);
+  float grade = 0.0f;
+
+  // Overlap term.
+  int overlap = -first_box.x_gap(second_box);
+  if (overlap != narrower) {
+    if (2 * overlap > narrower) {
+      overlap += 2 * overlap - narrower;
+    }
+    if (overlap > 0) {
+      grade += overlap_knob * overlap;
+    }
+  } else {
+    grade += 100.0f; // Total overlap.
+  }
+
+  // Centering term, applied only when at least one part is narrow enough.
+  const bool has_narrow_part =
+      first_width <= centered_maxwidth || second_width <= centered_maxwidth;
+  if (has_narrow_part) {
+    grade += std::min(static_cast<double>(kCenterGradeCap),
+                      center_knob * abs(first_width - second_width));
+  }
+
+  // Width-change term.
+  float width_change_grade = 20 - (rightmost - leftmost - std::max(first_width, second_width));
+  if (width_change_grade > 0.0f) {
+    grade += width_change_grade * width_change_knob;
+  }
+  return grade;
+}
+
+// Reports whether *this SPLIT looks usable: it must not chop off a
+// ridiculously small piece and the segment between its two points must not
+// cross any outline of the blob.
+bool SPLIT::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
+  if (IsLittleChunk(min_points, min_area)) {
+    return false;
+  }
+  return !blob.SegmentCrossesOutline(point1->pos, point2->pos);
+}
+
+// Returns true if either side of the split is a small chunk, i.e. a short
+// non-circular segment whose area is below min_area.
+bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
+  // Side running from point1 to point2.
+  const bool small_12 = point1->ShortNonCircularSegment(min_points, point2) &&
+                        point1->SegmentArea(point2) < min_area;
+  if (small_12) {
+    return true;
+  }
+  // Side running from point2 back to point1.
+  return point2->ShortNonCircularSegment(min_points, point1) &&
+         point2->SegmentArea(point1) < min_area;
+}
+
+/**********************************************************************
+ * make_edgept
+ *
+ * Create an EDGEPT and hook it into an existing list of edge points.
+ *
+ * The new point is placed at (x, y) and linked between prev and next.
+ * If prev refers to a source outline and next directly follows prev,
+ * prev's range of outline steps is divided between prev and the new
+ * point; otherwise the new point gets no source outline information.
+ * Returns the newly allocated EDGEPT.
+ **********************************************************************/
+EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
+  EDGEPT *this_edgept;
+  /* Create point */
+  this_edgept = new EDGEPT;
+  this_edgept->pos.x = x;
+  this_edgept->pos.y = y;
+  // Now deal with the src_outline steps.
+  C_OUTLINE *prev_ol = prev->src_outline;
+  if (prev_ol != nullptr && prev->next == next) {
+    // Compute the fraction of the segment that is being cut.
+    FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
+    FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
+    // NOTE(review): no guard here for a zero-length segment (prev and next
+    // at the same position) - confirm callers never pass one.
+    double cut_fraction = target_vec.length() / segment_vec.length();
+    // Get the start and end at the step level.
+    ICOORD step_start = prev_ol->position_at_index(prev->start_step);
+    int end_step = prev->start_step + prev->step_count;
+    int step_length = prev_ol->pathlength();
+    // Step indices are taken modulo the outline's path length.
+    ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
+    ICOORD step_vec = step_end - step_start;
+    double target_length = step_vec.length() * cut_fraction;
+    // Find the point on the segment that gives the length nearest to target.
+    int best_step = prev->start_step;
+    ICOORD total_step(0, 0);
+    // Distance to the target with no steps taken; a later step only wins if
+    // it is strictly closer.
+    double best_dist = target_length;
+    for (int s = prev->start_step; s < end_step; ++s) {
+      total_step += prev_ol->step(s % step_length);
+      double dist = fabs(target_length - total_step.length());
+      if (dist < best_dist) {
+        best_dist = dist;
+        best_step = s + 1;
+      }
+    }
+    // The new point is an intermediate point.
+    // It takes the steps from best_step to end_step; prev keeps the steps
+    // before best_step.
+    this_edgept->src_outline = prev_ol;
+    this_edgept->step_count = end_step - best_step;
+    this_edgept->start_step = best_step % step_length;
+    prev->step_count = best_step - prev->start_step;
+  } else {
+    // The new point is poly only.
+    this_edgept->src_outline = nullptr;
+    this_edgept->step_count = 0;
+    this_edgept->start_step = 0;
+  }
+  /* Hook it up */
+  this_edgept->next = next;
+  this_edgept->prev = prev;
+  prev->next = this_edgept;
+  next->prev = this_edgept;
+  /* Set up vec entries */
+  // vec of a point is the offset to its next point, so both the new point
+  // and prev need a fresh value.
+  this_edgept->vec.x = this_edgept->next->pos.x - x;
+  this_edgept->vec.y = this_edgept->next->pos.y - y;
+  this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
+  this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
+  return this_edgept;
+}
+
+/**********************************************************************
+ * remove_edgept
+ *
+ * Unlink the given EDGEPT from its list, fold its outline steps into
+ * its predecessor when both come from the same source outline, and
+ * delete it.
+ **********************************************************************/
+void remove_edgept(EDGEPT *point) {
+  EDGEPT *before = point->prev;
+  EDGEPT *after = point->next;
+  // Add point's steps onto its predecessor if they share a source outline.
+  const bool same_outline =
+      before->src_outline == point->src_outline && before->src_outline != nullptr;
+  if (same_outline) {
+    before->step_count += point->step_count;
+  }
+  // Bridge the neighbours over the removed point.
+  before->next = after;
+  after->prev = before;
+  // The predecessor now points straight at the successor.
+  before->vec.x = after->pos.x - before->pos.x;
+  before->vec.y = after->pos.y - before->pos.y;
+  delete point;
+}
+
+/**********************************************************************
+ * Print
+ *
+ * Prints the coordinates of the two end points of the split.
+ **********************************************************************/
+void SPLIT::Print() const {
+  const auto &from = point1->pos;
+  const auto &to = point2->pos;
+  tprintf("(%d,%d)--(%d,%d)", from.x, from.y, to.x, to.y);
+}
+
+#ifndef GRAPHICS_DISABLED
+// Draws the split as a green line between its two points in the given
+// window, then updates the window.
+void SPLIT::Mark(ScrollView *window) const {
+  const auto &from = point1->pos;
+  const auto &to = point2->pos;
+  window->Pen(ScrollView::GREEN);
+  window->Line(from.x, from.y, to.x, to.y);
+  window->UpdateWindow();
+}
+#endif
+
+// Splits the outline at this SPLIT and appends two new outlines to the end
+// of the given list: one whose loop starts at point1, then one whose loop
+// starts at point2.
+void SPLIT::SplitOutlineList(TESSLINE *outlines) const {
+  SplitOutline();
+
+  // Find the current end of the list.
+  TESSLINE *tail = outlines;
+  while (tail->next != nullptr) {
+    tail = tail->next;
+  }
+
+  TESSLINE *first_added = new TESSLINE;
+  tail->next = first_added;
+  first_added->loop = point1;
+  first_added->ComputeBoundingBox();
+
+  TESSLINE *second_added = new TESSLINE;
+  first_added->next = second_added;
+  second_added->loop = point2;
+  second_added->ComputeBoundingBox();
+
+  second_added->next = nullptr;
+}
+
+// Makes a split between the two edge points by inserting a duplicate of each
+// end point, without touching the outlines the points belong to.
+void SPLIT::SplitOutline() const {
+  EDGEPT *after_point2 = point2->next;
+  EDGEPT *after_point1 = point1->next;
+  /* Create two new points */
+  EDGEPT *duplicate1 = make_edgept(point1->pos.x, point1->pos.y, after_point1, point2);
+  EDGEPT *duplicate2 = make_edgept(point2->pos.x, point2->pos.y, after_point2, point1);
+
+  // point1 and point2 are now cross-over points: their source outline
+  // information moves to the duplicates, and they end up with none.
+  const auto hand_over = [](EDGEPT *to, const EDGEPT *from) {
+    to->src_outline = from->src_outline;
+    to->start_step = from->start_step;
+    to->step_count = from->step_count;
+  };
+  const auto strip = [](EDGEPT *pt) {
+    pt->src_outline = nullptr;
+    pt->start_step = 0;
+    pt->step_count = 0;
+  };
+  // Both hand-overs happen before either original is stripped.
+  hand_over(duplicate1, point1);
+  hand_over(duplicate2, point2);
+  strip(point1);
+  strip(point2);
+}
+
+// Undoes the effect of SplitOutlineList: removes the split from the edge
+// points and pushes two new outlines onto the front of the blob's list, one
+// looping from point1 and one from point2. Some of the blob's outlines may
+// end up duplicated.
+void SPLIT::UnsplitOutlineList(TBLOB *blob) const {
+  /* Modify edge points */
+  UnsplitOutlines();
+
+  const auto push_front = [blob](EDGEPT *loop_start) {
+    auto *added = new TESSLINE;
+    added->next = blob->outlines;
+    blob->outlines = added;
+    added->loop = loop_start;
+  };
+  push_front(point1);
+  push_front(point2);
+}
+
+// Removes the split that was put between these two points: the two points
+// that directly follow point1 and point2 are deleted, and point1/point2 take
+// over their links and source outline information.
+void SPLIT::UnsplitOutlines() const {
+  EDGEPT *after_point1 = point1->next;
+  EDGEPT *after_point2 = point2->next;
+
+  // Re-point the back links of the points that follow the doomed ones.
+  after_point1->next->prev = point2;
+  after_point2->next->prev = point1;
+
+  const auto take_place_of = [](EDGEPT *keeper, const EDGEPT *doomed) {
+    keeper->next = doomed->next;
+    keeper->src_outline = doomed->src_outline;
+    keeper->start_step = doomed->start_step;
+    keeper->step_count = doomed->step_count;
+  };
+  // after_point2 is coincident with point1, so point1 takes its place.
+  take_place_of(point1, after_point2);
+  // Likewise point2 takes after_point1's place.
+  take_place_of(point2, after_point1);
+
+  delete after_point1;
+  delete after_point2;
+
+  // Each surviving point has a new successor, so its vec must be refreshed.
+  const auto refresh_vec = [](EDGEPT *pt) {
+    pt->vec.x = pt->next->pos.x - pt->pos.x;
+    pt->vec.y = pt->next->pos.y - pt->pos.y;
+  };
+  refresh_vec(point1);
+  refresh_vec(point2);
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/split.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/split.h
@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * File:        split.h
+ * Author:      Mark Seaman, SW Productivity
+ * Status:      Reusable Software Component
+ *
+ * (c) Copyright 1987, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ *****************************************************************************/
+#ifndef SPLIT_H
+#define SPLIT_H
+
+#include "blobs.h"  // for EDGEPT, TBLOB, TESSLINE
+#include "params.h" // for BOOL_VAR_H, BoolParam
+#include "rect.h"   // for TBOX
+
+namespace tesseract {
+
+class ScrollView;
+
+/*----------------------------------------------------------------------
+              T y p e s
+----------------------------------------------------------------------*/
+// A SPLIT is a pair of edge points between which an outline is to be cut.
+struct SPLIT {
+  SPLIT() : point1(nullptr), point2(nullptr) {}
+  SPLIT(EDGEPT *pt1, EDGEPT *pt2) : point1(pt1), point2(pt2) {}
+
+  // Returns the bounding box of the two end points of the split.
+  TBOX bounding_box() const {
+    const auto &a = point1->pos;
+    const auto &b = point2->pos;
+    return TBOX(std::min(a.x, b.x), std::min(a.y, b.y), std::max(a.x, b.x),
+                std::max(a.y, b.y));
+  }
+
+  // Returns the bounding box of the outline from point1 to point2.
+  TBOX Box12() const {
+    return point1->SegmentBox(point2);
+  }
+  // Returns the bounding box of the outline from point2 to point1.
+  TBOX Box21() const {
+    return point2->SegmentBox(point1);
+  }
+
+  // Hides the SPLIT so the outlines appear not to be cut by it.
+  void Hide() const;
+  // Undoes hide, so the outlines are cut by the SPLIT.
+  void Reveal() const;
+
+  // Returns true if the given EDGEPT is used by this SPLIT, checking only
+  // the EDGEPT pointer, not the coordinates.
+  bool UsesPoint(const EDGEPT *point) const {
+    if (point1 == point) {
+      return true;
+    }
+    return point2 == point;
+  }
+  // Returns true if the other SPLIT has any position shared with *this.
+  bool SharesPosition(const SPLIT &other) const {
+    if (point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2)) {
+      return true;
+    }
+    return point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
+  }
+  // Returns true if both points are contained within the blob.
+  bool ContainedByBlob(const TBLOB &blob) const {
+    if (!blob.Contains(point1->pos)) {
+      return false;
+    }
+    return blob.Contains(point2->pos);
+  }
+  // Returns true if both points are contained within the outline.
+  bool ContainedByOutline(const TESSLINE &outline) const {
+    if (!outline.Contains(point1->pos)) {
+      return false;
+    }
+    return outline.Contains(point2->pos);
+  }
+  // Compute a split priority based on the bounding boxes of the parts.
+  // The arguments here are config parameters defined in Wordrec. Add chop_
+  // to the beginning of the name.
+  float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
+                     double center_knob, double width_change_knob) const;
+  // Returns true if *this SPLIT appears OK in the sense that it does not cross
+  // any outlines and does not chop off any ridiculously small pieces.
+  bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
+  // Returns true if the split generates a small chunk in terms of either area
+  // or number of points.
+  bool IsLittleChunk(int min_points, int min_area) const;
+
+  // Prints the coordinates of both points of the split.
+  void Print() const;
+#ifndef GRAPHICS_DISABLED
+  // Draws the split in the given window.
+  void Mark(ScrollView *window) const;
+#endif
+
+  // Creates two outlines out of one by splitting the original one in half.
+  // Inserts the resulting outlines into the given list.
+  void SplitOutlineList(TESSLINE *outlines) const;
+  // Makes a split between these two edge points, but does not affect the
+  // outlines to which they belong.
+  void SplitOutline() const;
+  // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
+  // the split, but possibly leaving some duplicate outlines.
+  void UnsplitOutlineList(TBLOB *blob) const;
+  // Removes the split that was put between these two points.
+  void UnsplitOutlines() const;
+
+  // The two ends of the split.
+  EDGEPT *point1;
+  EDGEPT *point2;
+};
+
+/*----------------------------------------------------------------------
+              V a r i a b l e s
+----------------------------------------------------------------------*/
+
+// Declaration of the boolean parameter wordrec_display_splits ("Display
+// splits"); the matching BOOL_VAR definition is in split.cpp.
+extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
+
+/*----------------------------------------------------------------------
+              F u n c t i o n s
+----------------------------------------------------------------------*/
+// Creates an EDGEPT at (x, y), hooks it into the list between prev and next,
+// and returns it.
+EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
+
+// Removes the given EDGEPT from its list and deletes it.
+void remove_edgept(EDGEPT *point);
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/statistc.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/statistc.cpp
@ -0,0 +1,639 @@
+/**********************************************************************
+ * File:        statistc.cpp  (Formerly stats.c)
+ * Description: Simple statistical package for integer values.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "statistc.h"
+
+#include "errcode.h"
+#include "scrollview.h"
+#include "tprintf.h"
+
+#include "helpers.h"
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+
+namespace tesseract {
+
+/**********************************************************************
+ * STATS::STATS
+ *
+ * Builds a histogram covering [min_bucket_value, max_bucket_value_plus_1)
+ * with every bucket zeroed. An empty or inverted range is replaced by the
+ * single-bucket range [0, 1).
+ **********************************************************************/
+STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
+  // Decide once whether the caller's range can be used as given.
+  const bool usable_range = min_bucket_value < max_bucket_value_plus_1;
+  rangemin_ = usable_range ? min_bucket_value : 0;
+  rangemax_ = usable_range ? max_bucket_value_plus_1 : 1;
+  const int32_t bucket_total = rangemax_ - rangemin_;
+  buckets_ = new int32_t[bucket_total];
+  clear(); // zero the new buckets and the running total
+}
+
+/**********************************************************************
+ * STATS::set_range
+ *
+ * Changes the range of an existing histogram and clears all counts.
+ * Returns false, leaving the object untouched, if the requested range is
+ * empty or inverted; returns true otherwise.
+ **********************************************************************/
+bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
+  if (min_bucket_value >= max_bucket_value_plus_1) {
+    return false; // nothing sensible to allocate
+  }
+  const int32_t new_size = max_bucket_value_plus_1 - min_bucket_value;
+  const int32_t old_size = rangemax_ - rangemin_;
+  if (new_size != old_size) {
+    // The existing array is reused whenever the bucket count is unchanged.
+    delete[] buckets_;
+    buckets_ = new int32_t[new_size];
+  }
+  rangemin_ = min_bucket_value;
+  rangemax_ = max_bucket_value_plus_1;
+  clear(); // counts never survive a range change
+  return true;
+}
+
+/**********************************************************************
+ * STATS::clear
+ *
+ * Resets the total count and, if buckets are allocated, zeroes them all.
+ **********************************************************************/
+void STATS::clear() {
+  total_count_ = 0;
+  if (buckets_ == nullptr) {
+    return; // default-constructed object: no storage to wipe
+  }
+  const size_t byte_count = (rangemax_ - rangemin_) * sizeof(buckets_[0]);
+  memset(buckets_, 0, byte_count);
+}
+
+/**********************************************************************
+ * STATS::~STATS
+ *
+ * Destructor for a stats class. Releases the bucket array allocated by
+ * the constructor or set_range().
+ **********************************************************************/
+STATS::~STATS() {
+  // buckets_ may still be nullptr here; delete[] on nullptr does nothing.
+  delete[] buckets_;
+}
+
+/**********************************************************************
+ * STATS::add
+ *
+ * Adds count samples of the given value to the histogram (a negative
+ * count removes samples). Values outside the range are clipped to the
+ * nearest end bucket. Does nothing if no buckets are allocated.
+ **********************************************************************/
+void STATS::add(int32_t value, int32_t count) {
+  if (buckets_ != nullptr) {
+    const int32_t clipped = ClipToRange(value, rangemin_, rangemax_ - 1);
+    const int32_t slot = clipped - rangemin_;
+    buckets_[slot] += count;
+    total_count_ += count; // running total over all buckets
+  }
+}
+
+/**********************************************************************
+ * STATS::mode
+ *
+ * Returns the value whose bucket holds the largest count, or the range
+ * minimum if no buckets are allocated. The scan runs from the top bucket
+ * downwards and only a strictly larger count replaces the current best,
+ * with bucket 0 as the initial best.
+ **********************************************************************/
+int32_t STATS::mode() const {
+  if (buckets_ == nullptr) {
+    return rangemin_;
+  }
+  int32_t best_slot = 0;
+  int32_t best_count = buckets_[0];
+  int slot = rangemax_ - rangemin_;
+  // Visits slots size-1 .. 1; slot 0 is already the starting candidate.
+  while (--slot > 0) {
+    if (buckets_[slot] > best_count) {
+      best_count = buckets_[slot];
+      best_slot = slot;
+    }
+  }
+  return rangemin_ + best_slot;
+}
+
+/**********************************************************************
+ * STATS::mean
+ *
+ * Returns the mean of the samples, or the range minimum if there are no
+ * buckets or the total count is not positive.
+ **********************************************************************/
+double STATS::mean() const {
+  if (buckets_ == nullptr || total_count_ <= 0) {
+    return static_cast<double>(rangemin_);
+  }
+  const int bucket_total = rangemax_ - rangemin_;
+  // Sum of (bucket offset * count), kept in 64 bits.
+  int64_t weighted = 0;
+  for (int slot = 0; slot < bucket_total; ++slot) {
+    weighted += static_cast<int64_t>(slot) * buckets_[slot];
+  }
+  // Offsets are relative to rangemin_, so shift the result back.
+  return static_cast<double>(weighted) / total_count_ + rangemin_;
+}
+
+/**********************************************************************
+ * STATS::sd
+ *
+ * Returns the standard deviation of the samples, or 0 if there are no
+ * buckets, the total count is not positive, or the computed variance is
+ * not positive.
+ **********************************************************************/
+double STATS::sd() const {
+  if (buckets_ == nullptr || total_count_ <= 0) {
+    return 0.0;
+  }
+  int64_t linear_sum = 0;  // sum of offset * count
+  double square_sum = 0.0; // sum of offset^2 * count
+  int slot = rangemax_ - rangemin_;
+  // Walk the buckets from the top one down to bucket 0.
+  while (slot-- > 0) {
+    const int32_t pile = buckets_[slot];
+    linear_sum += static_cast<int64_t>(slot) * pile;
+    square_sum += static_cast<double>(slot) * slot * pile;
+  }
+  const double mean_offset = static_cast<double>(linear_sum) / total_count_;
+  // Variance = E[x^2] - E[x]^2; the range offset does not affect it.
+  const double variance = square_sum / total_count_ - mean_offset * mean_offset;
+  return variance > 0.0 ? sqrt(variance) : 0.0;
+}
+
+/**********************************************************************
+ * STATS::ile
+ *
+ * Returns the fractile value such that frac fraction (in [0,1]) of samples
+ * has a value less than the return value.
+ * Returns the range minimum if there are no buckets or no samples.
+ * The result is interpolated linearly inside the bucket at which the
+ * running count first reaches the target.
+ **********************************************************************/
+double STATS::ile(double frac) const {
+  if (buckets_ == nullptr || total_count_ == 0) {
+    return static_cast<double>(rangemin_);
+  }
+#if 0
+  // TODO(rays) The existing code doesn't seem to be doing the right thing
+  // with target a double but this substitute crashes the code that uses it.
+  // Investigate and fix properly.
+  int target = IntCastRounded(frac * total_count_);
+  target = ClipToRange(target, 1, total_count_);
+#else
+  // Number of samples that must be accumulated, kept as a double and
+  // limited to [1, total_count_].
+  double target = frac * total_count_;
+  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
+#endif
+  int sum = 0;
+  int index = 0;
+  // Accumulate buckets from the bottom until the target is reached or the
+  // buckets run out; index ends one past the last bucket that was added.
+  for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
+    ;
+  }
+  if (index > 0) {
+    // The last bucket added is the divisor below, so it must be positive.
+    ASSERT_HOST(buckets_[index - 1] > 0);
+    // Step back from the upper edge of that bucket by the fraction of its
+    // count by which the running sum overshot the target.
+    return rangemin_ + index - static_cast<double>(sum - target) / buckets_[index - 1];
+  } else {
+    return static_cast<double>(rangemin_);
+  }
+}
+
+/**********************************************************************
+ * STATS::min_bucket
+ *
+ * Returns the value of the lowest bucket with a non-zero count (ile(0.0)
+ * isn't necessarily correct). Returns the range minimum if there are no
+ * buckets or no samples.
+ **********************************************************************/
+int32_t STATS::min_bucket() const {
+  if (buckets_ == nullptr || total_count_ == 0) {
+    return rangemin_;
+  }
+  const int32_t bucket_total = rangemax_ - rangemin_;
+  int32_t slot = 0;
+  // Skip leading empty buckets.
+  while (slot < bucket_total && buckets_[slot] == 0) {
+    ++slot;
+  }
+  return rangemin_ + slot;
+}
+
+/**********************************************************************
+ * STATS::max_bucket
+ *
+ * Returns the value of the highest bucket with a non-zero count (ile(1.0)
+ * isn't necessarily correct). Returns the range minimum if there are no
+ * buckets or no samples.
+ **********************************************************************/
+
+int32_t STATS::max_bucket() const {
+  if (buckets_ == nullptr || total_count_ == 0) {
+    return rangemin_;
+  }
+  int32_t slot = rangemax_ - rangemin_ - 1;
+  // Skip trailing empty buckets; the scan stops at bucket 0 regardless.
+  while (slot > 0 && buckets_[slot] == 0) {
+    --slot;
+  }
+  return rangemin_ + slot;
+}
+
+/**********************************************************************
+ * STATS::median
+ *
+ * Finds a more useful estimate of the median than ile(0.5).
+ *
+ * ile() interpolates inside a bucket, so for samples such as 6,6,13,14
+ * ile(0.5) returns 7.0. When the bucket at floor(ile(0.5)) is empty, this
+ * returns the midpoint of the nearest occupied buckets on either side
+ * instead (9.5 in the example).
+ **********************************************************************/
+double STATS::median() const {
+  if (buckets_ == nullptr) {
+    return static_cast<double>(rangemin_);
+  }
+  const double fractile = ile(0.5);
+  const int centre_pile = static_cast<int>(floor(fractile));
+  // With at most one sample, or an occupied centre bucket, ile(0.5) stands.
+  if (total_count_ <= 1 || pile_count(centre_pile) != 0) {
+    return fractile;
+  }
+  // Nearest occupied bucket below the centre.
+  int32_t below = centre_pile;
+  while (pile_count(below) == 0) {
+    --below;
+  }
+  // Nearest occupied bucket above the centre.
+  int32_t above = centre_pile;
+  while (pile_count(above) == 0) {
+    ++above;
+  }
+  return (below + above) / 2.0;
+}
+
+/**********************************************************************
+ * STATS::local_min
+ *
+ * Returns true if x (clipped to the range) is a local minimum: its bucket
+ * is empty, or the first bucket with a different count on each side, if
+ * there is one, does not hold a smaller count. Returns false if no
+ * buckets are allocated.
+ **********************************************************************/
+bool STATS::local_min(int32_t x) const {
+  if (buckets_ == nullptr) {
+    return false;
+  }
+  const int32_t bucket_total = rangemax_ - rangemin_;
+  const int32_t here = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
+  const int32_t here_count = buckets_[here];
+  if (here_count == 0) {
+    return true; // an empty bucket always counts as a minimum
+  }
+  // Walk left across any plateau of equal counts.
+  int32_t left = here - 1;
+  while (left >= 0 && buckets_[left] == here_count) {
+    --left;
+  }
+  if (left >= 0 && buckets_[left] < here_count) {
+    return false;
+  }
+  // Walk right across any plateau of equal counts.
+  int32_t right = here + 1;
+  while (right < bucket_total && buckets_[right] == here_count) {
+    ++right;
+  }
+  const bool lower_on_right = right < bucket_total && buckets_[right] < here_count;
+  return !lower_on_right;
+}
+
+/**********************************************************************
+ * STATS::smooth
+ *
+ * Applies a triangular smoothing filter to the histogram in place, which
+ * makes the modes a bit more useful. factor is the weight of the centre
+ * bucket; a neighbour at distance d gets weight factor - d. Neighbours
+ * that fall outside the range are ignored. Does nothing if factor < 2 or
+ * no buckets are allocated.
+ **********************************************************************/
+void STATS::smooth(int32_t factor) {
+  if (buckets_ == nullptr || factor < 2) {
+    return;
+  }
+  const int bucket_total = rangemax_ - rangemin_;
+  // Scratch histogram so every output is computed from unsmoothed inputs.
+  STATS smoothed(rangemin_, rangemax_);
+  for (int centre = 0; centre < bucket_total; ++centre) {
+    int weighted = buckets_[centre] * factor;
+    for (int reach = 1; reach < factor; ++reach) {
+      const int weight = factor - reach;
+      const int left = centre - reach;
+      const int right = centre + reach;
+      if (left >= 0) {
+        weighted += buckets_[left] * weight;
+      }
+      if (right < bucket_total) {
+        weighted += buckets_[right] * weight;
+      }
+    }
+    smoothed.add(centre + rangemin_, weighted);
+  }
+  // Adopt the smoothed counts and their total.
+  total_count_ = smoothed.total_count_;
+  memcpy(buckets_, smoothed.buckets_, bucket_total * sizeof(buckets_[0]));
+}
+
+/**********************************************************************
+ * STATS::cluster
+ *
+ * Cluster the samples into max_cluster clusters.
+ * Each call runs one iteration. The array of clusters must be
+ * max_clusters+1 in size as cluster 0 is used to indicate which samples
+ * have been used.
+ *
+ * lower    - a cluster only absorbs neighbouring piles that lie closer
+ *            than this to its centre.
+ * upper    - a pile must be further than this from its nearest centre
+ *            before it can seed a new cluster.
+ * multiple - a seed must also lie outside [centre / multiple,
+ *            centre * multiple] of its nearest centre.
+ *
+ * The return value is the current number of clusters, or 0 if this
+ * histogram has no buckets, max_clusters < 1, or a new cluster could not
+ * be given this histogram's range.
+ **********************************************************************/
+
+int32_t STATS::cluster(float lower, // thresholds
+                       float upper,
+                       float multiple,       // distance threshold
+                       int32_t max_clusters, // max no to make
+                       STATS *clusters) {    // array of clusters
+  bool new_cluster;                          // added one
+  float *centres;                            // cluster centres
+  int32_t entry;                             // bucket index
+  int32_t cluster;                           // cluster index
+  int32_t best_cluster;                      // one to assign to
+  int32_t new_centre = 0;                    // residual mode
+  int32_t new_mode;                          // pile count of new_centre
+  int32_t count;                             // pile to place
+  float dist;                                // from cluster
+  float min_dist;                            // from best_cluster
+  int32_t cluster_count;                     // no of clusters
+
+  if (buckets_ == nullptr || max_clusters < 1) {
+    return 0;
+  }
+  // Indexed 1..max_clusters to match the clusters array; slot 0 is unused.
+  centres = new float[max_clusters + 1];
+  // Pass 1: revisit the clusters that already exist (consecutive entries
+  // from index 1 that have buckets and a positive total). Each one has its
+  // centre recomputed and is grown outwards from its mode.
+  for (cluster_count = 1;
+       cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
+       clusters[cluster_count].total_count_ > 0;
+       cluster_count++) {
+    centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
+    new_centre = clusters[cluster_count].mode();
+    // Grow downwards while within lower of the centre and the piles of this
+    // histogram are not rising as we move away from the mode.
+    for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
+                                 pile_count(entry) <= pile_count(entry + 1);
+         entry--) {
+      // Only the part of the pile not yet recorded in clusters[0] is free.
+      count = pile_count(entry) - clusters[0].pile_count(entry);
+      if (count > 0) {
+        clusters[cluster_count].add(entry, count);
+        clusters[0].add(entry, count);
+      }
+    }
+    // Grow upwards under the mirrored conditions.
+    for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
+                                 pile_count(entry) <= pile_count(entry - 1);
+         entry++) {
+      count = pile_count(entry) - clusters[0].pile_count(entry);
+      if (count > 0) {
+        clusters[cluster_count].add(entry, count);
+        clusters[0].add(entry, count);
+      }
+    }
+  }
+  // The loop above stops one past the last existing cluster.
+  cluster_count--;
+
+  if (cluster_count == 0) {
+    // No clusters yet: give the "used samples" histogram this range.
+    clusters[0].set_range(rangemin_, rangemax_);
+  }
+  // Pass 2: keep creating clusters while a suitable seed pile exists and
+  // there is room for another cluster.
+  do {
+    new_cluster = false;
+    new_mode = 0;
+    // Look for the largest unused pile that is far enough from every
+    // existing centre to seed a new cluster.
+    for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
+      count = buckets_[entry] - clusters[0].buckets_[entry];
+      // remaining pile
+      if (count > 0) { // any to handle
+        min_dist = static_cast<float>(INT32_MAX);
+        best_cluster = 0;
+        // best_cluster stays 0 when there are no clusters to compare with.
+        for (cluster = 1; cluster <= cluster_count; cluster++) {
+          dist = entry + rangemin_ - centres[cluster];
+          // find distance
+          if (dist < 0) {
+            dist = -dist;
+          }
+          if (dist < min_dist) {
+            min_dist = dist; // find least
+            best_cluster = cluster;
+          }
+        }
+        if (min_dist > upper // far enough for new
+            && (best_cluster == 0 || entry + rangemin_ > centres[best_cluster] * multiple ||
+                entry + rangemin_ < centres[best_cluster] / multiple)) {
+          // Keep the candidate with the largest unused count.
+          if (count > new_mode) {
+            new_mode = count;
+            new_centre = entry + rangemin_;
+          }
+        }
+      }
+    }
+    // need new and room
+    if (new_mode > 0 && cluster_count < max_clusters) {
+      cluster_count++;
+      new_cluster = true;
+      if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
+        delete[] centres;
+        return 0;
+      }
+      // Seed the new cluster with the chosen pile and mark it as used.
+      centres[cluster_count] = static_cast<float>(new_centre);
+      clusters[cluster_count].add(new_centre, new_mode);
+      clusters[0].add(new_centre, new_mode);
+      // Grow the new cluster downwards, then upwards, exactly as in pass 1.
+      for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
+                                   pile_count(entry) <= pile_count(entry + 1);
+           entry--) {
+        count = pile_count(entry) - clusters[0].pile_count(entry);
+        if (count > 0) {
+          clusters[cluster_count].add(entry, count);
+          clusters[0].add(entry, count);
+        }
+      }
+      for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
+                                   pile_count(entry) <= pile_count(entry - 1);
+           entry++) {
+        count = pile_count(entry) - clusters[0].pile_count(entry);
+        if (count > 0) {
+          clusters[cluster_count].add(entry, count);
+          clusters[0].add(entry, count);
+        }
+      }
+      // Replace the seed position with the ile(0.5) of what was gathered.
+      centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
+    }
+  } while (new_cluster && cluster_count < max_clusters);
+  delete[] centres;
+  return cluster_count;
+}
+
+// Helper tests that the current index is still part of the peak and gathers
+// the data into the peak, returning false when the peak is ended.
+// src_buckets[index] - used_buckets[index] is the unused part of the histogram.
+// A pile belongs to the peak only if its unused count is positive and no
+// larger than *prev_count.
+// prev_count is the histogram count of the previous index on entry and is
+// updated to the current index on return.
+// total_count and total_value are accumulating the mean of the peak.
+// Nothing is modified when false is returned.
+static bool GatherPeak(int index, const int *src_buckets, int *used_buckets, int *prev_count,
+                       int *total_count, double *total_value) {
+  int pile_count = src_buckets[index] - used_buckets[index];
+  if (pile_count <= *prev_count && pile_count > 0) {
+    // Accumulate count and index.count product.
+    *total_count += pile_count;
+    // Multiply in double: as an int product, index * pile_count can overflow
+    // for large histograms before it is widened.
+    *total_value += static_cast<double>(index) * pile_count;
+    // Mark this index as used
+    used_buckets[index] = src_buckets[index];
+    *prev_count = pile_count;
+    return true;
+  }
+  return false;
+}
+
+// Finds (at most) the top max_modes modes, well actually the whole peak around
+// each mode, returning them in the given modes vector as a <mean of peak,
+// total count of peak> pair in order of decreasing total count.
+// Since the mean is the key and the count the data in the pair, a single call
+// to sort on the output will re-sort by increasing mean of peak if that is
+// more useful than decreasing total count.
+// Returns the actual number of modes found. If max_modes <= 0, returns 0
+// and leaves modes untouched.
+int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const {
+  if (max_modes <= 0) {
+    return 0;
+  }
+  // max_modes is positive here, so it can be compared with modes.size()
+  // as an unsigned value without mixing signedness.
+  const size_t mode_limit = static_cast<size_t>(max_modes);
+  int src_count = rangemax_ - rangemin_;
+  // Used copies the counts in buckets_ as they get used.
+  STATS used(rangemin_, rangemax_);
+  modes.clear();
+  // Total count of the smallest peak found so far.
+  int least_count = 1;
+  // Mode that is used as a seed for each peak
+  int max_count = 0;
+  do {
+    // Find an unused mode.
+    max_count = 0;
+    int max_index = 0;
+    for (int src_index = 0; src_index < src_count; src_index++) {
+      int pile_count = buckets_[src_index] - used.buckets_[src_index];
+      if (pile_count > max_count) {
+        max_count = pile_count;
+        max_index = src_index;
+      }
+    }
+    if (max_count > 0) {
+      // Copy the bucket count to used so it doesn't get found again.
+      used.buckets_[max_index] = max_count;
+      // Get the entire peak. The product is formed in double because
+      // max_index * max_count can overflow as an int.
+      double total_value = static_cast<double>(max_index) * max_count;
+      int total_count = max_count;
+      int prev_pile = max_count;
+      // Gather the descending side above the mode.
+      for (int offset = 1; max_index + offset < src_count; ++offset) {
+        if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
+                        &total_value)) {
+          break;
+        }
+      }
+      // Restart from the mode and gather the descending side below it.
+      prev_pile = buckets_[max_index];
+      for (int offset = 1; max_index - offset >= 0; ++offset) {
+        if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
+                        &total_value)) {
+          break;
+        }
+      }
+      if (total_count > least_count || modes.size() < mode_limit) {
+        // We definitely want this mode, so if we have enough discard the least.
+        if (modes.size() == mode_limit) {
+          modes.resize(mode_limit - 1);
+        }
+        size_t target_index = 0;
+        // Linear search for the target insertion point.
+        while (target_index < modes.size() && modes[target_index].data() >= total_count) {
+          ++target_index;
+        }
+        // total_value is relative to bucket 0, so shift the mean by rangemin_.
+        auto peak_mean = static_cast<float>(total_value / total_count + rangemin_);
+        modes.insert(modes.begin() + target_index, KDPairInc<float, int>(peak_mean, total_count));
+        least_count = modes.back().data();
+      }
+    }
+  } while (max_count > 0);
+  return static_cast<int>(modes.size());
+}
+
+/**********************************************************************
+ * STATS::print
+ *
+ * Prints every non-empty bucket between the lowest and highest used
+ * values as "value:count", eight per line, followed by the summary.
+ * Prints nothing if no buckets are allocated.
+ **********************************************************************/
+void STATS::print() const {
+  if (buckets_ == nullptr) {
+    return;
+  }
+  // Convert the used value range into bucket offsets.
+  const int32_t first_slot = min_bucket() - rangemin_;
+  const int32_t last_slot = max_bucket() - rangemin_;
+
+  int printed = 0;
+  for (int slot = first_slot; slot <= last_slot; ++slot) {
+    const int32_t pile = buckets_[slot];
+    if (pile == 0) {
+      continue; // empty buckets are not listed
+    }
+    tprintf("%4d:%-3d ", rangemin_ + slot, pile);
+    ++printed;
+    if (printed % 8 == 0) {
+      tprintf("\n");
+    }
+  }
+  tprintf("\n");
+  print_summary();
+}
+
+/**********************************************************************
+ * STATS::print_summary
+ *
+ * Prints the total count, min/max, quartiles, median, range, mean and
+ * standard deviation. Prints nothing if no buckets are allocated.
+ **********************************************************************/
+void STATS::print_summary() const {
+  if (buckets_ != nullptr) {
+    // "Really" values are the actual lowest/highest used buckets, shown
+    // next to the interpolated ile() results.
+    const int32_t lowest = min_bucket();
+    const int32_t highest = max_bucket();
+    tprintf("Total count=%d\n", total_count_);
+    tprintf("Min=%.2f Really=%d\n", ile(0.0), lowest);
+    tprintf("Lower quartile=%.2f\n", ile(0.25));
+    tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
+    tprintf("Upper quartile=%.2f\n", ile(0.75));
+    tprintf("Max=%.2f Really=%d\n", ile(1.0), highest);
+    tprintf("Range=%d\n", highest + 1 - lowest);
+    tprintf("Mean= %.2f\n", mean());
+    tprintf("SD= %.2f\n", sd());
+  }
+}
+
+/**********************************************************************
+ * STATS::plot
+ *
+ * Draws the histogram in the given window as one rectangle per bucket.
+ * Bucket i spans x from xorigin + xscale * i to xorigin + xscale * (i + 1)
+ * and y from yorigin to yorigin + yscale * count.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void STATS::plot(ScrollView *window, // to draw in
+                 float xorigin,      // bottom left
+                 float yorigin,
+                 float xscale,                     // one x unit
+                 float yscale,                     // one y unit
+                 ScrollView::Color colour) const { // colour to draw in
+  if (buckets_ != nullptr) {
+    window->Pen(colour);
+    const int bucket_total = rangemax_ - rangemin_;
+    for (int slot = 0; slot < bucket_total; ++slot) {
+      window->Rectangle(xorigin + xscale * slot, yorigin, xorigin + xscale * (slot + 1),
+                        yorigin + yscale * buckets_[slot]);
+    }
+  }
+}
+#endif
+
+/**********************************************************************
+ * STATS::plotline
+ *
+ * Draws the histogram in the given window as a single polyline, with one
+ * vertex per bucket at (xorigin + xscale * i, yorigin + yscale * count).
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void STATS::plotline(ScrollView *window, // to draw in
+                     float xorigin,      // bottom left
+                     float yorigin,
+                     float xscale,                     // one x unit
+                     float yscale,                     // one y unit
+                     ScrollView::Color colour) const { // colour to draw in
+  if (buckets_ != nullptr) {
+    const int bucket_total = rangemax_ - rangemin_;
+    window->Pen(colour);
+    // Start the line at the top of bucket 0.
+    window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
+    for (int slot = 0; slot < bucket_total; ++slot) {
+      window->DrawTo(xorigin + xscale * slot, yorigin + yscale * buckets_[slot]);
+    }
+  }
+}
+#endif
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/statistc.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/statistc.h
@ -0,0 +1,150 @@
+/**********************************************************************
+ * File:        statistc.h  (Formerly stats.h)
+ * Description: Class description for STATS class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TESSERACT_CCSTRUCT_STATISTC_H_
+#define TESSERACT_CCSTRUCT_STATISTC_H_
+
+#include <cstdio>
+#include "kdpair.h"
+#include "scrollview.h"
+
+namespace tesseract {
+
+// Simple histogram-based statistics for integer values in a known
+// range, such that the range is small compared to the number of samples.
+// NOTE(review): a destructor is declared but no copy constructor or copy
+// assignment, so copies would be compiler-generated member-wise copies of
+// buckets_ - confirm that no caller copies a STATS.
+class TESS_API STATS {
+public:
+  // The histogram buckets are in the range
+  // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
+  // [min_bucket_value, max_bucket_value].
+  // Any data under min_bucket value is silently mapped to min_bucket_value,
+  // and likewise, any data over max_bucket_value is silently mapped to
+  // max_bucket_value.
+  // In the internal array, min_bucket_value maps to 0 and
+  // max_bucket_value_plus_1 - min_bucket_value to the array size.
+  // TODO(rays) This is ugly. Convert the second argument to
+  // max_bucket_value and all the code that uses it.
+  STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
+  // Leaves the object with an empty range and no buckets allocated;
+  // call set_range() before adding data.
+  STATS() = default; // empty for arrays
+
+  ~STATS();
+
+  // (Re)Sets the range and clears the counts.
+  // See the constructor for info on max and min values.
+  // Returns false (and changes nothing) if the range is empty or inverted.
+  bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
+
+  void clear(); // empty buckets
+
+  // Adds count samples of value; a negative count removes samples.
+  void add(int32_t value, int32_t count);
+
+  // "Accessors" return various statistics on the data.
+  int32_t mode() const; // get mode of samples
+  double mean() const;  // get mean of samples
+  double sd() const;    // standard deviation
+  // Returns the fractile value such that frac fraction (in [0,1]) of samples
+  // has a value less than the return value.
+  double ile(double frac) const;
+  // Returns the minimum used entry in the histogram (ie the minimum of the
+  // data, NOT the minimum of the supplied range, nor is it an index.)
+  // Would normally be called min(), but that is a reserved word in VC++.
+  int32_t min_bucket() const; // Find min
+  // Returns the maximum used entry in the histogram (ie the maximum of the
+  // data, NOT the maximum of the supplied range, nor is it an index.)
+  int32_t max_bucket() const; // Find max
+  // Finds a more useful estimate of median than ile(0.5).
+  // Overcomes a problem with ile() - if the samples are, for example,
+  // 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
+  // between 6 and 13 = 9.5
+  double median() const; // get median of samples
+  // Returns the count of the given value. Values outside the range are
+  // mapped to the first/last bucket. Returns 0 when no buckets have been
+  // allocated (e.g. a default-constructed STATS), in line with the null
+  // checks made by the other accessors.
+  int32_t pile_count(int32_t value) const {
+    if (buckets_ == nullptr) {
+      return 0;
+    }
+    if (value <= rangemin_) {
+      return buckets_[0];
+    }
+    if (value >= rangemax_ - 1) {
+      return buckets_[rangemax_ - rangemin_ - 1];
+    }
+    return buckets_[value - rangemin_];
+  }
+  // Returns the total count of all buckets.
+  int32_t get_total() const {
+    return total_count_; // total of all piles
+  }
+  // Returns true if x is a local min.
+  bool local_min(int32_t x) const;
+
+  // Apply a triangular smoothing filter to the stats.
+  // This makes the modes a bit more useful.
+  // The factor gives the height of the triangle, i.e. the weight of the
+  // centre.
+  void smooth(int32_t factor);
+
+  // Cluster the samples into max_cluster clusters.
+  // Each call runs one iteration. The array of clusters must be
+  // max_clusters+1 in size as cluster 0 is used to indicate which samples
+  // have been used.
+  // The return value is the current number of clusters.
+  int32_t cluster(float lower, // thresholds
+                  float upper,
+                  float multiple,       // distance threshold
+                  int32_t max_clusters, // max no to make
+                  STATS *clusters);     // array of clusters
+
+  // Finds (at most) the top max_modes modes, well actually the whole peak
+  // around each mode, returning them in the given modes vector as a <mean of
+  // peak, total count of peak> pair in order of decreasing total count. Since
+  // the mean is the key and the count the data in the pair, a single call to
+  // sort on the output will re-sort by increasing mean of peak if that is more
+  // useful than decreasing total count. Returns the actual number of modes
+  // found.
+  int top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const;
+
+  // Prints a summary and table of the histogram.
+  void print() const;
+  // Prints summary stats only of the histogram.
+  void print_summary() const;
+
+#ifndef GRAPHICS_DISABLED
+  // Draws the histogram as a series of rectangles.
+  void plot(ScrollView *window,              // window to draw in
+            float xorigin,                   // origin of histo
+            float yorigin,                   // gram
+            float xscale,                    // size of one unit
+            float yscale,                    // size of one unit
+            ScrollView::Color colour) const; // colour to draw in
+
+  // Draws a line graph of the histogram.
+  void plotline(ScrollView *window,              // window to draw in
+                float xorigin,                   // origin of histo
+                float yorigin,                   // gram
+                float xscale,                    // size of one unit
+                float yscale,                    // size of one unit
+                ScrollView::Color colour) const; // colour to draw in
+#endif                                           // !GRAPHICS_DISABLED
+
+private:
+  int32_t rangemin_ = 0; // min of range
+  // rangemax_ is not well named as it is really one past the max.
+  int32_t rangemax_ = 0;       // max of range
+  int32_t total_count_ = 0;    // no of samples
+  int32_t *buckets_ = nullptr; // array of cells; nullptr until a range is set
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_CCSTRUCT_STATISTC_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/stepblob.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/stepblob.cpp
@ -0,0 +1,550 @@
+/**********************************************************************
+ * File:        stepblob.cpp  (Formerly cblob.c)
+ * Description: Code for C_BLOB class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "stepblob.h"
+
+#include "points.h" // for operator+=, FCOORD, ICOORD
+
+#include <allheaders.h> // for pixCreate, pixGetDepth
+#include <vector>       // for std::vector
+
+namespace tesseract {
+
+class DENORM;
+
+// Max perimeter to width ratio for a baseline position above box bottom.
+const double kMaxPerimeterWidthRatio = 8.0;
+
+/**********************************************************************
+ * position_outline
+ *
+ * Position the outline in the given list at the relevant place
+ * according to its nesting.
+ **********************************************************************/
+// Places `outline` in `destlist`, or in the child list of an entry of
+// `destlist`, using the ordering given by C_OUTLINE::operator<.
+// Entries that compare less than `outline` are moved onto outline's child
+// list; if `outline` compares less than an entry, placement recurses into
+// that entry's child list. Otherwise `outline` is appended to `destlist`.
+static void position_outline( // put in place
+    C_OUTLINE *outline,       // thing to place
+    C_OUTLINE_LIST *destlist  // destination list
+) {
+  C_OUTLINE *dest_outline;    // outline from dest list
+  C_OUTLINE_IT it = destlist; // iterator
+                              // iterator on children
+  C_OUTLINE_IT child_it = outline->child();
+
+  if (!it.empty()) {
+    do {
+      dest_outline = it.data(); // get destination
+                                // encloses dest
+      if (*dest_outline < *outline) {
+        // take off list
+        dest_outline = it.extract();
+        // put this in place
+        it.add_after_then_move(outline);
+        // make it a child
+        child_it.add_to_end(dest_outline);
+        // The new outline now sits where the first enclosed entry was; scan
+        // the remainder of the list for further entries to adopt.
+        while (!it.at_last()) {
+          it.forward(); // do rest of list
+                        // check for other children
+          dest_outline = it.data();
+          if (*dest_outline < *outline) {
+            // take off list
+            dest_outline = it.extract();
+            child_it.add_to_end(dest_outline);
+            // make it a child
+            if (it.empty()) {
+              break;
+            }
+          }
+        }
+        return; // finished
+      }
+      // enclosed by dest
+      else if (*outline < *dest_outline) {
+        position_outline(outline, dest_outline->child());
+        // place in child list
+        return; // finished
+      }
+      it.forward();
+    } while (!it.at_first());
+  }
+  // Neither relation held for any entry (or the list was empty).
+  it.add_to_end(outline); // at outer level
+}
+
+/**********************************************************************
+ * plot_outline_list
+ *
+ * Draw a list of outlines in the given colour and their children
+ * in the child colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Plots every outline on the list in colour and, recursively, all nested
+// outlines in child_colour.
+static void plot_outline_list(     // draw outlines
+    C_OUTLINE_LIST *list,          // outlines to draw
+    ScrollView *window,            // window to draw in
+    ScrollView::Color colour,      // colour for this level
+    ScrollView::Color child_colour // colour for all nested levels
+) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    current->plot(window, colour);
+    C_OUTLINE_LIST *children = current->child();
+    if (!children->empty()) {
+      // Below the top level both colours are the child colour.
+      plot_outline_list(children, window, child_colour, child_colour);
+    }
+    outline_it.forward();
+  }
+}
+// Draws the outlines in the given colour, and child_colour, normalized
+// using the given denorm, making use of sub-pixel accurate information
+// if available.
+// Recursive worker for C_BLOB::plot_normed: plots each outline on the list
+// through the denorm in colour, and every nested outline in child_colour.
+static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
+                                     ScrollView::Color colour, ScrollView::Color child_colour,
+                                     ScrollView *window) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    current->plot_normed(denorm, colour, window);
+    C_OUTLINE_LIST *children = current->child();
+    if (!children->empty()) {
+      // Below the top level both colours are the child colour.
+      plot_normed_outline_list(denorm, children, child_colour, child_colour, window);
+    }
+    outline_it.forward();
+  }
+}
+#endif
+
+/**********************************************************************
+ * reverse_outline_list
+ *
+ * Reverse a list of outlines and their children.
+ **********************************************************************/
+
+// Reverses every outline on the list and, recursively, every nested outline,
+// setting COUT_INVERSE on each one visited.
+static void reverse_outline_list(C_OUTLINE_LIST *list) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    current->reverse();
+    current->set_flag(COUT_INVERSE, true);
+    C_OUTLINE_LIST *children = current->child();
+    if (!children->empty()) {
+      reverse_outline_list(children);
+    }
+    outline_it.forward();
+  }
+}
+
+/**********************************************************************
+ * C_BLOB::C_BLOB
+ *
+ * Constructor to build a C_BLOB from a list of C_OUTLINEs.
+ * The C_OUTLINEs are not copied so the source list is emptied.
+ * The C_OUTLINEs are nested correctly in the blob.
+ **********************************************************************/
+
+// Drains outline_list, nesting each extracted outline into this blob's
+// outline hierarchy, then normalizes the inverse flags and directions.
+C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
+  C_OUTLINE_IT src_it(outline_list);
+  while (!src_it.empty()) {
+    // The outline is taken off the source list before it is positioned.
+    position_outline(src_it.extract(), &outlines);
+    src_it.forward();
+  }
+  CheckInverseFlagAndDirection();
+}
+
+// Simpler constructor to build a blob from a single outline that has
+// already been fully initialized.
+C_BLOB::C_BLOB(C_OUTLINE *outline) {
+  // The outline is appended as-is; unlike the list constructor, no nesting
+  // or inverse-flag/direction check is performed here.
+  C_OUTLINE_IT it(&outlines);
+  it.add_to_end(outline);
+}
+
+// Builds a set of one or more blobs from a list of outlines.
+// Input: one outline on outline_list contains all the others, but the
+// nesting and order are undefined.
+// If good_blob is true, the blob is added to good_blobs_it, unless
+// an illegal (generation-skipping) parent-child relationship is found.
+// If so, the parent blob goes to bad_blobs_it, and the immediate children
+// are promoted to the top level, recursively being sent to good_blobs_it.
+// If good_blob is false, all created blobs will go to the bad_blobs_it.
+// Output: outline_list is empty. One or more blobs are added to
+// good_blobs_it and/or bad_blobs_it.
+void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
+                                        C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it) {
+  // List of top-level outlines with correctly nested children.
+  C_OUTLINE_LIST nested_outlines;
+  // Pass 1: drain outline_list, nesting every outline into nested_outlines.
+  for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
+    C_OUTLINE *outline = ol_it.extract();
+    // Position this outline in appropriate position in the hierarchy.
+    position_outline(outline, &nested_outlines);
+  }
+  // Pass 2: turn each top-level outline into its own blob.
+  // Check for legal nesting and reassign as required.
+  for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
+    C_OUTLINE *outline = ol_it.extract();
+    bool blob_is_good = good_blob;
+    if (!outline->IsLegallyNested()) {
+      // The blob is illegally nested.
+      // Mark it bad, and add all its children to the top-level list.
+      // The promoted children are visited by later iterations of this loop.
+      blob_is_good = false;
+      ol_it.add_list_after(outline->child());
+    }
+    auto *blob = new C_BLOB(outline);
+    // Set inverse flag and reverse if needed.
+    blob->CheckInverseFlagAndDirection();
+    // Put on appropriate list.
+    // NOTE: when bad_blobs_it is nullptr, bad blobs are added to
+    // good_blobs_it as well; good_blobs_it itself is used unchecked.
+    if (!blob_is_good && bad_blobs_it != nullptr) {
+      bad_blobs_it->add_after_then_move(blob);
+    } else {
+      good_blobs_it->add_after_then_move(blob);
+    }
+  }
+}
+
+// Sets the COUT_INVERSE flag appropriately on the outlines and their
+// children recursively, reversing the outlines if needed so that
+// everything has an anticlockwise top-level.
+// For each top-level outline: if its turn direction is negative, reverse it
+// together with all of its descendants and set COUT_INVERSE; otherwise clear
+// COUT_INVERSE on it.
+void C_BLOB::CheckInverseFlagAndDirection() {
+  C_OUTLINE_IT top_it(&outlines);
+  top_it.mark_cycle_pt();
+  while (!top_it.cycled_list()) {
+    C_OUTLINE *top = top_it.data();
+    const bool needs_reversal = top->turn_direction() < 0;
+    if (needs_reversal) {
+      top->reverse();
+      reverse_outline_list(top->child());
+    }
+    // The flag mirrors whether a reversal was applied.
+    top->set_flag(COUT_INVERSE, needs_reversal);
+    top_it.forward();
+  }
+}
+
+// Build and return a fake blob containing a single fake outline with no
+// steps.
+// Creates a fake outline for box on a temporary list and returns a new
+// heap-allocated blob built from that list.
+C_BLOB *C_BLOB::FakeBlob(const TBOX &box) {
+  C_OUTLINE_LIST fake_outlines;
+  C_OUTLINE::FakeOutline(box, &fake_outlines);
+  auto *fake_blob = new C_BLOB(&fake_outlines);
+  return fake_blob;
+}
+
+/**********************************************************************
+ * C_BLOB::bounding_box
+ *
+ * Return the bounding box of the blob.
+ **********************************************************************/
+
+// Accumulates the bounding boxes of the top-level outlines into one box.
+TBOX C_BLOB::bounding_box() const {
+  TBOX combined; // starts default-constructed; outline boxes are added to it
+  // The iterator type needs a non-const list, but the list is only read.
+  C_OUTLINE_IT outline_it(const_cast<C_OUTLINE_LIST *>(&outlines));
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    combined += outline_it.data()->bounding_box();
+    outline_it.forward();
+  }
+  return combined;
+}
+
+/**********************************************************************
+ * C_BLOB::area
+ *
+ * Return the area of the blob.
+ **********************************************************************/
+
+// Sums C_OUTLINE::area() over the top-level outlines of the blob.
+int32_t C_BLOB::area() {
+  int32_t sum = 0;
+  C_OUTLINE_IT outline_it(&outlines);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    sum += outline_it.data()->area();
+    outline_it.forward();
+  }
+  return sum;
+}
+
+/**********************************************************************
+ * C_BLOB::perimeter
+ *
+ * Return the perimeter of the top and 2nd level outlines.
+ **********************************************************************/
+
+// Sums C_OUTLINE::perimeter() over the top-level outlines of the blob.
+int32_t C_BLOB::perimeter() {
+  int32_t sum = 0;
+  C_OUTLINE_IT outline_it(&outlines);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    sum += outline_it.data()->perimeter();
+    outline_it.forward();
+  }
+  return sum;
+}
+
+/**********************************************************************
+ * C_BLOB::outer_area
+ *
+ * Return the sum of the outer areas of the blob's outlines.
+ **********************************************************************/
+
+// Sums C_OUTLINE::outer_area() over the top-level outlines of the blob.
+int32_t C_BLOB::outer_area() {
+  int32_t sum = 0;
+  C_OUTLINE_IT outline_it(&outlines);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    sum += outline_it.data()->outer_area();
+    outline_it.forward();
+  }
+  return sum;
+}
+
+/**********************************************************************
+ * C_BLOB::count_transitions
+ *
+ * Return the total x and y maxes and mins in the blob.
+ * Child outlines are not counted.
+ **********************************************************************/
+
+// Sums C_OUTLINE::count_transitions(threshold) over the top-level outlines.
+int32_t C_BLOB::count_transitions( // count transitions
+    int32_t threshold              // size threshold passed to each outline
+) {
+  int32_t transitions = 0;
+  C_OUTLINE_IT outline_it(&outlines);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    transitions += outline_it.data()->count_transitions(threshold);
+    outline_it.forward();
+  }
+  return transitions;
+}
+
+/**********************************************************************
+ * C_BLOB::move
+ *
+ * Move C_BLOB by vector
+ **********************************************************************/
+
+// Applies C_OUTLINE::move(vec) to each top-level outline of the blob.
+void C_BLOB::move(   // reposition blob
+    const ICOORD vec // displacement to apply
+) {
+  C_OUTLINE_IT outline_it(&outlines);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    current->move(vec);
+    outline_it.forward();
+  }
+}
+
+// Static helper for C_BLOB::rotate to allow recursion of child outlines.
+// Replaces every outline on `outlines` with a newly allocated outline built
+// from the old one and `rotation`. Children are rotated recursively and then
+// transferred to the replacement; each old outline is deleted.
+static void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines) {
+  C_OUTLINE_LIST new_outlines;
+  C_OUTLINE_IT src_it(outlines);
+  C_OUTLINE_IT dest_it(&new_outlines);
+  // Drain the source list; rotated replacements collect on new_outlines.
+  while (!src_it.empty()) {
+    C_OUTLINE *old_outline = src_it.extract();
+    src_it.forward();
+    auto *new_outline = new C_OUTLINE(old_outline, rotation);
+    if (!old_outline->child()->empty()) {
+      // Rotate the children in place, then hand them to the replacement
+      // before the old outline is deleted.
+      RotateOutlineList(rotation, old_outline->child());
+      C_OUTLINE_IT child_it(new_outline->child());
+      child_it.add_list_after(old_outline->child());
+    }
+    delete old_outline;
+    dest_it.add_to_end(new_outline);
+  }
+  // Put the rotated outlines back on the (now empty) source list.
+  src_it.add_list_after(&new_outlines);
+}
+
+/**********************************************************************
+ * C_BLOB::rotate
+ *
+ * Rotate C_BLOB by rotation.
+ * Warning! has to rebuild all the C_OUTLINEs.
+ **********************************************************************/
+void C_BLOB::rotate(const FCOORD &rotation) {
+  // RotateOutlineList replaces the outline objects rather than modifying
+  // them, so C_OUTLINE pointers obtained before this call are invalid after.
+  RotateOutlineList(rotation, &outlines);
+}
+
+// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
+// outline list and its children.
+// Recursive worker for C_BLOB::ComputeEdgeOffsets. Each outline on the list
+// and each nested outline gets ComputeEdgeOffsets(threshold, pix) when pix
+// is non-null with depth 8, and ComputeBinaryOffsets() otherwise.
+static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    const bool is_8bit = pix != nullptr && pixGetDepth(pix) == 8;
+    if (is_8bit) {
+      current->ComputeEdgeOffsets(threshold, pix);
+    } else {
+      current->ComputeBinaryOffsets();
+    }
+    C_OUTLINE_LIST *children = current->child();
+    if (!children->empty()) {
+      ComputeEdgeOffsetsOutlineList(threshold, pix, children);
+    }
+    outline_it.forward();
+  }
+}
+
+// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
+// if the supplied pix is 8-bit or the binary edges if nullptr.
+void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
+  // The helper recurses through the nested child outlines as well; a non-null
+  // pix whose depth is not 8 is handled like nullptr (binary offsets).
+  ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
+}
+
+// Estimates and returns the baseline position based on the shape of the
+// outlines.
+// We first find the minimum y-coord (y_mins) at each x-coord within the blob.
+// If there is a run of some y or y+1 in y_mins that is longer than the total
+// number of positions at bottom or bottom+1, subject to the additional
+// condition that at least one side of the y/y+1 run is higher than y+1, so it
+// is not a local minimum, then y, not the bottom, makes a good candidate
+// baseline position for this blob. Eg
+//   |                  ---|
+//   |                  |
+//   |-      -----------|        <=  Good candidate baseline position.
+//    |-    -|
+//     |   -|
+//     |---|                     <=  Bottom of blob
+int16_t C_BLOB::EstimateBaselinePosition() {
+  TBOX box = bounding_box();
+  int left = box.left();
+  int width = box.width();
+  int bottom = box.bottom();
+  // Fall back to the box bottom for a blob with no outlines, or one whose
+  // perimeter exceeds kMaxPerimeterWidthRatio times its width.
+  if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
+    return bottom; // This is only for non-CJK blobs.
+  }
+  // Get the minimum y coordinate at each x-coordinate.
+  // y_mins is indexed by x - left and has width + 1 entries, each starting at
+  // the box top so that any outline point can only lower it.
+  std::vector<int> y_mins;
+  y_mins.resize(width + 1, box.top());
+  C_OUTLINE_IT it(&outlines);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    C_OUTLINE *outline = it.data();
+    // Walk the outline step by step from its start position.
+    ICOORD pos = outline->start_pos();
+    for (int s = 0; s < outline->pathlength(); ++s) {
+      if (pos.y() < y_mins[pos.x() - left]) {
+        y_mins[pos.x() - left] = pos.y();
+      }
+      pos += outline->step(s);
+    }
+  }
+  // Find the total extent of the bottom or bottom + 1.
+  int bottom_extent = 0;
+  for (int x = 0; x <= width; ++x) {
+    if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
+      ++bottom_extent;
+    }
+  }
+  // Find the lowest run longer than the bottom extent that is not the bottom.
+  // best_min == box.top() doubles as the "nothing found" marker.
+  int best_min = box.top();
+  int prev_run = 0;            // length of the run before the current one
+  int prev_y = box.top();      // y value of the previous run
+  int prev_prev_y = box.top(); // y value of the run before the previous one
+  // x advances a whole run at a time: prev_run is set to the current run
+  // length (at least 1) at the end of each iteration.
+  for (int x = 0; x < width; x += prev_run) {
+    // Find the length of the current run.
+    int y_at_x = y_mins[x];
+    int run = 1;
+    while (x + run <= width && y_mins[x + run] == y_at_x) {
+      ++run;
+    }
+    if (y_at_x > bottom + 1) {
+      // Possible contender.
+      int total_run = run;
+      // Find extent of current value or +1 to the right of x.
+      while (x + total_run <= width &&
+             (y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
+        ++total_run;
+      }
+      // At least one end has to be higher so it is not a local max.
+      // Running off the right-hand edge of the blob also satisfies this.
+      if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
+        // If the prev_run is at y + 1, then we can add that too. There cannot
+        // be a suitable run at y before that or we would have found it already.
+        if (prev_run > 0 && prev_y == y_at_x + 1) {
+          total_run += prev_run;
+        }
+        if (total_run > bottom_extent && y_at_x < best_min) {
+          best_min = y_at_x;
+        }
+      }
+    }
+    prev_run = run;
+    prev_prev_y = prev_y;
+    prev_y = y_at_x;
+  }
+  // No qualifying run: report the box bottom.
+  return best_min == box.top() ? bottom : best_min;
+}
+
+// Renders each outline on the list, and recursively each nested outline,
+// into pix using the given left/top origin.
+static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    C_OUTLINE *current = outline_it.data();
+    current->render(left, top, pix);
+    C_OUTLINE_LIST *children = current->child();
+    if (!children->empty()) {
+      render_outline_list(children, left, top, pix);
+    }
+    outline_it.forward();
+  }
+}
+
+// Calls render_outline on each outline on the list using the given left/top
+// origin. Nested child outlines are not visited.
+static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
+  C_OUTLINE_IT outline_it(list);
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    outline_it.data()->render_outline(left, top, pix);
+    outline_it.forward();
+  }
+}
+
+// Returns a Pix rendering of the blob. pixDestroy after use.
+// Creates a 1-bit-deep image the size of the bounding box and renders the
+// outlines, including nested ones, into it. The caller releases the image.
+Image C_BLOB::render() {
+  TBOX bounds = bounding_box();
+  Image rendered = pixCreate(bounds.width(), bounds.height(), 1);
+  // The box's left/top corner is passed as the rendering origin.
+  render_outline_list(&outlines, bounds.left(), bounds.top(), rendered);
+  return rendered;
+}
+
+// Returns a Pix rendering of the outline of the blob. (no fill).
+// pixDestroy after use.
+// Creates a 1-bit-deep image the size of the bounding box and draws the
+// top-level outlines into it without fill. The caller releases the image.
+Image C_BLOB::render_outline() {
+  TBOX bounds = bounding_box();
+  Image rendered = pixCreate(bounds.width(), bounds.height(), 1);
+  // The box's left/top corner is passed as the rendering origin.
+  render_outline_list_outline(&outlines, bounds.left(), bounds.top(), rendered);
+  return rendered;
+}
+
+/**********************************************************************
+ * C_BLOB::plot
+ *
+ * Draw the C_BLOB in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void C_BLOB::plot(ScrollView *window,               // window to draw in
+                  ScrollView::Color blob_colour,    // main colour
+                  ScrollView::Color child_colour) { // for holes
+  // Top-level outlines use blob_colour; every nested level uses child_colour.
+  plot_outline_list(&outlines, window, blob_colour, child_colour);
+}
+// Draws the blob in the given colour, and child_colour, normalized
+// using the given denorm, making use of sub-pixel accurate information
+// if available.
+void C_BLOB::plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
+                         ScrollView::Color child_colour, ScrollView *window) {
+  // Top-level outlines use blob_colour; every nested level uses child_colour.
+  plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window);
+}
+#endif
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/stepblob.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/stepblob.h
@ -0,0 +1,136 @@
+/**********************************************************************
+ * File:        stepblob.h  (Formerly cblob.h)
+ * Description: Code for C_BLOB class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 10:41:13 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef STEPBLOB_H
+#define STEPBLOB_H
+
+#include "coutln.h"     // for C_OUTLINE_LIST, C_OUTLINE
+#include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
+#include "points.h"     // for FCOORD, ICOORD (ptr only)
+#include "rect.h"       // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::Color
+
+#include <cstdint> // for int32_t, int16_t
+
+struct Pix;
+
+namespace tesseract {
+
+class C_BLOB;
+class DENORM;
+
+ELISTIZEH(C_BLOB)
+
+// A blob held as a list of top-level C_OUTLINEs, each of which carries its
+// own list of nested child outlines. The class is an ELIST_LINK so blobs can
+// be kept on a C_BLOB_LIST.
+class TESS_API C_BLOB : public ELIST_LINK {
+public:
+  C_BLOB() = default;
+  // Builds a blob from a list of outlines. The outlines are moved, not
+  // copied, so the source list is left empty.
+  explicit C_BLOB(C_OUTLINE_LIST *outline_list);
+  // Simpler constructor to build a blob from a single outline that has
+  // already been fully initialized.
+  explicit C_BLOB(C_OUTLINE *outline);
+
+  // Builds a set of one or more blobs from a list of outlines.
+  // Input: one outline on outline_list contains all the others, but the
+  // nesting and order are undefined.
+  // If good_blob is true, the blob is added to good_blobs_it, unless
+  // an illegal (generation-skipping) parent-child relationship is found.
+  // If so, the parent blob goes to bad_blobs_it, and the immediate children
+  // are promoted to the top level, recursively being sent to good_blobs_it.
+  // If good_blob is false, all created blobs will go to the bad_blobs_it.
+  // Output: outline_list is empty. One or more blobs are added to
+  // good_blobs_it and/or bad_blobs_it.
+  static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
+                                         C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it);
+
+  // Sets the COUT_INVERSE flag appropriately on the outlines and their
+  // children recursively, reversing the outlines if needed so that
+  // everything has an anticlockwise top-level.
+  void CheckInverseFlagAndDirection();
+
+  // Build and return a fake blob containing a single fake outline with no
+  // steps.
+  static C_BLOB *FakeBlob(const TBOX &box);
+
+  C_OUTLINE_LIST *out_list() { // get outline list
+    return &outlines;
+  }
+
+  TBOX bounding_box() const; // compute bounding box
+  int32_t area();            // compute area
+  int32_t perimeter();       // Total perimeter of outlines and 1st level children.
+  int32_t outer_area();      // compute area
+  int32_t count_transitions( // count maxima
+      int32_t threshold);    // size threshold
+
+  void move(const ICOORD vec);         // reposition blob by vector
+  void rotate(const FCOORD &rotation); // Rotate by given vector.
+
+  // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
+  // if the supplied pix is 8-bit or the binary edges if nullptr.
+  void ComputeEdgeOffsets(int threshold, Image pix);
+
+  // Estimates and returns the baseline position based on the shape of the
+  // outlines.
+  int16_t EstimateBaselinePosition();
+
+  // Returns a Pix rendering of the blob. pixDestroy after use.
+  Image render();
+  // Returns a Pix rendering of the outline of the blob. (no fill).
+  // pixDestroy after use.
+  Image render_outline();
+
+#ifndef GRAPHICS_DISABLED
+  void plot(                           // draw one
+      ScrollView *window,              // window to draw in
+      ScrollView::Color blob_colour,   // for outer bits
+      ScrollView::Color child_colour); // for holes
+  // Draws the blob in the given colour, and child_colour, normalized
+  // using the given denorm, making use of sub-pixel accurate information
+  // if available.
+  void plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
+                   ScrollView::Color child_colour, ScrollView *window);
+#endif // !GRAPHICS_DISABLED
+
+  // Replaces this blob's outlines with deep copies of source's outlines.
+  // Self-assignment is a no-op: without the guard the outlines would be
+  // cleared first and the copy would then be taken from the emptied list.
+  C_BLOB &operator=(const C_BLOB &source) {
+    if (this != &source) {
+      if (!outlines.empty()) {
+        outlines.clear();
+      }
+      outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy);
+    }
+    return *this;
+  }
+
+  // Returns a newly allocated blob whose outlines are deep copies of src's.
+  // The blob comes from a plain `new`, so the caller must see to its release.
+  static C_BLOB *deep_copy(const C_BLOB *src) {
+    auto *blob = new C_BLOB;
+    *blob = *src;
+    return blob;
+  }
+
+  // Comparator over elements of type C_BLOB* (v1 and v2 point at the
+  // pointers). Returns the difference of the bounding-box x_middle values,
+  // so the result is negative when blob1's middle lies left of blob2's.
+  static int SortByXMiddle(const void *v1, const void *v2) {
+    const C_BLOB *blob1 = *static_cast<const C_BLOB *const *>(v1);
+    const C_BLOB *blob2 = *static_cast<const C_BLOB *const *>(v2);
+    return blob1->bounding_box().x_middle() - blob2->bounding_box().x_middle();
+  }
+
+private:
+  C_OUTLINE_LIST outlines; // master elements
+};
+
+} // namespace tesseract
+
+#endif
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/tabletransfer.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/tabletransfer.h
@ -0,0 +1,67 @@
+/******************************************************************************
+ * File:        tabletransfer.h
+ * Description: Infrastructure for the transfer of table detection results
+ * Author:      Stefan Brechtken
+ *
+ * (C) Copyright 2021, Stefan Brechtken
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ****************************************************************************/
+
+#ifndef TESSERACT_CCSTRUCT_TABLETRANSFER_H_
+#define TESSERACT_CCSTRUCT_TABLETRANSFER_H_
+#include <memory>
+#include <vector>
+#include "rect.h"
+
+namespace tesseract {
+
+/// Structure for data transfer from table detector
+struct TessTable {
+  tesseract::TBOX box;               ///< presumably the box of the whole table -- confirm with the producer
+  std::vector<tesseract::TBOX> rows; ///< presumably one box per table row -- confirm with the producer
+  std::vector<tesseract::TBOX> cols; ///< presumably one box per table column -- confirm with the producer
+};
+
+/** \brief You can use this small template function to ensure that one and
+ *   only one object of type T exists. It implements the Singleton Pattern.
+ *
+ * The instance is created with T's default constructor on the first call, so
+ * T must be default-constructible. When new_instance is non-null it replaces
+ * the current instance, which is destroyed: every reference obtained from an
+ * earlier call dangles from then on and must be fetched again.
+ * NOTE(review): the replacement itself is not synchronised here; confirm
+ * that callers never replace the instance concurrently with other use.
+ *
+ * Usage examples:
+ *   A& a = uniqueInstance<A>();
+ *   a.xyz();
+ *   A& b = uniqueInstance<A>(std::make_unique<A>(42)); // replace instance
+ *   b.foo(); // use the new reference; `a` is no longer valid
+ * or
+ *   uniqueInstance<A>().xyz();
+ *
+ * \param new_instance optional replacement; nullptr keeps the current one.
+ * \return reference to the instance that is current after the call.
+ */
+template<typename T>
+T& uniqueInstance(std::unique_ptr<T> new_instance = nullptr)
+{
+  static std::unique_ptr<T> _instance = std::make_unique<T>();
+
+  if (new_instance) {
+    // Move-assignment destroys the previously held object.
+    _instance = std::move(new_instance);
+  }
+
+  return *_instance;
+}
+
+/// Read-only access to the instance managed by \see uniqueInstance. A
+/// non-null new_instance is forwarded and replaces that instance first.
+template<typename T>
+const T& constUniqueInstance(std::unique_ptr<T> new_instance = nullptr)
+{
+  T& instance = uniqueInstance<T>(std::move(new_instance));
+  return instance;
+}
+
+} // namespace tesseract
+
+#endif  // TESSERACT_CCSTRUCT_TABLETRANSFER_H_
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/werd.cpp
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/werd.cpp
@ -0,0 +1,584 @@
+/**********************************************************************
+ * File:        werd.cpp  (Formerly word.c)
+ * Description: Code for the WERD class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
+#include "werd.h"
+
+#include "linlsq.h"
+
+#include "helpers.h"
+
+namespace tesseract {
+
+// Colour range used by the rainbow WERD::plot overload. NextColor wraps to
+// FIRST_COLOUR as soon as the successor reaches LAST_COLOUR, so LAST_COLOUR
+// acts as an exclusive upper bound.
+#define FIRST_COLOUR ScrollView::RED       ///< first rainbow colour
+#define LAST_COLOUR ScrollView::AQUAMARINE ///< end of the rainbow (exclusive, see NextColor)
+#define CHILD_COLOUR ScrollView::BROWN     ///< colour of children
+
+/**
+ * WERD::WERD
+ *
+ * Constructor to build a WERD from a list of C_BLOBs.
+ *   blob_list     The C_BLOBs (in word order) are not copied;
+ *                 we take its elements and put them in our lists.
+ *   blank_count   blanks in front of the word
+ *   text          correct text; copied into the WERD's own std::string, a
+ *                 null pointer is stored as the empty string
+ *
+ * Besides taking the blobs, the constructor sets the W_INVERSE flag by
+ * majority vote and moves blobs that disagree onto the reject list.
+ */
+WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
+    : blanks(blank_count), flags(0), script_id_(0), correct(text ? text : "") {
+  C_BLOB_IT start_it = &cblobs;
+  C_BLOB_IT rej_cblob_it = &rej_cblobs;
+  C_OUTLINE_IT c_outline_it;
+  int16_t inverted_vote = 0;
+  int16_t non_inverted_vote = 0;
+
+  // Move blob_list's elements into cblobs.
+  start_it.add_list_after(blob_list);
+
+  /*
+  Set the white-on-black flag for the WERD, moving any duff blobs onto the
+  rej_cblobs list.
+  First, walk the cblobs checking the inverse flag for each outline of each
+  cblob. If a cblob has inconsistent flag settings for its different
+  outlines, move the blob to the reject list. Otherwise, increment the
+  appropriate w-on-b or b-on-w vote for the word.
+
+  Now set the inversion flag for the WERD by maximum vote.
+
+  Walk the blobs again, moving any blob whose inversion flag does not agree
+  with the consensus onto the reject list.
+  */
+  start_it.set_to_list(&cblobs);
+  if (start_it.empty()) {
+    return;
+  }
+  // Pass 1: reject internally inconsistent blobs and count the votes.
+  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
+    bool reject_blob = false;
+    bool blob_inverted;
+
+    c_outline_it.set_to_list(start_it.data()->out_list());
+    // The first outline's flag is the reference for the rest of the blob.
+    // NOTE(review): assumes every blob has at least one outline - confirm.
+    blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
+    for (c_outline_it.mark_cycle_pt(); !c_outline_it.cycled_list() && !reject_blob;
+         c_outline_it.forward()) {
+      reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
+    }
+    if (reject_blob) {
+      rej_cblob_it.add_after_then_move(start_it.extract());
+    } else {
+      if (blob_inverted) {
+        inverted_vote++;
+      } else {
+        non_inverted_vote++;
+      }
+    }
+  }
+
+  // A tie counts as not inverted.
+  flags.set(W_INVERSE, (inverted_vote > non_inverted_vote));
+
+  start_it.set_to_list(&cblobs);
+  if (start_it.empty()) {
+    return;
+  }
+  // Pass 2: every remaining blob is internally consistent, so checking its
+  // first outline is enough to see whether it agrees with the word.
+  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
+    c_outline_it.set_to_list(start_it.data()->out_list());
+    if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE]) {
+      rej_cblob_it.add_after_then_move(start_it.extract());
+    }
+  }
+}
+
+/**
+ * WERD::WERD
+ *
+ * Builds a WERD that takes over the C_BLOBs of blob_list (they are moved,
+ * not copied, so the source list is emptied) and copies blanks, flags,
+ * script id and correct text from clone.
+ */
+
+WERD::WERD(C_BLOB_LIST *blob_list, ///< In word order
+           WERD *clone)            ///< Source of flags
+    : blanks(clone->blanks),
+      flags(clone->flags),
+      script_id_(clone->script_id_),
+      correct(clone->correct) {
+  C_BLOB_IT first_it(blob_list);
+  C_BLOB_IT last_it(blob_list);
+
+  // Walk the second iterator along to the final element of the list.
+  for (; !last_it.at_last(); last_it.forward()) {
+  }
+  // Hand the whole [first, last] range over to our own blob list.
+  cblobs.assign_to_sublist(&first_it, &last_it);
+}
+
+// Wraps a single blob in a newly allocated WERD whose blanks, flags, script
+// id and text are cloned from this word; W_BOL and W_EOL are then set to the
+// supplied values. The new word is returned as a raw pointer.
+WERD *WERD::ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob) {
+  // Stage the blob in a one-element list for the cloning constructor.
+  C_BLOB_LIST single_blob_list;
+  C_BLOB_IT staging_it(&single_blob_list);
+  staging_it.add_after_then_move(blob);
+
+  auto *result = new WERD(&single_blob_list, this);
+  result->set_flag(W_EOL, eol);
+  result->set_flag(W_BOL, bol);
+  return result;
+}
+
+/**
+ * WERD::bounding_box
+ *
+ * Returns the bounding box of the WERD with the rejected blobs included.
+ * Reject blobs used to be left out, but that caused bugs when the words on
+ * a row were re-sorted: the words had been built, and their FUZZY SPACE
+ * flags set, with the reject blobs included. When ALL the blobs of a word
+ * are rejected, a box without them is null, which upset the sort and could
+ * wrongly mark the first word in a row as FUZZY space.
+ */
+
+TBOX WERD::bounding_box() const {
+  // Rejects both above and below the good blobs take part in the default box.
+  const bool include_upper_dots = true;
+  const bool include_lower_dots = true;
+  return restricted_bounding_box(include_upper_dots, include_lower_dots);
+}
+
+// Starts from the box of the good blobs and merges in the rejected blobs.
+// With upper_dots false, rejects lying wholly above the good blobs are left
+// out; with lower_dots false, rejects lying wholly below them are left out.
+TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
+  TBOX result = true_bounding_box();
+  // Vertical extent of the good blobs only; fixed before any reject is merged.
+  const int good_bottom = result.bottom();
+  const int good_top = result.top();
+  // The iterator API needs a non-const list, but the rejects are only read.
+  C_BLOB_IT rej_it(const_cast<C_BLOB_LIST *>(&rej_cblobs));
+  for (rej_it.mark_cycle_pt(); !rej_it.cycled_list(); rej_it.forward()) {
+    const TBOX reject_box = rej_it.data()->bounding_box();
+    if (!upper_dots && reject_box.bottom() > good_top) {
+      continue; // wholly above the good blobs and upper dots are excluded
+    }
+    if (!lower_dots && reject_box.top() < good_bottom) {
+      continue; // wholly below the good blobs and lower dots are excluded
+    }
+    result += reject_box;
+  }
+  return result;
+}
+
+// Bounding box of the good blobs alone; rejected blobs are ignored.
+TBOX WERD::true_bounding_box() const {
+  TBOX good_box; // default-constructed, then grown blob by blob
+  // The iterator API needs a non-const list, but the blobs are only read.
+  C_BLOB_IT blob_it(const_cast<C_BLOB_LIST *>(&cblobs));
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    good_box += blob_it.data()->bounding_box();
+    blob_it.forward();
+  }
+  return good_box;
+}
+
+/**
+ * WERD::move
+ *
+ * Shifts every good blob of the WERD by the given vector.
+ * NOTE!! The rejected blobs (rej_cblobs) are left where they are.
+ */
+
+void WERD::move(const ICOORD vec) {
+  C_BLOB_IT good_it(&cblobs);
+  good_it.mark_cycle_pt();
+  while (!good_it.cycled_list()) {
+    C_BLOB *blob = good_it.data();
+    blob->move(vec);
+    good_it.forward();
+  }
+}
+
+/**
+ * WERD::join_on
+ *
+ * Moves all good and rejected blobs of other onto the end of this word's
+ * corresponding lists. other ends up with empty blob lists; the WERD object
+ * itself is not deleted here.
+ */
+
+void WERD::join_on(WERD *other) {
+  // Good blobs first: detach each one from other and append it to our list.
+  C_BLOB_IT dest_it(&cblobs);
+  C_BLOB_IT donor_it(&other->cblobs);
+  for (; !donor_it.empty(); donor_it.forward()) {
+    dest_it.add_to_end(donor_it.extract());
+  }
+
+  // Then the rejected blobs, in exactly the same way.
+  C_BLOB_IT rej_dest_it(&rej_cblobs);
+  C_BLOB_IT rej_donor_it(&other->rej_cblobs);
+  for (; !rej_donor_it.empty(); rej_donor_it.forward()) {
+    rej_dest_it.add_to_end(rej_donor_it.extract());
+  }
+}
+
+/**
+ * WERD::copy_on
+ *
+ * Deep-copies the good and rejected blobs of other into this word, leaving
+ * other untouched. The copies go in front of the existing blobs when other's
+ * bounding box starts to the left of this word's, and after them otherwise.
+ */
+
+void WERD::copy_on(WERD *other) {
+  const bool prepend = other->bounding_box().left() < bounding_box().left();
+
+  // Deep-copies src and splices the copies onto the chosen end of dest.
+  const auto splice_copies = [prepend](C_BLOB_LIST *dest, C_BLOB_LIST *src) {
+    C_BLOB_IT dest_it(dest);
+    C_BLOB_LIST copies;
+    copies.deep_copy(src, &C_BLOB::deep_copy);
+    if (!prepend) {
+      dest_it.move_to_last();
+      dest_it.add_list_after(&copies);
+      return;
+    }
+    dest_it.add_list_before(&copies);
+  };
+
+  splice_copies(&cblobs, &other->cblobs);
+  // The reject list is only touched when other has rejects to contribute.
+  if (!other->rej_cblobs.empty()) {
+    splice_copies(&rej_cblobs, &other->rej_cblobs);
+  }
+}
+
+/**
+ * WERD::print
+ *
+ * Dumps the word's metadata through tprintf: blank count, bounding box, the
+ * raw flag bits, a TRUE/FALSE line for a selection of flags, the correct
+ * text, the number of rejected blobs and the script id.
+ */
+
+void WERD::print() const {
+  // Spells one flag bit the way every line of the dump does.
+  const auto as_text = [this](WERD_FLAGS bit) { return flags[bit] ? "TRUE" : "FALSE"; };
+  const unsigned long flag_bits = flags.to_ulong();
+
+  tprintf("Blanks= %d\n", blanks);
+  bounding_box().print();
+  tprintf("Flags = %lu = 0%lo\n", flag_bits, flag_bits);
+  tprintf("   W_SEGMENTED = %s\n", as_text(W_SEGMENTED));
+  tprintf("   W_ITALIC = %s\n", as_text(W_ITALIC));
+  tprintf("   W_BOL = %s\n", as_text(W_BOL));
+  tprintf("   W_EOL = %s\n", as_text(W_EOL));
+  tprintf("   W_NORMALIZED = %s\n", as_text(W_NORMALIZED));
+  tprintf("   W_SCRIPT_HAS_XHEIGHT = %s\n", as_text(W_SCRIPT_HAS_XHEIGHT));
+  tprintf("   W_SCRIPT_IS_LATIN = %s\n", as_text(W_SCRIPT_IS_LATIN));
+  tprintf("   W_DONT_CHOP = %s\n", as_text(W_DONT_CHOP));
+  tprintf("   W_REP_CHAR = %s\n", as_text(W_REP_CHAR));
+  tprintf("   W_FUZZY_SP = %s\n", as_text(W_FUZZY_SP));
+  tprintf("   W_FUZZY_NON = %s\n", as_text(W_FUZZY_NON));
+  tprintf("Correct= %s\n", correct.c_str());
+  tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
+  tprintf("Script = %d\n", script_id_);
+}
+
+/**
+ * WERD::plot
+ *
+ * Draw the WERD in the given colour.
+ */
+
+#ifndef GRAPHICS_DISABLED
+void WERD::plot(ScrollView *window, ScrollView::Color colour) {
+  // The requested colour is passed for both colour arguments of each blob.
+  C_BLOB_IT blob_it(&cblobs);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    blob_it.data()->plot(window, colour, colour);
+    blob_it.forward();
+  }
+  // Rejected blobs are drawn by their own routine.
+  plot_rej_blobs(window);
+}
+
+// Steps to the colour after the given one, restarting at FIRST_COLOUR when
+// the successor falls outside [FIRST_COLOUR, LAST_COLOUR).
+ScrollView::Color WERD::NextColor(ScrollView::Color colour) {
+  const auto candidate = static_cast<ScrollView::Color>(colour + 1);
+  const bool in_rainbow = candidate >= FIRST_COLOUR && candidate < LAST_COLOUR;
+  return in_rainbow ? candidate : FIRST_COLOUR;
+}
+
+/**
+ * WERD::plot
+ *
+ * Draws the good blobs in window, each with the next colour of the rainbow
+ * and CHILD_COLOUR as the second colour, followed by the rejected blobs.
+ */
+
+void WERD::plot(ScrollView *window) {
+  C_BLOB_IT blob_it(&cblobs);
+  ScrollView::Color rainbow = FIRST_COLOUR;
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    blob_it.data()->plot(window, rainbow, CHILD_COLOUR);
+    rainbow = NextColor(rainbow);
+    blob_it.forward();
+  }
+  plot_rej_blobs(window);
+}
+
+/**
+ * WERD::plot_rej_blobs
+ *
+ * Draws the rejected blobs of the WERD in window. They are ALWAYS GREY,
+ * for both colour arguments.
+ */
+
+void WERD::plot_rej_blobs(ScrollView *window) {
+  const ScrollView::Color reject_colour = ScrollView::GREY;
+  C_BLOB_IT rej_it(&rej_cblobs);
+  rej_it.mark_cycle_pt();
+  while (!rej_it.cycled_list()) {
+    rej_it.data()->plot(window, reject_colour, reject_colour);
+    rej_it.forward();
+  }
+}
+#endif // !GRAPHICS_DISABLED
+
+/**
+ * WERD::shallow_copy()
+ *
+ * Returns a newly allocated WERD carrying this word's blanks, flags and
+ * correct text. No blobs are copied, and script_id_ and disp_flags keep the
+ * values a default-constructed WERD has.
+ */
+
+WERD *WERD::shallow_copy() {
+  auto *copy = new WERD;
+
+  copy->correct = correct;
+  copy->flags = flags;
+  copy->blanks = blanks;
+  return copy;
+}
+
+/**
+ * WERD::operator=
+ *
+ * Assigns source to this word: blanks, flags, script id and correct text are
+ * copied, and both blob lists are replaced by DEEP copies of source's lists.
+ * disp_flags is not copied. Assigning a word to itself is a no-op.
+ *
+ * Returns a reference to this word.
+ */
+
+WERD &WERD::operator=(const WERD &source) {
+  // Without this guard, self-assignment would clear the blob lists before
+  // they are deep-copied from themselves, leaving the word with no blobs
+  // (and the link assignment below would still be applied to this element).
+  if (this == &source) {
+    return *this;
+  }
+  this->ELIST2_LINK::operator=(source);
+  blanks = source.blanks;
+  flags = source.flags;
+  script_id_ = source.script_id_;
+  correct = source.correct;
+  // Drop our current blobs before taking deep copies of source's.
+  cblobs.clear();
+  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
+  rej_cblobs.clear();
+  rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
+  return *this;
+}
+
+/**
+ *  word_comparator()
+ *
+ *  Ordering callback that sorts a word list by increasing left edge of the
+ *  words' bounding boxes. Each argument points at a WERD pointer.
+ */
+
+int word_comparator(const void *word1p, const void *word2p) {
+  // The sort hands over pointers to the elements, which are WERD pointers.
+  const auto *lhs = *static_cast<const WERD *const *>(word1p);
+  const auto *rhs = *static_cast<const WERD *const *>(word2p);
+  const TBOX lhs_box = lhs->bounding_box();
+  const TBOX rhs_box = rhs->bounding_box();
+  return lhs_box.left() - rhs_box.left();
+}
+
+/**
+ *  WERD::ConstructWerdWithNewBlobs()
+ *
+ * This method returns a new werd constructed using the blobs in the input
+ * all_blobs list, which correspond to the blobs in this werd object. The
+ * blobs used to construct the new word are consumed and removed from the
+ * input all_blobs list.
+ * Returns nullptr if the word couldn't be constructed.
+ * Returns original blobs for which no matches were found in the output list
+ * orphan_blobs (appends).
+ *
+ * This word's own good blobs are taken out of cblobs at the start: matched
+ * ones are deleted, unmatched ones end up in orphan_blobs (when given).
+ */
+
+WERD *WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs) {
+  C_BLOB_LIST current_blob_list;
+  C_BLOB_IT werd_blobs_it(&current_blob_list);
+  // Add the word's c_blobs.
+  werd_blobs_it.add_list_after(cblob_list());
+
+  // New blob list. These contain the blobs which will form the new word.
+  C_BLOB_LIST new_werd_blobs;
+  C_BLOB_IT new_blobs_it(&new_werd_blobs);
+
+  // not_found_blobs contains the list of current word's blobs for which a
+  // corresponding blob wasn't found in the input all_blobs list.
+  C_BLOB_LIST not_found_blobs;
+  C_BLOB_IT not_found_it(&not_found_blobs);
+  not_found_it.move_to_last();
+
+  werd_blobs_it.move_to_first();
+  for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list(); werd_blobs_it.forward()) {
+    C_BLOB *werd_blob = werd_blobs_it.extract();
+    TBOX werd_blob_box = werd_blob->bounding_box();
+    bool found = false;
+    // Now find the corresponding blob for this blob in the all_blobs
+    // list. For now, follow the inefficient method of pairwise
+    // comparisons. Ideally, one can pre-bucket the blobs by row.
+    C_BLOB_IT all_blobs_it(all_blobs);
+    for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) {
+      C_BLOB *a_blob = all_blobs_it.data();
+      // Compute the overlap of the two blobs. If major, a_blob should
+      // be added to the new blobs list.
+      TBOX a_blob_box = a_blob->bounding_box();
+      // A null box is only reported; the blob still goes through the
+      // overlap test below.
+      if (a_blob_box.null_box()) {
+        tprintf("Bounding box couldn't be ascertained\n");
+      }
+      if (werd_blob_box.contains(a_blob_box) || werd_blob_box.major_overlap(a_blob_box)) {
+        // Old blobs are from minimal splits, therefore are expected to be
+        // bigger. The new small blobs should cover a significant portion.
+        // This is it.
+        all_blobs_it.extract();
+        new_blobs_it.add_after_then_move(a_blob);
+        found = true;
+      }
+    }
+    // An old blob that was matched is replaced by the new blob(s) and is no
+    // longer needed; an unmatched one is kept for the caller.
+    if (!found) {
+      not_found_it.add_after_then_move(werd_blob);
+    } else {
+      delete werd_blob;
+    }
+  }
+  // Iterate over all not found blobs. Some of them may be due to
+  // under-segmentation (which is OK, since the corresponding blob is already
+  // in the list in that case).
+  not_found_it.move_to_first();
+  for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) {
+    C_BLOB *not_found = not_found_it.data();
+    TBOX not_found_box = not_found->bounding_box();
+    C_BLOB_IT existing_blobs_it(new_blobs_it);
+    for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
+         existing_blobs_it.forward()) {
+      C_BLOB *a_blob = existing_blobs_it.data();
+      TBOX a_blob_box = a_blob->bounding_box();
+      if ((not_found_box.major_overlap(a_blob_box) || a_blob_box.major_overlap(not_found_box)) &&
+          not_found_box.y_overlap_fraction(a_blob_box) > 0.8) {
+        // Already taken care of.
+        delete not_found_it.extract();
+        break;
+      }
+    }
+  }
+  // Whatever is still unmatched is appended to the caller's orphan list.
+  if (orphan_blobs) {
+    C_BLOB_IT orphan_blobs_it(orphan_blobs);
+    orphan_blobs_it.move_to_last();
+    orphan_blobs_it.add_list_after(&not_found_blobs);
+  }
+
+  // New blobs are ready. Create a new werd object with these.
+  WERD *new_werd = nullptr;
+  if (!new_werd_blobs.empty()) {
+    new_werd = new WERD(&new_werd_blobs, this);
+  } else {
+    // Add the blobs back to this word so that it can be reused.
+    // NOTE(review): when orphan_blobs was given, not_found_blobs has
+    // presumably already been moved into it above, so nothing would be
+    // restored here - confirm.
+    C_BLOB_IT this_list_it(cblob_list());
+    this_list_it.add_list_after(&not_found_blobs);
+  }
+  return new_werd;
+}
+
+// Strips noise from the word: every outline whose longer bounding-box side
+// is below size_threshold is taken out of its blob and re-wrapped as a blob
+// of its own on the rej_cblobs list. Blobs left without any outline are
+// deleted.
+void WERD::CleanNoise(float size_threshold) {
+  C_BLOB_IT good_it(&cblobs);
+  C_BLOB_IT noise_it(&rej_cblobs);
+  for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) {
+    C_BLOB *current_blob = good_it.data();
+    C_OUTLINE_IT outline_it(current_blob->out_list());
+    for (outline_it.mark_cycle_pt(); !outline_it.cycled_list(); outline_it.forward()) {
+      const TBOX outline_box = outline_it.data()->bounding_box();
+      const int box_width = outline_box.width();
+      const int box_height = outline_box.height();
+      const int longest_side = box_width > box_height ? box_width : box_height;
+      if (!(longest_side < size_threshold)) {
+        continue; // large enough to stay in its blob
+      }
+      // Too small: detach the outline and give it a blob of its own on the
+      // reject list.
+      C_OUTLINE *tiny_outline = outline_it.extract();
+      noise_it.add_after_then_move(new C_BLOB(tiny_outline));
+    }
+    // A blob stripped of every outline has nothing left to hold.
+    if (current_blob->out_list()->empty()) {
+      delete good_it.extract();
+    }
+  }
+}
+
+// Empties rej_cblobs: from each rejected blob the outline at the head of its
+// outline list is detached and appended to the given vector, then the blob
+// itself is deleted. Afterwards the outlines vector owns the pointers.
+void WERD::GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines) {
+  C_BLOB_IT noise_it(&rej_cblobs);
+  noise_it.mark_cycle_pt();
+  while (!noise_it.empty()) {
+    // Detach the blob, keep its leading outline, discard the wrapper.
+    C_BLOB *wrapper = noise_it.extract();
+    C_OUTLINE_IT outline_it(wrapper->out_list());
+    C_OUTLINE *noise_outline = outline_it.extract();
+    outlines->push_back(noise_outline);
+    delete wrapper;
+    noise_it.forward();
+  }
+}
+
+// Adds the selected outlines to the indicated real blobs, and puts the rest
+// back in rej_cblobs where they came from. Where the target_blobs entry is
+// nullptr, a run of wanted outlines is put into a single new blob.
+// Ownership of the outlines is transferred back to the word. (Hence
+// vector and not PointerVector.)
+// wanted and target_blobs are indexed in step with outlines, so they must
+// have at least outlines.size() entries. Null entries in outlines are
+// skipped. make_next_word_fuzzy may be nullptr if the caller does not need
+// it.
+// Returns true if any new blob was added to the start of the word, which
+// suggests that it might need joining to the word before it, and likewise
+// sets make_next_word_fuzzy true if any new blob was added to the end.
+bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
+                               const std::vector<C_BLOB *> &target_blobs,
+                               const std::vector<C_OUTLINE *> &outlines,
+                               bool *make_next_word_fuzzy) {
+  bool outline_added_to_start = false;
+  if (make_next_word_fuzzy != nullptr) {
+    *make_next_word_fuzzy = false;
+  }
+  C_BLOB_IT rej_it(&rej_cblobs);
+  for (unsigned i = 0; i < outlines.size(); ++i) {
+    C_OUTLINE *outline = outlines[i];
+    if (outline == nullptr) {
+      continue; // Already used it.
+    }
+    if (wanted[i]) {
+      C_BLOB *target_blob = target_blobs[i];
+      TBOX noise_box = outline->bounding_box();
+      if (target_blob == nullptr) {
+        // No existing blob was chosen: start a new blob from this outline.
+        target_blob = new C_BLOB(outline);
+        // Need to find the insertion point: in front of the first blob whose
+        // left edge is to the right of the outline's left edge.
+        C_BLOB_IT blob_it(&cblobs);
+        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+          C_BLOB *blob = blob_it.data();
+          TBOX blob_box = blob->bounding_box();
+          if (blob_box.left() > noise_box.left()) {
+            if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) {
+              // We might want to join this word to its predecessor.
+              outline_added_to_start = true;
+            }
+            blob_it.add_before_stay_put(target_blob);
+            break;
+          }
+        }
+        // The search ran through the whole list without finding a blob
+        // further right, so the new blob belongs at the end.
+        if (blob_it.cycled_list()) {
+          blob_it.add_to_end(target_blob);
+          if (make_next_word_fuzzy != nullptr) {
+            *make_next_word_fuzzy = true;
+          }
+        }
+        // Add all consecutive wanted, but null-blob outlines to same blob.
+        // This advances i, so the outer loop does not see them again.
+        C_OUTLINE_IT ol_it(target_blob->out_list());
+        while (i + 1 < outlines.size() && wanted[i + 1] && target_blobs[i + 1] == nullptr) {
+          ++i;
+          ol_it.add_to_end(outlines[i]);
+        }
+      } else {
+        // Insert outline into this blob.
+        C_OUTLINE_IT ol_it(target_blob->out_list());
+        ol_it.add_to_end(outline);
+      }
+    } else {
+      // Put back on noise list, wrapped in a blob of its own.
+      rej_it.add_to_end(new C_BLOB(outline));
+    }
+  }
+  return outline_added_to_start;
+}
+
+} // namespace tesseract
--- a/3rdparty/tesseract_ocr/tesseract/src/ccstruct/werd.h
+++ b/3rdparty/tesseract_ocr/tesseract/src/ccstruct/werd.h
@ -0,0 +1,212 @@
+/**********************************************************************
+ * File:        werd.h
+ * Description: Code for the WERD class.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef WERD_H
+#define WERD_H
+
+#include "elst2.h"
+#include "params.h"
+#include "stepblob.h"
+
+#include <bitset>
+
+namespace tesseract {
+
+// Bit positions of the per-word flags held in WERD::flags (a
+// std::bitset<16>); read and written through WERD::flag / WERD::set_flag.
+enum WERD_FLAGS {
+  W_SEGMENTED,          ///< correctly segmented
+  W_ITALIC,             ///< italic text
+  W_BOLD,               ///< bold text
+  W_BOL,                ///< start of line
+  W_EOL,                ///< end of line
+  W_NORMALIZED,         ///< presumably: word has been normalized (TODO confirm)
+  W_SCRIPT_HAS_XHEIGHT, ///< x-height concept makes sense.
+  W_SCRIPT_IS_LATIN,    ///< Special case latin for y. splitting.
+  W_DONT_CHOP,          ///< fixed pitch chopped
+  W_REP_CHAR,           ///< repeated character
+  W_FUZZY_SP,           ///< fuzzy space
+  W_FUZZY_NON,          ///< fuzzy nonspace
+  W_INVERSE             ///< white on black
+};
+
+// Bit positions of the display flags held in WERD::disp_flags; read and
+// written through WERD::display_flag / WERD::set_display_flag.
+enum DISPLAY_FLAGS {
+  /* Display flags bit number allocations */
+  DF_BOX,          ///< Bounding box
+  DF_TEXT,         ///< Correct ascii
+  DF_POLYGONAL,    ///< Polygonal approximation
+  DF_EDGE_STEP,    ///< Edge steps
+  DF_BN_POLYGONAL, ///< Baseline-normalised polygonal approximation
+  DF_BLAMER        ///< Blamer information
+};
+
+class ROW; // forward decl
+
+// A word: a list of good blobs (cblobs), a list of rejected blobs
+// (rej_cblobs), the number of blanks in front of it, a set of WERD_FLAGS,
+// display flags, a script id and the correct text. WERDs are elements of a
+// doubly linked list (ELIST2_LINK).
+class TESS_API WERD : public ELIST2_LINK {
+public:
+  WERD() = default;
+  // WERD constructed with:
+  //   blob_list - blobs of the word (we take this list's contents)
+  //   blanks - number of blanks before the word
+  //   text - correct text (copied; nullptr is treated as empty)
+  WERD(C_BLOB_LIST *blob_list, uint8_t blanks, const char *text);
+
+  // WERD constructed from:
+  //   blob_list - blobs in the word
+  //   clone - werd to clone flags, etc from.
+  WERD(C_BLOB_LIST *blob_list, WERD *clone);
+
+  // Construct a WERD from a single_blob and clone the flags from this.
+  // W_BOL and W_EOL flags are set according to the given values.
+  WERD *ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob);
+
+  ~WERD() = default;
+
+  // assignment (deep-copies both blob lists)
+  WERD &operator=(const WERD &source);
+
+  // This method returns a new werd constructed using the blobs in the input
+  // all_blobs list, which correspond to the blobs in this werd object. The
+  // blobs used to construct the new word are consumed and removed from the
+  // input all_blobs list.
+  // Returns nullptr if the word couldn't be constructed.
+  // Returns original blobs for which no matches were found in the output list
+  // orphan_blobs (appends).
+  WERD *ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs);
+
+  // Accessors for reject / DUFF blobs in various formats
+  C_BLOB_LIST *rej_cblob_list() { // compact format
+    return &rej_cblobs;
+  }
+
+  // Accessors for good blobs in various formats.
+  C_BLOB_LIST *cblob_list() { // get compact blobs
+    return &cblobs;
+  }
+
+  // Number of blanks in front of the word.
+  uint8_t space() const { // access function
+    return blanks;
+  }
+  void set_blanks(uint8_t new_blanks) {
+    blanks = new_blanks;
+  }
+  // Script id; stored internally as int16_t.
+  int script_id() const {
+    return script_id_;
+  }
+  void set_script_id(int id) {
+    script_id_ = id;
+  }
+
+  // Returns the (default) bounding box including all the dots.
+  TBOX bounding_box() const; // compute bounding box
+  // Returns the bounding box including the desired combination of upper and
+  // lower noise/diacritic elements.
+  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
+  // Returns the bounding box of only the good blobs.
+  TBOX true_bounding_box() const;
+
+  // Correct text; the pointer refers to storage owned by this WERD.
+  const char *text() const {
+    return correct.c_str();
+  }
+  // Replaces the correct text with a copy of new_text. As in the
+  // constructor, nullptr means "no text": assigning a null const char * to a
+  // std::string is undefined behaviour, so it is mapped to the empty string.
+  void set_text(const char *new_text) {
+    correct = new_text ? new_text : "";
+  }
+
+  // Reads / writes one WERD_FLAGS bit.
+  bool flag(WERD_FLAGS mask) const {
+    return flags[mask];
+  }
+  void set_flag(WERD_FLAGS mask, bool value) {
+    flags.set(mask, value);
+  }
+
+  // Reads / writes one display flag bit (see DISPLAY_FLAGS).
+  bool display_flag(uint8_t flag) const {
+    return disp_flags[flag];
+  }
+  void set_display_flag(uint8_t flag, bool value) {
+    disp_flags.set(flag, value);
+  }
+
+  WERD *shallow_copy(); // shallow copy word (no blobs)
+
+  // reposition word by vector (rejected blobs are not moved)
+  void move(const ICOORD vec);
+
+  // join other's blobs onto this werd, emptying out other.
+  void join_on(WERD *other);
+
+  // copy other's blobs onto this word, leaving other intact.
+  void copy_on(WERD *other);
+
+  // tprintf word metadata (but not blob innards)
+  void print() const;
+
+#ifndef GRAPHICS_DISABLED
+  // plot word on window in a uniform colour
+  void plot(ScrollView *window, ScrollView::Color colour);
+
+  // Get the next color in the (looping) rainbow.
+  static ScrollView::Color NextColor(ScrollView::Color colour);
+
+  // plot word on window in a rainbow of colours
+  void plot(ScrollView *window);
+
+  // plot rejected blobs, always in grey
+  void plot_rej_blobs(ScrollView *window);
+#endif // !GRAPHICS_DISABLED
+
+  // Removes noise from the word by moving small outlines to the rej_cblobs
+  // list, based on the size_threshold.
+  void CleanNoise(float size_threshold);
+
+  // Extracts all the noise outlines and stuffs the pointers into the given
+  // vector of outlines. Afterwards, the outlines vector owns the pointers.
+  void GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines);
+  // Adds the selected outlines to the indicated real blobs, and puts the rest
+  // back in rej_cblobs where they came from. Where the target_blobs entry is
+  // nullptr, a run of wanted outlines is put into a single new blob.
+  // Ownership of the outlines is transferred back to the word. (Hence
+  // vector and not PointerVector.)
+  // Returns true if any new blob was added to the start of the word, which
+  // suggests that it might need joining to the word before it, and likewise
+  // sets make_next_word_fuzzy true if any new blob was added to the end.
+  bool AddSelectedOutlines(const std::vector<bool> &wanted,
+                           const std::vector<C_BLOB *> &target_blobs,
+                           const std::vector<C_OUTLINE *> &outlines, bool *make_next_word_fuzzy);
+
+private:
+  uint8_t blanks = 0;     // no of blanks
+  std::bitset<16> flags;  // flags about word (indexed by WERD_FLAGS)
+  std::bitset<16> disp_flags; // display flags
+  int16_t script_id_ = 0; // From unicharset.
+  std::string correct;    // correct text
+  C_BLOB_LIST cblobs;     // compacted blobs
+  C_BLOB_LIST rej_cblobs; // DUFF blobs
+};
+
+ELIST2IZEH(WERD)
+
+} // namespace tesseract
+
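+#include "ocrrow.h" // NOTE(review): original comment was cut off after "placed here due to"; presumably ocrrow.h needs the WERD declarations above - confirm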
+
+namespace tesseract {
+
+// Compares two words by the left edge of their bounding boxes, suitable for
+// qsort(3). Each argument points at a WERD pointer (an element of an array
+// of WERD *). The result is negative, zero or positive as the first word
+// starts left of, level with, or right of the second.
+int word_comparator(const void *word1p, const void *word2p);
+
+} // namespace tesseract
+
+#endif