feat: 集成Tesseract源码到项目中

Description:   由于仓库中的Tesseract不是最新版本导致产生了一个bug,因此将Tesseract源码集成到项目中

Log: no
Change-Id: I088de95d6c6ab670406daa8d47ed2ed46929c2c0
This commit is contained in:
wangcong
2021-06-22 20:13:39 +08:00
parent 40c90fc3c7
commit 0cfed22ed4
439 changed files with 185083 additions and 13 deletions

View File

@ -0,0 +1,578 @@
///////////////////////////////////////////////////////////////////////
// File: blamer.cpp
// Description: Module allowing precise error causes to be allocated.
// Author: Rike Antonova
// Refactored: Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "blamer.h"
#include "blobs.h" // for TPOINT, TWERD, TBLOB
#include "errcode.h" // for ASSERT_HOST
#if !defined(DISABLED_LEGACY_ENGINE)
# include "lm_pain_points.h" // for LMPainPoints
#endif
#include "matrix.h" // for MATRIX
#include "normalis.h" // for DENORM
#include "pageres.h" // for WERD_RES
#include "unicharset.h" // for UNICHARSET
#include <cmath> // for abs
#include <cstdlib> // for abs
namespace tesseract {
// Short display names for each value of the IncorrectResultReason enum.
// kIncorrectResultReasonNames below is indexed directly by the enum value
// (see IncorrectReasonName() and IncorrectReason()), so its entries must stay
// in the same order as the enum declaration in blamer.h. Keep in sync.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";
// Lookup table from IncorrectResultReason to its short name. It has one entry
// per real reason (IRR_CORRECT .. IRR_UNKNOWN); there is no entry for the
// IRR_NUM_REASONS sentinel.
const char *const kIncorrectResultReasonNames[] = {
kBlameCorrect, kBlameClassifier, kBlameChopper, kBlameClassLMTradeoff,
kBlamePageLayout, kBlameSegsearchHeur, kBlameSegsearchPP, kBlameClassOldLMTradeoff,
kBlameAdaption, kBlameNoTruthSplit, kBlameNoTruth, kBlameUnknown};
// Returns the short display name for the given reason.
// The name table has no entry for IRR_NUM_REASONS (or for any value that was
// cast from an out-of-range integer), so such values are mapped to the
// "unknown" name instead of reading past the end of the table.
const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
  constexpr size_t kNumNames =
      sizeof(kIncorrectResultReasonNames) / sizeof(kIncorrectResultReasonNames[0]);
  // Casting to an unsigned type makes negative values fail the range check too.
  const auto index = static_cast<size_t>(irr);
  if (index >= kNumNames) {
    return kBlameUnknown;
  }
  return kIncorrectResultReasonNames[index];
}
const char *BlamerBundle::IncorrectReason() const {
return kIncorrectResultReasonNames[incorrect_result_reason_];
}
// Functions to setup the blamer.
// Whole word string, whole word bounding box.
void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str,
const TBOX &word_box) {
truth_word_.InsertBox(0, word_box);
truth_has_char_boxes_ = false;
// Encode the string as UNICHAR_IDs.
std::vector<UNICHAR_ID> encoding;
std::vector<char> lengths;
unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
int total_length = 0;
for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
std::string uch(truth_str + total_length);
uch.resize(lengths[i] - total_length);
UNICHAR_ID id = encoding[i];
if (id != INVALID_UNICHAR_ID) {
uch = unicharset.get_normed_unichar(id);
}
truth_text_.push_back(uch);
}
}
// Single "character" string, "character" bounding box.
// May be called multiple times to indicate the characters in a word.
void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str,
const TBOX &char_box) {
std::string symbol_str(char_str);
UNICHAR_ID id = unicharset.unichar_to_id(char_str);
if (id != INVALID_UNICHAR_ID) {
std::string normed_uch(unicharset.get_normed_unichar(id));
if (normed_uch.length() > 0) {
symbol_str = normed_uch;
}
}
int length = truth_word_.length();
truth_text_.push_back(symbol_str);
truth_word_.InsertBox(length, char_box);
if (length == 0) {
truth_has_char_boxes_ = true;
} else if (truth_word_.BlobBox(length - 1) == char_box) {
truth_has_char_boxes_ = false;
}
}
// Flags the truth text as unusable (for example because it contains reject
// characters): the reason becomes IRR_NO_TRUTH and the per-character truth
// boxes are disregarded.
void BlamerBundle::SetRejectedTruth() {
  truth_has_char_boxes_ = false;
  incorrect_result_reason_ = IRR_NO_TRUTH;
}
// Returns true if the provided word_choice is correct.
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
if (word_choice == nullptr) {
return false;
}
const UNICHARSET *uni_set = word_choice->unicharset();
std::string normed_choice_str;
for (int i = 0; i < word_choice->length(); ++i) {
normed_choice_str += uni_set->get_normed_unichar(word_choice->unichar_id(i));
}
std::string truth_str = TruthString();
return truth_str == normed_choice_str;
}
void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug) {
debug += "Truth ";
for (auto &text : this->truth_text_) {
debug += text;
}
if (!this->truth_has_char_boxes_) {
debug += " (no char boxes)";
}
if (choice != nullptr) {
debug += " Choice ";
std::string choice_str;
choice->string_and_lengths(&choice_str, nullptr);
debug += choice_str;
}
if (msg.length() > 0) {
debug += "\n";
debug += msg;
}
debug += "\n";
}
// Sets up the norm_truth_word from truth_word using the given DENORM.
void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
// TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
TPOINT topleft;
TPOINT botright;
TPOINT norm_topleft;
TPOINT norm_botright;
for (int b = 0; b < truth_word_.length(); ++b) {
const TBOX &box = truth_word_.BlobBox(b);
topleft.x = box.left();
topleft.y = box.top();
botright.x = box.right();
botright.y = box.bottom();
denorm.NormTransform(nullptr, topleft, &norm_topleft);
denorm.NormTransform(nullptr, botright, &norm_botright);
TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, norm_topleft.y);
norm_truth_word_.InsertBox(b, norm_box);
}
}
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
// bundles) where the right edge/ of the left-hand word is word1_right,
// and the left edge of the right-hand word is word2_left.
void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
BlamerBundle *bundle2) const {
std::string debug_str;
// Find truth boxes that correspond to the split in the blobs.
int b;
int begin2_truth_index = -1;
if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
debug_str = "Looking for truth split at";
debug_str += " end1_x " + std::to_string(word1_right);
debug_str += " begin2_x " + std::to_string(word2_left);
debug_str += "\nnorm_truth_word boxes:\n";
if (norm_truth_word_.length() > 1) {
norm_truth_word_.BlobBox(0).print_to_str(debug_str);
for (b = 1; b < norm_truth_word_.length(); ++b) {
norm_truth_word_.BlobBox(b).print_to_str(debug_str);
if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
(abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
begin2_truth_index = b;
debug_str += "Split found";
break;
}
}
debug_str += '\n';
}
}
// Populate truth information in word and word2 with the first and second
// part of the original truth.
if (begin2_truth_index > 0) {
bundle1->truth_has_char_boxes_ = true;
bundle1->norm_box_tolerance_ = norm_box_tolerance_;
bundle2->truth_has_char_boxes_ = true;
bundle2->norm_box_tolerance_ = norm_box_tolerance_;
BlamerBundle *curr_bb = bundle1;
for (b = 0; b < norm_truth_word_.length(); ++b) {
if (b == begin2_truth_index) {
curr_bb = bundle2;
}
curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
curr_bb->truth_text_.push_back(truth_text_[b]);
}
} else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
} else {
debug_str += "Truth split not found";
debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n";
bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
}
}
// "Joins" the blames from bundle1 and bundle2 into *this.
void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2,
bool debug) {
std::string debug_str;
IncorrectResultReason irr = incorrect_result_reason_;
if (irr != IRR_NO_TRUTH_SPLIT) {
debug_str = "";
}
if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
debug_str += "Blame from part 1: ";
debug_str += bundle1.debug_;
irr = bundle1.incorrect_result_reason_;
}
if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
debug_str += "Blame from part 2: ";
debug_str += bundle2.debug_;
if (irr == IRR_CORRECT) {
irr = bundle2.incorrect_result_reason_;
} else if (irr != bundle2.incorrect_result_reason_) {
irr = IRR_UNKNOWN;
}
}
incorrect_result_reason_ = irr;
if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
SetBlame(irr, debug_str, nullptr, debug);
}
}
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character
// blames character classifier for incorrect answer.
void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
const BLOB_CHOICE_LIST &choices, bool debug) {
if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
return; // Nothing to do here.
}
for (int b = 0; b < norm_truth_word_.length(); ++b) {
const TBOX &truth_box = norm_truth_word_.BlobBox(b);
// Note that we are more strict on the bounding box boundaries here
// than in other places (chopper, segmentation search), since we do
// not have the ability to check the previous and next bounding box.
if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
bool found = false;
bool incorrect_adapted = false;
UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
const char *truth_str = truth_text_[b].c_str();
// We promise not to modify the list or its contents, using a
// const BLOB_CHOICE* below.
BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices));
for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
const BLOB_CHOICE *choice = choices_it.data();
if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) {
found = true;
break;
} else if (choice->IsAdapted()) {
incorrect_adapted = true;
incorrect_adapted_id = choice->unichar_id();
}
} // end choices_it for loop
if (!found) {
std::string debug_str = "unichar ";
debug_str += truth_str;
debug_str += " not found in classification list";
SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
} else if (incorrect_adapted) {
std::string debug_str = "better rating for adapted ";
debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
debug_str += " than for correct ";
debug_str += truth_str;
SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
}
break;
}
} // end iterating over blamer_bundle->norm_truth_word
}
// Checks whether chops were made at all the character bounding box
// boundaries of the normalized truth word. If not, blames the chopper
// (IRR_CHOPPER) for an incorrect answer.
// Does nothing when there is no truth, no per-character truth boxes, or the
// chopped word has no blobs.
// Blobs and truth boxes are walked left to right, comparing right edges
// within norm_box_tolerance_:
//  - blob ends before the truth edge: an extra chop, skip the blob;
//  - blob ends after the truth edge: the chop for this truth box is missing;
//  - otherwise the chop matches, so move on to the next blob AND the next
//    truth box.
void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
  if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) {
    return;
  }
  bool missing_chop = false;
  int num_blobs = word->chopped_word->blobs.size();
  int box_index = 0;
  int blob_index = 0;
  int16_t truth_x = -1;
  // Bound the scan by norm_truth_word_, which is the word actually indexed
  // below and the one the post-loop check uses.
  while (box_index < norm_truth_word_.length() && blob_index < num_blobs) {
    truth_x = norm_truth_word_.BlobBox(box_index).right();
    TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
    const auto blob_right = curr_blob->bounding_box().right();
    if (blob_right < truth_x - norm_box_tolerance_) {
      ++blob_index; // encountered an extra chop, keep looking
    } else if (blob_right > truth_x + norm_box_tolerance_) {
      missing_chop = true;
      break;
    } else {
      // Chop found for this truth box. Without advancing box_index every blob
      // would be compared against the first truth box only, and the check
      // below would always report missing chops.
      ++blob_index;
      ++box_index;
    }
  }
  if (missing_chop || box_index < norm_truth_word_.length()) {
    std::string debug_str;
    if (missing_chop) {
      debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
      debug_str += ") at Bounding Box=";
      // blob_index is still in range here: the loop broke out before
      // advancing it.
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
      curr_blob->bounding_box().print_to_str(debug_str);
      debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
    } else {
      debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
      debug_str += " truth box(es)";
    }
    debug_str += "\nMaximally chopped word boxes:\n";
    for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
      curr_blob->bounding_box().print_to_str(debug_str);
      debug_str += '\n';
    }
    debug_str += "Truth bounding boxes:\n";
    for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
      norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
      debug_str += '\n';
    }
    SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
  }
}
// Blames the classifier or the language model if, after running only the
// chopper, best_choice is incorrect and no blame has been yet set.
// Blames the classifier if best_choice is classifier's top choice and is a
// dictionary word (i.e. language model could not have helped).
// Otherwise, blames the language model (formerly permuter word adjustment).
void BlamerBundle::BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
bool valid_permuter, bool debug) {
if (valid_permuter) {
// Find out whether best choice is a top choice.
best_choice_is_dict_and_top_choice_ = true;
for (int i = 0; i < word->best_choice->length(); ++i) {
BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
ASSERT_HOST(!blob_choice_it.empty());
BLOB_CHOICE *first_choice = nullptr;
for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
blob_choice_it.forward()) { // find first non-fragment choice
if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
first_choice = blob_choice_it.data();
break;
}
}
ASSERT_HOST(first_choice != nullptr);
if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
best_choice_is_dict_and_top_choice_ = false;
break;
}
}
}
std::string debug_str;
if (best_choice_is_dict_and_top_choice_) {
debug_str = "Best choice is: incorrect, top choice, dictionary word";
debug_str += " with permuter ";
debug_str += word->best_choice->permuter_name();
} else {
debug_str = "Classifier/Old LM tradeoff is to blame";
}
SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER : IRR_CLASS_OLD_LM_TRADEOFF,
debug_str, word->best_choice, debug);
}
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
// Walks the blobs of word from left to right and groups consecutive blobs so
// that the right edge of each group lines up (within norm_box_tolerance_) with
// the right edge of the next truth box. For every matched truth box one
// (col, row) pair is appended to correct_segmentation_cols_/rows_.
// If the walk fails to consume all blobs, or does not produce exactly one
// pair per truth box, the bundle is blamed with IRR_UNKNOWN and both vectors
// are cleared.
void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
#ifndef DISABLED_LEGACY_ENGINE
// Done unconditionally, i.e. even when the early returns below are taken.
params_training_bundle_.StartHypothesisList();
#endif // ndef DISABLED_LEGACY_ENGINE
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
return; // Nothing to do here.
}
std::string debug_str = "Blamer computing correct_segmentation_cols\n";
// curr_box_col is the index of the first blob of the group being built;
// next_box_col is one past the blob currently being examined.
int curr_box_col = 0;
int next_box_col = 0;
int num_blobs = word->NumBlobs();
if (num_blobs == 0) {
return; // No blobs to play with.
}
int blob_index = 0;
int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
// truth_idx only advances when a truth box has been matched; blob_index
// advances on every iteration.
for (int truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
++blob_index) {
++next_box_col;
// Right edge of the current blob; next_box_x is refreshed to the right edge
// of the following blob when there is one (otherwise it keeps its value).
int16_t curr_box_x = next_box_x;
if (blob_index + 1 < num_blobs) {
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
}
int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
debug_str += " " + std::to_string(truth_x);
debug_str += "\n";
if (curr_box_x > (truth_x + norm_box_tolerance_)) {
break; // failed to find a matching box
} else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
(blob_index + 1 >= num_blobs || // next box can't be included
next_box_x > truth_x + norm_box_tolerance_)) {
// Close the current group: it spans blobs curr_box_col..next_box_col - 1.
correct_segmentation_cols_.push_back(curr_box_col);
correct_segmentation_rows_.push_back(next_box_col - 1);
++truth_idx;
debug_str += "col=" + std::to_string(curr_box_col);
debug_str += " row=" + std::to_string(next_box_col - 1);
debug_str += "\n";
curr_box_col = next_box_col;
}
}
// Failure if blobs are left over or the number of groups differs from the
// number of truth boxes.
if (blob_index < num_blobs || // trailing blobs
correct_segmentation_cols_.size() != norm_truth_word_.length()) {
debug_str +=
"Blamer failed to find correct segmentation"
" (tolerance=" +
std::to_string(norm_box_tolerance_);
if (blob_index >= num_blobs) {
debug_str += " blob == nullptr";
}
debug_str += ")\n";
debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
debug_str += "\n";
SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
correct_segmentation_cols_.clear();
correct_segmentation_rows_.clear();
}
}
// Returns true if a guided segmentation search is needed: no blame has been
// assigned yet, no guided search is already running, per-character truth
// boxes are available, and best_choice does not match the truth.
bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
  if (incorrect_result_reason_ != IRR_CORRECT) {
    return false;
  }
  if (segsearch_is_looking_for_blame_) {
    return false;
  }
  if (!truth_has_char_boxes_) {
    return false;
  }
  return !ChoiceIsCorrect(best_choice);
}
#if !defined(DISABLED_LEGACY_ENGINE)
// Sets up the guided segmentation search towards the correct segmentation.
// For every (col, row) cell of the correct segmentation that is not yet
// classified in ratings, a blamer pain point is generated. If a pain point
// cannot be generated, the guided search is abandoned and the bundle is
// blamed with IRR_SEGSEARCH_HEUR.
void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
                                    UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
                                    tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
                                    WERD_RES *word_res) {
  segsearch_is_looking_for_blame_ = true;
  if (debug) {
    tprintf("segsearch starting to look for blame\n");
  }
  // Fill pain points for any unclassified blob corresponding to the
  // correct segmentation state.
  debug_str += "Correct segmentation:\n";
  const size_t num_cells = correct_segmentation_cols_.size();
  for (size_t cell = 0; cell < num_cells; ++cell) {
    const auto col = correct_segmentation_cols_[cell];
    const auto row = correct_segmentation_rows_[cell];
    debug_str += "col=" + std::to_string(col);
    debug_str += " row=" + std::to_string(row);
    debug_str += "\n";
    if (ratings->Classified(col, row, wildcard_id)) {
      continue; // already classified, no pain point needed
    }
    if (pain_points->GeneratePainPoint(col, row, tesseract::LM_PPTYPE_BLAMER, 0.0, false,
                                       max_char_wh_ratio, word_res)) {
      continue; // pain point queued
    }
    segsearch_is_looking_for_blame_ = false;
    debug_str += "\nFailed to insert pain point\n";
    SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
    break;
  }
}
#endif // !defined(DISABLED_LEGACY_ENGINE)
// Reports whether the guided segmentation search is still looking for blame.
bool BlamerBundle::GuidedSegsearchStillGoing() const {
  const bool still_searching = segsearch_is_looking_for_blame_;
  return still_searching;
}
// The segmentation search has ended. Sets the blame appropriately.
// Only acts if the guided search was still looking for blame (i.e.
// best_choice is incorrect, but a path representing the correct segmentation
// could be constructed). In that case:
//  - if best choice is a dictionary word and the classifier's top choice,
//    the classifier is blamed;
//  - else if the best rating seen for the correct segmentation is better
//    than that of best_choice, segmentation search pain point prioritization
//    is blamed (the language model would have done the correct thing, but the
//    correct segmentation was never explored);
//  - otherwise the tradeoff between the language model and the classifier is
//    blamed, since the incorrect best_choice would have been chosen even
//    after exploring the correct segmentation.
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
  if (!segsearch_is_looking_for_blame_) {
    return;
  }
  segsearch_is_looking_for_blame_ = false;
  if (best_choice_is_dict_and_top_choice_) {
    // Note: this case replaces debug_str rather than appending to it.
    debug_str = "Best choice is: incorrect, top choice, dictionary word";
    debug_str += " with permuter ";
    debug_str += best_choice->permuter_name();
    SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
    return;
  }
  if (best_correctly_segmented_rating_ < best_choice->rating()) {
    debug_str += "Correct segmentation state was not explored";
    SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
    return;
  }
  const bool pruned_by_lm = best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating;
  if (pruned_by_lm) {
    debug_str += "Correct segmentation paths were pruned by LM\n";
  } else {
    debug_str +=
        "Best correct segmentation rating " + std::to_string(best_correctly_segmented_rating_);
    debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
  }
  SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
}
// If the bundle is null or still does not indicate the correct result,
// fix it and use some backup reason for the blame.
void BlamerBundle::LastChanceBlame(bool debug, WERD_RES *word) {
if (word->blamer_bundle == nullptr) {
word->blamer_bundle = new BlamerBundle();
word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug);
} else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug);
} else {
bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
if (irr == IRR_CORRECT && !correct) {
std::string debug_str = "Choice is incorrect after recognition";
word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
} else if (irr != IRR_CORRECT && correct) {
if (debug) {
tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str());
}
word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
word->blamer_bundle->debug_ = "";
}
}
}
// Sets the misadaption debug if this word is incorrect, as this word is
// being adapted to. Does nothing when there is no truth (IRR_NO_TRUTH) or
// when best_choice matches the truth.
// A null best_choice counts as incorrect (see ChoiceIsCorrect); in that case
// the permuter name is left out of the message instead of dereferencing the
// null pointer.
void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug) {
  if (incorrect_result_reason_ == IRR_NO_TRUTH || ChoiceIsCorrect(best_choice)) {
    return;
  }
  misadaption_debug_ = "misadapt to word (";
  if (best_choice != nullptr) {
    misadaption_debug_ += best_choice->permuter_name();
  }
  misadaption_debug_ += "): ";
  // FillDebugString itself tolerates a null choice.
  FillDebugString("", best_choice, misadaption_debug_);
  if (debug) {
    tprintf("%s\n", misadaption_debug_.c_str());
  }
}
} // namespace tesseract

View File

@ -0,0 +1,350 @@
///////////////////////////////////////////////////////////////////////
// File: blamer.h
// Description: Module allowing precise error causes to be allocated.
// Author: Rike Antonova
// Refactored: Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
#define TESSERACT_CCSTRUCT_BLAMER_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "boxword.h" // for BoxWord
#ifndef DISABLED_LEGACY_ENGINE
# include "params_training_featdef.h" // for ParamsTrainingBundle, ParamsTra...
#endif // ndef DISABLED_LEGACY_ENGINE
#include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only)
#include "rect.h" // for TBOX
#include "tprintf.h" // for tprintf
#include <tesseract/unichar.h> // for UNICHAR_ID
#include <cstdint> // for int16_t
#include <cstring> // for memcpy
#include <vector> // for std::vector
namespace tesseract {
// Forward declarations of types that this header only uses by pointer or
// reference.
class DENORM;
class MATRIX;
class UNICHARSET;
class WERD_RES;
struct MATRIX_COORD;
struct TWERD;
class LMPainPoints;
// Base tolerance used when comparing box edges against truth boxes.
// SetupNormTruthWord() multiplies it by the DENORM's x scale to obtain the
// tolerance used in normalized coordinates.
static const int16_t kBlamerBoxTolerance = 5;
// Enum for expressing the source of error.
// The enum values are used directly as indices into
// kIncorrectResultReasonNames in blamer.cpp.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
enum IncorrectResultReason {
// The text recorded in best choice == truth text
IRR_CORRECT,
// Either: Top choice is incorrect and is a dictionary word (language model
// is unlikely to help correct such errors, so blame the classifier).
// Or: the correct unichar was not included in shortlist produced by the
// classifier at all.
IRR_CLASSIFIER,
// Chopper has not found one or more splits that correspond to the correct
// character bounding boxes recorded in BlamerBundle::truth_word.
IRR_CHOPPER,
// Classifier did include correct unichars for each blob in the correct
// segmentation, however its rating could have been too bad to allow the
// language model to pull out the correct choice. On the other hand the
// strength of the language model might have been too weak to favor the
// correct answer, thus we call this case a classifier-language model
// tradeoff error.
IRR_CLASS_LM_TRADEOFF,
// Page layout failed to produce the correct bounding box. Blame page layout
// if the truth was not found for the word, which implies that the bounding
// box of the word was incorrect (no truth word had a similar bounding box).
IRR_PAGE_LAYOUT,
// SegSearch heuristic prevented one or more blobs from the correct
// segmentation state to be classified (e.g. the blob was too wide).
IRR_SEGSEARCH_HEUR,
// The correct segmentation state was not explored because of poor SegSearch
// pain point prioritization. We blame SegSearch pain point prioritization
// if the best rating of a choice constructed from correct segmentation is
// better than that of the best choice (i.e. if we got to explore the correct
// segmentation state, language model would have picked the correct choice).
IRR_SEGSEARCH_PP,
// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
// and thus use the old language model (permuters).
// TODO(antonova): integrate the new language model with chopper
IRR_CLASS_OLD_LM_TRADEOFF,
// If there is an incorrect adaptive template match with a better score than
// a correct one (either pre-trained or adapted), mark this as adaption error.
IRR_ADAPTION,
// split_and_recog_word() failed to find a suitable split in truth.
IRR_NO_TRUTH_SPLIT,
// Truth is not available for this word (e.g. when words in corrected content
// file are turned into ~~~~ because an appropriate alignment was not found).
IRR_NO_TRUTH,
// The text recorded in best choice != truth text, but none of the above
// reasons are set.
IRR_UNKNOWN,
// Number of reasons above; not a reason itself and has no entry in
// kIncorrectResultReasonNames.
IRR_NUM_REASONS
};
// Blamer-related information to determine the source of errors.
struct BlamerBundle {
static const char *IncorrectReasonName(IncorrectResultReason irr);
BlamerBundle()
: truth_has_char_boxes_(false)
, incorrect_result_reason_(IRR_CORRECT)
, lattice_data_(nullptr) {
ClearResults();
}
BlamerBundle(const BlamerBundle &other) {
this->CopyTruth(other);
this->CopyResults(other);
}
~BlamerBundle() {
delete[] lattice_data_;
}
// Accessors.
std::string TruthString() const {
std::string truth_str;
for (auto &text : truth_text_) {
truth_str += text;
}
return truth_str;
}
IncorrectResultReason incorrect_result_reason() const {
return incorrect_result_reason_;
}
bool NoTruth() const {
return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
}
bool HasDebugInfo() const {
return debug_.length() > 0 || misadaption_debug_.length() > 0;
}
const std::string &debug() const {
return debug_;
}
const std::string &misadaption_debug() const {
return misadaption_debug_;
}
void UpdateBestRating(float rating) {
if (rating < best_correctly_segmented_rating_) {
best_correctly_segmented_rating_ = rating;
}
}
int correct_segmentation_length() const {
return correct_segmentation_cols_.size();
}
// Returns true if the given ratings matrix col,row position is included
// in the correct segmentation path at the given index.
bool MatrixPositionCorrect(int index, const MATRIX_COORD &coord) {
return correct_segmentation_cols_[index] == coord.col &&
correct_segmentation_rows_[index] == coord.row;
}
void set_best_choice_is_dict_and_top_choice(bool value) {
best_choice_is_dict_and_top_choice_ = value;
}
const char *lattice_data() const {
return lattice_data_;
}
int lattice_size() const {
return lattice_size_; // size of lattice_data in bytes
}
void set_lattice_data(const char *data, int size) {
lattice_size_ = size;
delete[] lattice_data_;
lattice_data_ = new char[lattice_size_];
memcpy(lattice_data_, data, lattice_size_);
}
#ifndef DISABLED_LEGACY_ENGINE
const tesseract::ParamsTrainingBundle &params_training_bundle() const {
return params_training_bundle_;
}
// Adds a new ParamsTrainingHypothesis to the current hypothesis list.
void AddHypothesis(const tesseract::ParamsTrainingHypothesis &hypo) {
params_training_bundle_.AddHypothesis(hypo);
}
#endif // ndef DISABLED_LEGACY_ENGINE
// Functions to setup the blamer.
// Whole word string, whole word bounding box.
void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box);
// Single "character" string, "character" bounding box.
// May be called multiple times to indicate the characters in a word.
void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box);
// Marks that there is something wrong with the truth text, like it contains
// reject characters.
void SetRejectedTruth();
// Returns true if the provided word_choice is correct.
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const;
void ClearResults() {
norm_truth_word_.DeleteAllBoxes();
norm_box_tolerance_ = 0;
if (!NoTruth()) {
incorrect_result_reason_ = IRR_CORRECT;
}
debug_ = "";
segsearch_is_looking_for_blame_ = false;
best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
correct_segmentation_cols_.clear();
correct_segmentation_rows_.clear();
best_choice_is_dict_and_top_choice_ = false;
delete[] lattice_data_;
lattice_data_ = nullptr;
lattice_size_ = 0;
}
void CopyTruth(const BlamerBundle &other) {
truth_has_char_boxes_ = other.truth_has_char_boxes_;
truth_word_ = other.truth_word_;
truth_text_ = other.truth_text_;
incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
}
void CopyResults(const BlamerBundle &other) {
norm_truth_word_ = other.norm_truth_word_;
norm_box_tolerance_ = other.norm_box_tolerance_;
incorrect_result_reason_ = other.incorrect_result_reason_;
segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
correct_segmentation_cols_ = other.correct_segmentation_cols_;
correct_segmentation_rows_ = other.correct_segmentation_rows_;
best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
if (other.lattice_data_ != nullptr) {
lattice_data_ = new char[other.lattice_size_];
memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
lattice_size_ = other.lattice_size_;
} else {
lattice_data_ = nullptr;
}
}
const char *IncorrectReason() const;
// Appends choice and truth details to the given debug string.
void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug);
// Sets up the norm_truth_word from truth_word using the given DENORM.
void SetupNormTruthWord(const DENORM &denorm);
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
// bundles) where the right edge/ of the left-hand word is word1_right,
// and the left edge of the right-hand word is word2_left.
void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
BlamerBundle *bundle2) const;
// "Joins" the blames from bundle1 and bundle2 into *this.
void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug);
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character
// blames character classifier for incorrect answer.
void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
const BLOB_CHOICE_LIST &choices, bool debug);
// Checks whether chops were made at all the character bounding box
// boundaries in word->truth_word. If not - blames the chopper for an
// incorrect answer.
void SetChopperBlame(const WERD_RES *word, bool debug);
// Blames the classifier or the language model if, after running only the
// chopper, best_choice is incorrect and no blame has been yet set.
// Blames the classifier if best_choice is classifier's top choice and is a
// dictionary word (i.e. language model could not have helped).
// Otherwise, blames the language model (formerly permuter word adjustment).
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset,
bool valid_permuter, bool debug);
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
void SetupCorrectSegmentation(const TWERD *word, bool debug);
// Returns true if a guided segmentation search is needed.
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
// Setup ready to guide the segmentation search to the correct segmentation.
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
double max_char_wh_ratio, WERD_RES *word_res);
// Returns true if the guided segsearch is in progress.
bool GuidedSegsearchStillGoing() const;
// The segmentation search has ended. Sets the blame appropriately.
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);
// If the bundle is null or still does not indicate the correct result,
// fix it and use some backup reason for the blame.
static void LastChanceBlame(bool debug, WERD_RES *word);
// Sets the misadaption debug if this word is incorrect, as this word is
// being adapted to.
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
private:
// Copy assignment is explicitly deleted; a BlamerBundle cannot be assigned.
BlamerBundle &operator=(const BlamerBundle &other) = delete;
// Records irr as the blame reason and rebuilds the debug text as
// "<IncorrectReason()> to blame: " followed by whatever FillDebugString()
// appends for msg and choice. The text is also printed when debug is true.
void SetBlame(IncorrectResultReason irr, const std::string &msg, const WERD_CHOICE *choice,
              bool debug) {
  incorrect_result_reason_ = irr;
  // IncorrectReason() is read after the reason is updated, so the prefix
  // reflects irr.
  debug_.assign(IncorrectReason());
  debug_.append(" to blame: ");
  FillDebugString(msg, choice, debug_);
  if (!debug) {
    return;
  }
  tprintf("SetBlame(): %s", debug_.c_str());
}
private:
// Set to true when bounding boxes for individual unichars are recorded.
bool truth_has_char_boxes_;
// Variables used by the segmentation search when looking for the blame.
// Set to true while segmentation search is continued after the usual
// termination condition in order to look for the blame.
bool segsearch_is_looking_for_blame_;
// Set to true if best choice is a dictionary word and
// classifier's top choice.
bool best_choice_is_dict_and_top_choice_;
// Tolerance for bounding box comparisons in normalized space.
int norm_box_tolerance_;
// truth_word_ (in the original image coordinate space) contains the ground
// truth bounding boxes for this WERD_RES.
tesseract::BoxWord truth_word_;
// Same as above, but in normalized coordinates
// (filled in by WERD_RES::SetupForRecognition()).
tesseract::BoxWord norm_truth_word_;
// Contains ground truth unichar for each of the bounding boxes in truth_word.
std::vector<std::string> truth_text_;
// The reason for incorrect OCR result.
IncorrectResultReason incorrect_result_reason_;
// Debug text associated with the blame.
std::string debug_;
// Misadaption debug information (filled in if this word was misadapted to).
std::string misadaption_debug_;
// Vectors populated by SegSearch to indicate column and row indices that
// correspond to blobs with correct bounding boxes.
std::vector<int> correct_segmentation_cols_;
std::vector<int> correct_segmentation_rows_;
// Best rating for correctly segmented path
// (set and used by SegSearch when looking for blame).
float best_correctly_segmented_rating_;
int lattice_size_; // size of lattice_data in bytes
// Serialized segmentation search lattice.
char *lattice_data_;
// Information about hypotheses (paths) explored by the segmentation search.
#ifndef DISABLED_LEGACY_ENGINE
tesseract::ParamsTrainingBundle params_training_bundle_;
#endif // ndef DISABLED_LEGACY_ENGINE
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_BLAMER_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,853 @@
/**********************************************************************
* File: blobbox.h (Formerly blobnbox.h)
* Description: Code for the textord blob class.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLOBBOX_H
#define BLOBBOX_H
#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
#include "elst2.h" // for ELIST2_ITERATOR, ELIST2IZEH, ELIST2_LINK
#include "errcode.h" // for ASSERT_HOST
#include "ocrblock.h" // for BLOCK
#include "params.h" // for DoubleParam, double_VAR_H
#include "pdblock.h" // for PDBLK
#include "points.h" // for FCOORD, ICOORD, ICOORDELT_LIST
#include "quspline.h" // for QSPLINE
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include "statistc.h" // for STATS
#include "stepblob.h" // for C_BLOB
#include "tprintf.h" // for tprintf
#include "werd.h" // for WERD_LIST
#include <cinttypes> // for PRId32
#include <cmath> // for std::sqrt
#include <cstdint> // for int16_t, int32_t
struct Pix;
namespace tesseract {
class C_OUTLINE;
enum PITCH_TYPE {
PITCH_DUNNO, // insufficient data
PITCH_DEF_FIXED, // definitely fixed
PITCH_MAYBE_FIXED, // could be
PITCH_DEF_PROP,
PITCH_MAYBE_PROP,
PITCH_CORR_FIXED,
PITCH_CORR_PROP
};
// The possible tab-stop types of each side of a BLOBNBOX.
// The ordering is important, as it is used for deleting dead-ends in the
// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
// non-aligned, unset, or deleted members.
enum TabType {
TT_NONE, // Not a tab.
TT_DELETED, // Not a tab after detailed analysis.
TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
TT_CONFIRMED, // Aligned with neighbours.
TT_VLINE // Detected as a vertical line.
};
// The possible region types of a BLOBNBOX.
// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
// *Type static functions below.
enum BlobRegionType {
BRT_NOISE, // Neither text nor image.
BRT_HLINE, // Horizontal separator line.
BRT_VLINE, // Vertical separator line.
BRT_RECTIMAGE, // Rectangular image.
BRT_POLYIMAGE, // Non-rectangular image.
BRT_UNKNOWN, // Not determined yet.
BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
BRT_TEXT, // Convincing text.
BRT_COUNT // Number of possibilities.
};
// Indices for arrays that hold one entry per neighbour direction.
// NOTE: keep in this order. Opposite directions differ only in bit 1, which
// is what lets DirOtherWay() flip a direction with ^2.
enum BlobNeighbourDir {
  BND_LEFT = 0,
  BND_BELOW = 1,
  BND_RIGHT = 2,
  BND_ABOVE = 3,
  BND_COUNT = 4
};
// enum for special type of text characters, such as math symbol or italic.
enum BlobSpecialTextType {
  BSTT_NONE,    // No special.
  BSTT_ITALIC,  // Italic style.
  BSTT_DIGIT,   // Digit symbols.
  BSTT_MATH,    // Mathematical symbols (not including digit).
  BSTT_UNCLEAR, // Characters with low recognition rate.
  BSTT_SKIP,    // Characters that we skip labeling (usually too small).
  BSTT_COUNT
};
// Returns the direction opposite to dir (left <-> right, below <-> above)
// by toggling bit 1 of its value.
inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
  const int flipped = static_cast<int>(dir) ^ 2;
  return static_cast<BlobNeighbourDir>(flipped);
}
// BlobTextFlowType indicates the quality of neighbouring information
// related to a chain of connected components, either horizontally or
// vertically. Also used by ColPartition for the collection of blobs
// within, which should all have the same value in most cases.
enum BlobTextFlowType {
  BTFT_NONE,          // No text flow set yet.
  BTFT_NONTEXT,       // Flow too poor to be likely text.
  BTFT_NEIGHBOURS,    // Neighbours support flow in this direction.
  BTFT_CHAIN,         // There is a weak chain of text in this direction.
  BTFT_STRONG_CHAIN,  // There is a strong chain of text in this direction.
  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
  BTFT_LEADER,        // Leader dots/dashes etc.
  BTFT_COUNT
};
// Returns true if type1 dominates type2 in a merge. LEADER is weak and
// dominates nothing; for every other pair the enum ordering decides.
// The function is anti-symmetric for distinct types. For equal types the
// result should not be relied on (as written: true, except that
// LEADER vs LEADER gives false).
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
  // A LEADER on either side settles the question: the LEADER always loses,
  // and type1 is checked first.
  const bool first_is_leader = (type1 == BTFT_LEADER);
  if (first_is_leader || type2 == BTFT_LEADER) {
    return !first_is_leader;
  }
  // With those out of the way, the ordering of the enum determines the result.
  return !(type1 < type2);
}
class ColPartition;
class BLOBNBOX;
ELISTIZEH(BLOBNBOX)
class BLOBNBOX : public ELIST_LINK {
public:
BLOBNBOX() {
ReInit();
}
// Wraps srcblob, taking the bounding box and enclosed area from it.
// srcblob must not be null. The new BLOBNBOX does not own the blob
// (owns_cblob_ stays false) unless set_owns_cblob(true) is called later.
explicit BLOBNBOX(C_BLOB *srcblob) {
  // cblob_ptr and area have to be in place before ReInit() runs: ReInit()
  // derives area_stroke_width_ from them and skips that step while area is 0
  // or there is no blob, which was always the case when ReInit() was called
  // first. ReInit() itself modifies neither of them.
  cblob_ptr = srcblob;
  box = srcblob->bounding_box();
  area = static_cast<int>(srcblob->area());
  ReInit();
}
~BLOBNBOX() {
if (owns_cblob_) {
delete cblob_ptr;
}
}
// Detaches and deletes the C_BLOB of every BLOBNBOX on boxes. The BLOBNBOX
// elements themselves are left on the list.
static void clear_blobnboxes(BLOBNBOX_LIST *boxes) {
  // A BLOBNBOX generally doesn't own its blobs, so if they do, you
  // have to delete them explicitly.
  BLOBNBOX_IT it = boxes;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    // TODO: remove next line, currently still needed for resultiterator_test.
    C_BLOB *detached = it.data()->remove_cblob();
    delete detached;
  }
}
static BLOBNBOX *RealBlob(C_OUTLINE *outline) {
auto *blob = new C_BLOB(outline);
return new BLOBNBOX(blob);
}
// Rotates the box and the underlying blob.
void rotate(FCOORD rotation);
// Methods that act on the box without touching the underlying blob.
// Reflect the box in the y-axis, leaving the underlying blob untouched.
void reflect_box_in_y_axis();
// Rotates the box by the angle given by rotation.
// If the blob is a diacritic, then only small rotations for skew
// correction can be applied.
void rotate_box(FCOORD rotation);
// Moves just the box by the given vector.
// Moves just the box by the given vector. For a diacritic the stored
// base-character limits are shifted by the same y offset; otherwise they
// are reset to the moved box.
void translate_box(ICOORD v) {
  // Decide before moving: IsDiacritic() compares the base-character limits
  // with the current box.
  const bool was_diacritic = IsDiacritic();
  box.move(v);
  if (was_diacritic) {
    base_char_top_ += v.y();
    base_char_bottom_ += v.y();
    return;
  }
  set_diacritic_box(box);
}
void merge(BLOBNBOX *nextblob);
void really_merge(BLOBNBOX *other);
void chop( // fake chop blob
BLOBNBOX_IT *start_it, // location of this
BLOBNBOX_IT *blob_it, // iterator
FCOORD rotation, // for landscape
float xheight); // line height
void NeighbourGaps(int gaps[BND_COUNT]) const;
void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const;
void CleanNeighbours();
// Returns positive if there is at least one side neighbour that has a
// similar stroke width and is not on the other side of a rule line.
int GoodTextBlob() const;
// Returns the number of side neighbours that are of type BRT_NOISE.
int NoisyNeighbours() const;
// Returns true if the blob is noise and has no owner.
bool DeletableNoise() const {
return owner() == nullptr && region_type() == BRT_NOISE;
}
// Returns true, and sets vert_possible/horz_possible if the blob has some
// feature that makes it individually appear to flow one way.
// eg if it has a high aspect ratio, yet has a complex shape, such as a
// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
bool DefiniteIndividualFlow();
// Returns true if there is no tabstop violation in merging this and other.
bool ConfirmNoTabViolation(const BLOBNBOX &other) const;
// Returns true if other has a similar stroke width to this.
bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance,
double constant_tolerance) const;
// Returns a bounding box of the outline contained within the
// given horizontal range.
TBOX BoundsWithinLimits(int left, int right);
// Estimates and stores the baseline position based on the shape of the
// outline.
void EstimateBaselinePosition();
// Simple accessors.
const TBOX &bounding_box() const {
return box;
}
// Set the bounding box. Use with caution.
// Normally use compute_bounding_box instead.
void set_bounding_box(const TBOX &new_box) {
box = new_box;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
}
void compute_bounding_box() {
box = cblob_ptr->bounding_box();
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
baseline_y_ = box.bottom();
}
const TBOX &reduced_box() const {
return red_box;
}
void set_reduced_box(TBOX new_box) {
red_box = new_box;
reduced = true;
}
int32_t enclosed_area() const {
return area;
}
bool joined_to_prev() const {
return joined;
}
bool red_box_set() const {
return reduced;
}
int repeated_set() const {
return repeated_set_;
}
void set_repeated_set(int set_id) {
repeated_set_ = set_id;
}
C_BLOB *cblob() const {
return cblob_ptr;
}
C_BLOB *remove_cblob() {
auto blob = cblob_ptr;
cblob_ptr = nullptr;
owns_cblob_ = false;
return blob;
}
TabType left_tab_type() const {
return left_tab_type_;
}
void set_left_tab_type(TabType new_type) {
left_tab_type_ = new_type;
}
TabType right_tab_type() const {
return right_tab_type_;
}
void set_right_tab_type(TabType new_type) {
right_tab_type_ = new_type;
}
BlobRegionType region_type() const {
return region_type_;
}
void set_region_type(BlobRegionType new_type) {
region_type_ = new_type;
}
BlobSpecialTextType special_text_type() const {
return spt_type_;
}
void set_special_text_type(BlobSpecialTextType new_type) {
spt_type_ = new_type;
}
BlobTextFlowType flow() const {
return flow_;
}
void set_flow(BlobTextFlowType value) {
flow_ = value;
}
bool vert_possible() const {
return vert_possible_;
}
void set_vert_possible(bool value) {
vert_possible_ = value;
}
bool horz_possible() const {
return horz_possible_;
}
void set_horz_possible(bool value) {
horz_possible_ = value;
}
int left_rule() const {
return left_rule_;
}
void set_left_rule(int new_left) {
left_rule_ = new_left;
}
int right_rule() const {
return right_rule_;
}
void set_right_rule(int new_right) {
right_rule_ = new_right;
}
int left_crossing_rule() const {
return left_crossing_rule_;
}
void set_left_crossing_rule(int new_left) {
left_crossing_rule_ = new_left;
}
int right_crossing_rule() const {
return right_crossing_rule_;
}
void set_right_crossing_rule(int new_right) {
right_crossing_rule_ = new_right;
}
float horz_stroke_width() const {
return horz_stroke_width_;
}
void set_horz_stroke_width(float width) {
horz_stroke_width_ = width;
}
float vert_stroke_width() const {
return vert_stroke_width_;
}
void set_vert_stroke_width(float width) {
vert_stroke_width_ = width;
}
float area_stroke_width() const {
return area_stroke_width_;
}
tesseract::ColPartition *owner() const {
return owner_;
}
void set_owner(tesseract::ColPartition *new_owner) {
owner_ = new_owner;
}
bool leader_on_left() const {
return leader_on_left_;
}
void set_leader_on_left(bool flag) {
leader_on_left_ = flag;
}
bool leader_on_right() const {
return leader_on_right_;
}
void set_leader_on_right(bool flag) {
leader_on_right_ = flag;
}
BLOBNBOX *neighbour(BlobNeighbourDir n) const {
return neighbours_[n];
}
bool good_stroke_neighbour(BlobNeighbourDir n) const {
return good_stroke_neighbours_[n];
}
void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good) {
neighbours_[n] = neighbour;
good_stroke_neighbours_[n] = good;
}
bool IsDiacritic() const {
return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
}
int base_char_top() const {
return base_char_top_;
}
int base_char_bottom() const {
return base_char_bottom_;
}
int baseline_position() const {
return baseline_y_;
}
int line_crossings() const {
return line_crossings_;
}
void set_line_crossings(int value) {
line_crossings_ = value;
}
void set_diacritic_box(const TBOX &diacritic_box) {
base_char_top_ = diacritic_box.top();
base_char_bottom_ = diacritic_box.bottom();
}
BLOBNBOX *base_char_blob() const {
return base_char_blob_;
}
void set_base_char_blob(BLOBNBOX *blob) {
base_char_blob_ = blob;
}
void set_owns_cblob(bool value) {
owns_cblob_ = value;
}
bool UniquelyVertical() const {
return vert_possible_ && !horz_possible_;
}
bool UniquelyHorizontal() const {
return horz_possible_ && !vert_possible_;
}
// Returns true if the region type is text.
static bool IsTextType(BlobRegionType type) {
return type == BRT_TEXT || type == BRT_VERT_TEXT;
}
// Returns true if the region type is image.
static bool IsImageType(BlobRegionType type) {
return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
}
// Returns true if the region type is line.
static bool IsLineType(BlobRegionType type) {
return type == BRT_HLINE || type == BRT_VLINE;
}
// Returns true if the region type cannot be merged.
static bool UnMergeableType(BlobRegionType type) {
return IsLineType(type) || IsImageType(type);
}
// Helper to call CleanNeighbours on all blobs on the list.
static void CleanNeighbours(BLOBNBOX_LIST *blobs);
// Helper to delete all the deletable blobs on the list.
static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
// Helper to compute edge offsets for all the blobs on the list.
// See coutln.h for an explanation of edge offsets.
static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs);
#ifndef GRAPHICS_DISABLED
// Helper to draw all the blobs on the list in the given body_colour,
// with child outlines in the child_colour.
static void PlotBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
ScrollView::Color child_colour, ScrollView *win);
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
// given list in the given body_colour, with child outlines in the
// child_colour.
static void PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour,
ScrollView::Color child_colour, ScrollView *win);
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type);
// Keep in sync with BlobRegionType.
ScrollView::Color BoxColor() const;
void plot(ScrollView *window, // window to draw in
ScrollView::Color blob_colour, // for outer bits
ScrollView::Color child_colour); // for holes
#endif
// Initializes members set by StrokeWidth and beyond, without discarding
// stored area and strokewidth values, which are expensive to calculate.
void ReInit() {
joined = false;
reduced = false;
repeated_set_ = 0;
left_tab_type_ = TT_NONE;
right_tab_type_ = TT_NONE;
region_type_ = BRT_UNKNOWN;
flow_ = BTFT_NONE;
spt_type_ = BSTT_SKIP;
left_rule_ = 0;
right_rule_ = 0;
left_crossing_rule_ = 0;
right_crossing_rule_ = 0;
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0) {
area_stroke_width_ = 2.0f * area / cblob()->perimeter();
}
owner_ = nullptr;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
baseline_y_ = box.bottom();
line_crossings_ = 0;
base_char_blob_ = nullptr;
horz_possible_ = false;
vert_possible_ = false;
leader_on_left_ = false;
leader_on_right_ = false;
ClearNeighbours();
}
// Forgets all neighbours: every direction slot gets a null neighbour and a
// false good-stroke flag.
void ClearNeighbours() {
  for (auto &slot : neighbours_) {
    slot = nullptr;
  }
  for (auto &is_good : good_stroke_neighbours_) {
    is_good = false;
  }
}
private:
// Data members. Those without a default initializer below are given their
// first value by ReInit() (directly or via ClearNeighbours()), which both
// constructors call.
C_BLOB *cblob_ptr = nullptr; // edgestep blob
TBOX box; // bounding box
TBOX red_box; // reduced bounding box (stored by set_reduced_box)
int32_t area = 0; // enclosed area
int32_t repeated_set_ = 0; // id of the set of repeated blobs
TabType left_tab_type_ = TT_NONE; // Indicates tab-stop assessment
TabType right_tab_type_ = TT_NONE; // Indicates tab-stop assessment
BlobRegionType region_type_ = BRT_UNKNOWN; // Type of region this blob belongs to
BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow.
BlobSpecialTextType spt_type_; // Special text type.
bool joined = false; // joined to prev
bool reduced = false; // reduced box set (true once set_reduced_box is called)
int16_t left_rule_ = 0; // x-coord of nearest but not crossing rule line
int16_t right_rule_ = 0; // x-coord of nearest but not crossing rule line
int16_t left_crossing_rule_; // x-coord of nearest or crossing rule line
int16_t right_crossing_rule_; // x-coord of nearest or crossing rule line
int16_t base_char_top_; // y-coord of top/bottom of diacritic base,
int16_t base_char_bottom_; // if it exists else top/bottom of this blob.
int16_t baseline_y_; // Estimate of baseline position.
int32_t line_crossings_; // Number of line intersections touched.
BLOBNBOX *base_char_blob_; // The blob that was the base char.
tesseract::ColPartition *owner_; // Who will delete me when I am not needed
// Neighbour in each direction, indexed by BlobNeighbourDir.
BLOBNBOX *neighbours_[BND_COUNT];
float horz_stroke_width_ = 0.0f; // Median horizontal stroke width
float vert_stroke_width_ = 0.0f; // Median vertical stroke width
float area_stroke_width_ = 0.0f; // Stroke width from area/perimeter ratio.
// Per-direction flag stored alongside neighbours_ by set_neighbour().
bool good_stroke_neighbours_[BND_COUNT];
bool horz_possible_; // Could be part of horizontal flow.
bool vert_possible_; // Could be part of vertical flow.
bool leader_on_left_; // There is a leader to the left.
bool leader_on_right_; // There is a leader to the right.
// Iff true, then the destructor should delete the cblob_ptr.
// TODO(rays) migrate all uses to correctly setting this flag instead of
// deleting the C_BLOB before deleting the BLOBNBOX.
bool owns_cblob_ = false;
};
class TO_ROW : public ELIST2_LINK {
public:
static const int kErrorWeight = 3;
TO_ROW() {
clear();
} // empty
TO_ROW( // constructor
BLOBNBOX *blob, // from first blob
float top, // of row //target height
float bottom, float row_size);
void print() const;
float max_y() const { // access function
return y_max;
}
float min_y() const {
return y_min;
}
float mean_y() const {
return (y_min + y_max) / 2.0f;
}
float initial_min_y() const {
return initial_y_min;
}
float line_m() const { // access to line fit
return m;
}
float line_c() const {
return c;
}
float line_error() const {
return error;
}
float parallel_c() const {
return para_c;
}
float parallel_error() const {
return para_error;
}
float believability() const { // baseline goodness
return credibility;
}
float intercept() const { // real parallel_c
return y_origin;
}
void add_blob( // put in row
BLOBNBOX *blob, // blob to add
float top, // of row //target height
float bottom, float row_size);
void insert_blob( // put in row in order
BLOBNBOX *blob);
BLOBNBOX_LIST *blob_list() { // get list
return &blobs;
}
void set_line( // set line spec
float new_m, // line to set
float new_c, float new_error) {
m = new_m;
c = new_c;
error = new_error;
}
// Stores the constrained (fixed gradient) line fit and derives from it:
//  - credibility: blob count minus kErrorWeight times the fit error;
//  - y_origin: new_c divided by sqrt(1 + gradient^2), the real intercept.
// gradient is the page gradient; new_c and new_error are the fitted
// intercept and its error.
void set_parallel_line( // set fixed gradient line
    float gradient,     // page gradient
    float new_c, float new_error) {
  para_error = new_error;
  para_c = new_c;
  // real intercept
  y_origin = new_c / std::sqrt(1 + gradient * gradient);
  credibility = blobs.length() - kErrorWeight * new_error;
}
void set_limits( // set min,max
float new_min, // bottom and
float new_max) { // top of row
y_min = new_min;
y_max = new_max;
}
void compute_vertical_projection();
// get projection
bool rep_chars_marked() const {
return num_repeated_sets_ != -1;
}
void clear_rep_chars_marked() {
num_repeated_sets_ = -1;
}
int num_repeated_sets() const {
return num_repeated_sets_;
}
void set_num_repeated_sets(int num_sets) {
num_repeated_sets_ = num_sets;
}
// Public data members of TO_ROW.
// true when dead
bool merged = false;
bool all_caps; // had no ascenders
bool used_dm_model; // in guessing pitch
int16_t projection_left; // start of projection
int16_t projection_right; // presumably end of projection (right-hand limit) -- TODO confirm
PITCH_TYPE pitch_decision; // how strong is decision
float fixed_pitch; // pitch or 0
float fp_space; // sp if fixed pitch
float fp_nonsp; // nonsp if fixed pitch
float pr_space; // sp if prop
float pr_nonsp; // non sp if prop
float spacing; // to "next" row
float xheight; // of line
int xheight_evidence; // number of blobs of height xheight
float ascrise; // ascenders
float descdrop; // descenders
float body_size; // of CJK characters. Assumed to be
// xheight+ascrise for non-CJK text.
int32_t min_space; // min size for real space
int32_t max_nonspace; // max size of non-space
int32_t space_threshold; // space vs nonspace
float kern_size; // average non-space
float space_size; // average space
WERD_LIST rep_words; // repeated chars
ICOORDELT_LIST char_cells; // fixed pitch cells
QSPLINE baseline; // curved baseline
STATS projection; // vertical projection
private:
void clear(); // clear all values to reasonable defaults
BLOBNBOX_LIST blobs; // blobs in row
float y_min; // coords
float y_max;
float initial_y_min;
float m, c; // line spec
float error; // line error
float para_c; // constrained fit
float para_error;
float y_origin; // rotated para_c;
float credibility; // baseline believability
int num_repeated_sets_; // number of sets of repeated blobs
// set to -1 if we have not searched
// for repeated blobs in this row yet
};
ELIST2IZEH(TO_ROW)
class TESS_API TO_BLOCK : public ELIST_LINK {
public:
TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
clear();
} // empty
TO_BLOCK( // constructor
BLOCK *src_block); // real block
~TO_BLOCK();
void clear(); // clear all scalar members.
TO_ROW_LIST *get_rows() { // access function
return &row_list;
}
// Rotate all the blobnbox lists and the underlying block. Then update the
// median size statistic from the blobs list.
void rotate(const FCOORD &rotation) {
BLOBNBOX_LIST *blobnbox_list[] = {&blobs, &underlines, &noise_blobs,
&small_blobs, &large_blobs, nullptr};
for (BLOBNBOX_LIST **list = blobnbox_list; *list != nullptr; ++list) {
BLOBNBOX_IT it(*list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->rotate(rotation);
}
}
// Rotate the block
ASSERT_HOST(block->pdblk.poly_block() != nullptr);
block->rotate(rotation);
// Update the median size statistic from the blobs list.
STATS widths(0, block->pdblk.bounding_box().width());
STATS heights(0, block->pdblk.bounding_box().height());
BLOBNBOX_IT blob_it(&blobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
widths.add(blob_it.data()->bounding_box().width(), 1);
heights.add(blob_it.data()->bounding_box().height(), 1);
}
block->set_median_size(static_cast<int>(widths.median() + 0.5),
static_cast<int>(heights.median() + 0.5));
}
// Debug helper: prints one line per row on row_list with its y range,
// parallel_c and blob count.
void print_rows() { // debug info
  TO_ROW_IT row_it = &row_list;
  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    auto *row = row_it.data();
    tprintf("Row range (%g,%g), para_c=%g, blobcount=%" PRId32 "\n", row->min_y(), row->max_y(),
            row->parallel_c(), row->blob_list()->length());
    row_it.forward();
  }
}
// Reorganizes the blob lists with a different definition of small, medium
// and large, compared to the original definition.
// Height is still the primary filter key, but medium width blobs of small
// height become medium, and very wide blobs of small height stay small.
void ReSetAndReFilterBlobs();
// Deletes noise blobs from all lists where not owned by a ColPartition.
void DeleteUnownedNoise();
// Computes and stores the edge offsets on each blob for use in feature
// extraction, using greyscale if the supplied grey and thresholds pixes
// are 8-bit or otherwise (if nullptr or not 8 bit) the original binary
// edge step outlines.
// Thresholds must either be the same size as grey or an integer down-scale
// of grey.
// See coutln.h for an explanation of edge offsets.
void ComputeEdgeOffsets(Image thresholds, Image grey);
#ifndef GRAPHICS_DISABLED
// Draw the noise blobs from all lists in red.
void plot_noise_blobs(ScrollView *to_win);
// Draw the blobs on on the various lists in the block in different colors.
void plot_graded_blobs(ScrollView *to_win);
#endif
BLOBNBOX_LIST blobs; // medium size
BLOBNBOX_LIST underlines; // underline blobs
BLOBNBOX_LIST noise_blobs; // very small
BLOBNBOX_LIST small_blobs; // fairly small
BLOBNBOX_LIST large_blobs; // big blobs
BLOCK *block; // real block
PITCH_TYPE pitch_decision; // how strong is decision
float line_spacing; // estimate
// line_size is a lower-bound estimate of the font size in pixels of
// the text in the block (with ascenders and descenders), being a small
// (1.25) multiple of the median height of filtered blobs.
// In most cases the font size will be bigger, but it will be closer
// if the text is allcaps, or in a no-x-height script.
float line_size; // estimate
float max_blob_size; // line assignment limit
float baseline_offset; // phase shift
float xheight; // median blob size
float fixed_pitch; // pitch or 0
float kern_size; // average non-space
float space_size; // average space
int32_t min_space; // min definite space
int32_t max_nonspace; // max definite
float fp_space; // sp if fixed pitch
float fp_nonsp; // nonsp if fixed pitch
float pr_space; // sp if prop
float pr_nonsp; // non sp if prop
TO_ROW *key_row; // starting row
private:
TO_ROW_LIST row_list; // temporary rows
};
ELISTIZEH(TO_BLOCK)
extern double_VAR_H(textord_error_weight, 3, "Weighting for error in believability");
void find_cblob_limits( // get y limits
C_BLOB *blob, // blob to search
float leftx, // x limits
float rightx,
FCOORD rotation, // for landscape
float &ymin, // output y limits
float &ymax);
void find_cblob_vlimits( // get y limits
C_BLOB *blob, // blob to search
float leftx, // x limits
float rightx,
float &ymin, // output y limits
float &ymax);
// Declaration only; the definition is not in this header.
// Horizontal counterpart of find_cblob_vlimits: takes y limits and reports
// x limits through xmin/xmax. The last parameter was previously misspelled
// "xymax"; it is named xmax to pair with xmin.
void find_cblob_hlimits( // get x limits
    C_BLOB *blob,        // blob to search
    float bottomy,       // y limits
    float topy,
    float &xmin,         // output x limits
    float &xmax);
C_BLOB *crotate_cblob( // rotate it
C_BLOB *blob, // blob to search
FCOORD rotation // for landscape
);
// Declarations only; the definitions are not in this header.
TBOX box_next( // get bounding box
BLOBNBOX_IT *it // iterator to blobs
);
TBOX box_next_pre_chopped( // get bounding box
BLOBNBOX_IT *it // iterator to blobs
);
void vertical_cblob_projection( // project outlines
C_BLOB *blob, // blob to project
STATS *stats // output
);
void vertical_coutline_projection( // project outlines
C_OUTLINE *outline, // outline to project
STATS *stats // output
);
#ifndef GRAPHICS_DISABLED
void plot_blob_list(ScrollView *win, // window to draw in
BLOBNBOX_LIST *list, // blob list
ScrollView::Color body_colour, // colour to draw
ScrollView::Color child_colour); // colour of child
#endif // !GRAPHICS_DISABLED
} // namespace tesseract
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,476 @@
/******************************************************************************
*
* File: blobs.h
* Description: Blob definition
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
#ifndef BLOBS_H
#define BLOBS_H
#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH
#include "normalis.h" // for DENORM
#include "points.h" // for FCOORD, ICOORD
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include <tesseract/publictypes.h> // for OcrEngineMode
#include <cstdint> // for int16_t
struct Pix;
namespace tesseract {
class BLOCK;
class C_BLOB;
class C_OUTLINE;
class LLSQ;
class ROW;
class WERD;
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
// A 2-D point (also used as a vector, see VECTOR) with 16-bit integer
// coordinates. Products are computed in int, so cross/dot/length return int.
struct TPOINT {
  // Point at the origin.
  TPOINT() : x(0), y(0) {}
  // Point with the given coordinates.
  TPOINT(int16_t vx, int16_t vy) : x(vx), y(vy) {}
  // Implicit conversion from an ICOORD, copying its x() and y().
  TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {}

  // Adds other to this point, component by component.
  void operator+=(const TPOINT &other) {
    x = static_cast<int16_t>(x + other.x);
    y = static_cast<int16_t>(y + other.y);
  }

  // Divides both components by divisor (integer division).
  void operator/=(int divisor) {
    x = static_cast<int16_t>(x / divisor);
    y = static_cast<int16_t>(y / divisor);
  }

  // Two points are equal when both coordinates match.
  bool operator==(const TPOINT &other) const {
    if (x != other.x) {
      return false;
    }
    return y == other.y;
  }

  // Returns true when the two line segments cross each other.
  // (Moved from outlines.cpp).
  static bool IsCrossed(const TPOINT &a0, const TPOINT &a1, const TPOINT &b0, const TPOINT &b1);

  // Sets *this to the vector p1 - p2.
  void diff(const TPOINT &p1, const TPOINT &p2) {
    x = static_cast<int16_t>(p1.x - p2.x);
    y = static_cast<int16_t>(p1.y - p2.y);
  }

  // Returns the z-component of the cross product of *this with other.
  int cross(const TPOINT &other) const {
    const int forward = x * other.y;
    const int backward = y * other.x;
    return forward - backward;
  }

  // Returns the scalar (dot) product of *this with other.
  int dot(const TPOINT &other) const {
    const int x_term = x * other.x;
    const int y_term = y * other.y;
    return x_term + y_term;
  }

  // Returns the SQUARED length of the vector (x*x + y*y); no square root is
  // taken despite the name.
  int length() const {
    return dot(*this);
  }

  int16_t x; // absolute x coord.
  int16_t y; // absolute y coord.
};
using VECTOR = TPOINT; // structure for coordinates.
// A vertex of a polygonal outline, linked to its neighbours through the
// next/prev pointers. The segment helpers below walk the next pointers and
// stop either at the given end point or on arriving back at *this, so they
// expect the points to be linked into a closed ring.
struct EDGEPT {
EDGEPT() = default;
// Copy-constructs the data fields only; next/prev start out as nullptr.
EDGEPT(const EDGEPT &src) : next(nullptr), prev(nullptr) {
CopyFrom(src);
}
// Assigns the data fields only; this point keeps its own next/prev links.
EDGEPT &operator=(const EDGEPT &src) {
CopyFrom(src);
return *this;
}
// Copies the data elements, but leaves the pointers untouched.
void CopyFrom(const EDGEPT &src) {
pos = src.pos;
vec = src.vec;
is_hidden = src.is_hidden;
runlength = src.runlength;
dir = src.dir;
fixed = src.fixed;
src_outline = src.src_outline;
start_step = src.start_step;
step_count = src.step_count;
}
// Returns the squared distance between the points, with the x-component
// weighted by x_factor.
int WeightedDistance(const EDGEPT &other, int x_factor) const {
int x_dist = pos.x - other.pos.x;
int y_dist = pos.y - other.pos.y;
return x_dist * x_dist * x_factor + y_dist * y_dist;
}
// Returns true if the positions are equal.
bool EqualPos(const EDGEPT &other) const {
return pos == other.pos;
}
// Returns the bounding box of the outline segment from *this to *end.
// Ignores hidden edge flags.
// The box starts as the single point pos and always absorbs at least the
// next point, because the loop body runs before the end test.
TBOX SegmentBox(const EDGEPT *end) const {
TBOX box(pos.x, pos.y, pos.x, pos.y);
const EDGEPT *pt = this;
do {
pt = pt->next;
if (pt->pos.x < box.left()) {
box.set_left(pt->pos.x);
}
if (pt->pos.x > box.right()) {
box.set_right(pt->pos.x);
}
if (pt->pos.y < box.bottom()) {
box.set_bottom(pt->pos.y);
}
if (pt->pos.y > box.top()) {
box.set_top(pt->pos.y);
}
} while (pt != end && pt != this);
return box;
}
// Returns the area of the outline segment from *this to *end.
// Ignores hidden edge flags.
// The value is the plain sum of cross(pt->pos - pos, pt->vec) over the points
// after *this, up to but not including end.
// NOTE(review): as an unscaled cross-product sum this looks like a signed,
// doubled area — confirm how callers interpret it.
int SegmentArea(const EDGEPT *end) const {
int area = 0;
const EDGEPT *pt = this->next;
do {
TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
area += origin_vec.cross(pt->vec);
pt = pt->next;
} while (pt != end && pt != this);
return area;
}
// Returns true if *end is reached within min_points steps along next from
// *this (*this itself counts as step 0), and false if the walk gets back to
// *this first or the step budget runs out.
// Ignores hidden edge flags.
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const {
int count = 0;
const EDGEPT *pt = this;
do {
if (pt == end) {
return true;
}
pt = pt->next;
++count;
} while (pt != this && count <= min_points);
return false;
}
// Accessors to hide or reveal a cut edge from feature extractors.
void Hide() {
is_hidden = true;
}
void Reveal() {
is_hidden = false;
}
bool IsHidden() const {
return is_hidden;
}
// Flags this point as a chop point by setting dir to 1.
void MarkChop() {
dir = 1;
}
// Any non-zero dir counts as a chop point.
bool IsChopPt() const {
return dir != 0;
}
TPOINT pos; // position
VECTOR vec; // vector to next point
bool is_hidden = false; // set/cleared by Hide()/Reveal()
uint8_t runlength = 0;
int8_t dir = 0; // non-zero marks a chop point (see MarkChop/IsChopPt)
int8_t fixed = 0;
EDGEPT *next = nullptr; // anticlockwise element
EDGEPT *prev = nullptr; // clockwise element
C_OUTLINE *src_outline = nullptr; // Outline it came from.
// The following fields are not used if src_outline is nullptr.
int start_step = 0; // Location of pos in src_outline.
int step_count = 0; // Number of steps used (may wrap around).
};
// For use in chop and findseam to keep a list of which EDGEPTs were inserted.
CLISTIZEH(EDGEPT)
// One outline of a blob: a loop of EDGEPTs plus its bounding box corners,
// chained to the next outline of the same blob through next.
struct TESSLINE {
TESSLINE() : is_hole(false), loop(nullptr), next(nullptr) {}
// Copying goes through CopyFrom; loop and next start as nullptr.
// NOTE(review): is_hole is not in the initializer list — presumably CopyFrom
// assigns it; confirm against the definition.
TESSLINE(const TESSLINE &src) : loop(nullptr), next(nullptr) {
CopyFrom(src);
}
// Releases owned data via Clear().
~TESSLINE() {
Clear();
}
TESSLINE &operator=(const TESSLINE &src) {
CopyFrom(src);
return *this;
}
// Consume the circular list of EDGEPTs to make a TESSLINE.
static TESSLINE *BuildFromOutlineList(EDGEPT *outline);
// Copies the data and the outline, but leaves next untouched.
void CopyFrom(const TESSLINE &src);
// Deletes owned data.
void Clear();
// Normalize in-place using the DENORM.
void Normalize(const DENORM &denorm);
// Rotates by the given rotation in place.
void Rotate(const FCOORD rotation);
// Moves by the given vec in place.
void Move(const ICOORD vec);
// Scales by the given factor in place.
void Scale(float factor);
// Sets up the start and vec members of the loop from the pos members.
void SetupFromPos();
// Recomputes the bounding box from the points in the loop.
void ComputeBoundingBox();
// Computes the min and max cross product of the outline points with the
// given vec and returns the results in min_xp and max_xp. Geometrically
// this is the left and right edge of the outline perpendicular to the
// given direction, but to get the distance units correct, you would
// have to divide by the modulus of vec.
void MinMaxCrossProduct(const TPOINT vec, int *min_xp, int *max_xp) const;
// Returns the bounding box of this outline as a TBOX.
TBOX bounding_box() const;
// Returns true if *this and other have equal bounding boxes.
bool SameBox(const TESSLINE &other) const {
return topleft == other.topleft && botright == other.botright;
}
// Returns true if the segment pt1-pt2 crosses any edge of this outline.
// The edges are only tested when BOTH end points lie inside the outline's
// bounding box; otherwise the result is false without looking at the loop.
bool SegmentCrosses(const TPOINT &pt1, const TPOINT &pt2) const {
if (Contains(pt1) && Contains(pt2)) {
EDGEPT *pt = loop;
do {
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) {
return true;
}
pt = pt->next;
} while (pt != loop);
}
return false;
}
// Returns true if the point is contained within the outline box.
// The test is inclusive on all four sides, with y increasing upwards
// (botright.y <= pt.y <= topleft.y).
bool Contains(const TPOINT &pt) const {
return topleft.x <= pt.x && pt.x <= botright.x && botright.y <= pt.y && pt.y <= topleft.y;
}
#ifndef GRAPHICS_DISABLED
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color);
#endif // !GRAPHICS_DISABLED
// Returns the first outline point that has a different src_outline to its
// predecessor, or, if all the same, the lowest indexed point.
EDGEPT *FindBestStartPt() const;
// Area of the bounding box (width * height), not of the outline itself.
int BBArea() const {
return (botright.x - topleft.x) * (topleft.y - botright.y);
}
TPOINT topleft; // Top left of loop.
TPOINT botright; // Bottom right of loop.
TPOINT start; // Start of loop.
bool is_hole; // True if this is a hole/child outline.
EDGEPT *loop; // Edgeloop.
TESSLINE *next; // Next outline in blob.
}; // Outline structure.
// A blob: a singly linked list of TESSLINE outlines together with the DENORM
// describing the normalizations applied to it so far.
struct TBLOB {
TBLOB() : outlines(nullptr) {}
// Copying goes through CopyFrom, starting from an empty outline list.
TBLOB(const TBLOB &src) : outlines(nullptr) {
CopyFrom(src);
}
// Releases owned data via Clear().
~TBLOB() {
Clear();
}
TBLOB &operator=(const TBLOB &src) {
CopyFrom(src);
return *this;
}
// Factory to build a TBLOB from a C_BLOB with polygonal approximation along
// the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
// contain pointers to the input C_OUTLINEs that enable higher-resolution
// feature extraction that does not use the polygonal approximation.
static TBLOB *PolygonalCopy(bool allow_detailed_fx, C_BLOB *src);
// Factory builds a blob with no outlines, but copies the other member data.
static TBLOB *ShallowCopy(const TBLOB &src);
// Normalizes the blob for classification only if needed.
// (Normally this means a non-zero classify rotation.)
// If no Normalization is needed, then nullptr is returned, and the input blob
// can be used directly. Otherwise a new TBLOB is returned which must be
// deleted after use.
TBLOB *ClassifyNormalizeIfNeeded() const;
// Copies the data and the outlines.
// NOTE(review): the earlier wording also said "leaves next untouched", but no
// next member is declared in this struct — looks stale; confirm.
void CopyFrom(const TBLOB &src);
// Deletes owned data.
void Clear();
// Sets up the built-in DENORM and normalizes the blob in-place.
// For parameters see DENORM::SetupNormalization, plus the inverse flag for
// this blob and the Pix for the full image.
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift,
float final_yshift, bool inverse, Image pix);
// Rotates by the given rotation in place.
void Rotate(const FCOORD rotation);
// Moves by the given vec in place.
void Move(const ICOORD vec);
// Scales by the given factor in place.
void Scale(float factor);
// Recomputes the bounding boxes of the outlines.
void ComputeBoundingBoxes();
// Returns the number of outlines.
int NumOutlines() const;
// Returns the bounding box of the blob as a TBOX.
TBOX bounding_box() const;
// Returns true if the given line segment crosses any outline of this blob.
// Delegates to TESSLINE::SegmentCrosses for each outline in turn.
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const {
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
if (outline->SegmentCrosses(pt1, pt2)) {
return true;
}
}
return false;
}
// Returns true if the point is contained within any of the outline boxes.
bool Contains(const TPOINT &pt) const {
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
if (outline->Contains(pt)) {
return true;
}
}
return false;
}
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void EliminateDuplicateOutlines();
// Swaps the outlines of *this and next if needed to keep the centers in
// increasing x.
void CorrectBlobOrder(TBLOB *next);
// Read-only access to the accumulated normalization record.
const DENORM &denorm() const {
return denorm_;
}
#ifndef GRAPHICS_DISABLED
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color);
#endif // !GRAPHICS_DISABLED
// Sum of the bounding-box areas of all outlines (overlapping boxes are
// counted once per outline).
int BBArea() const {
int total_area = 0;
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
total_area += outline->BBArea();
}
return total_area;
}
// Computes the center of mass and second moments for the old baseline and
// 2nd moment normalizations. Returns the outline length.
// The input denorm should be the normalizations that have been applied from
// the image to the current state of this TBLOB.
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const;
// Computes the precise bounding box of the coords that are generated by
// GetEdgeCoords. This may be different from the bounding box of the polygon.
void GetPreciseBoundingBox(TBOX *precise_box) const;
// Adds edges to the given vectors.
// For all the edge steps in all the outlines, or polygonal approximation
// where there are no edge steps, collects the steps into x_coords/y_coords.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
std::vector<std::vector<int>> &y_coords) const;
TESSLINE *outlines; // List of outlines in blob.
private: // TODO(rays) Someday the data members will be private too.
// For all the edge steps in all the outlines, or polygonal approximation
// where there are no edge steps, collects the steps into the bounding_box,
// llsq and/or the x_coords/y_coords. Both are used in different kinds of
// normalization.
// For a description of x_coords, y_coords, see GetEdgeCoords above.
void CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
std::vector<std::vector<int>> *x_coords,
std::vector<std::vector<int>> *y_coords) const;
private:
// DENORM indicating the transformations that this blob has undergone so far.
DENORM denorm_;
}; // Blob structure.
// A word: a vector of TBLOB pointers plus a flag for latin-based scripts.
struct TWERD {
TWERD() : latin_script(false) {}
// Copying goes through CopyFrom.
// NOTE(review): latin_script is not initialized here before CopyFrom runs —
// presumably CopyFrom assigns it; confirm against the definition.
TWERD(const TWERD &src) {
CopyFrom(src);
}
// Releases owned data via Clear().
~TWERD() {
Clear();
}
TWERD &operator=(const TWERD &src) {
CopyFrom(src);
return *this;
}
// Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
// approximation along the way.
static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src);
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
const TBOX *norm_box, DENORM *word_denorm);
// Copies the data and the blobs.
// NOTE(review): the earlier wording also said "leaves next untouched", but no
// next member is declared in this struct — looks stale; confirm.
void CopyFrom(const TWERD &src);
// Deletes owned data.
void Clear();
// Recomputes the bounding boxes of the blobs.
void ComputeBoundingBoxes();
// Returns the number of blobs in the word.
// The vector's size is converted to int on return.
int NumBlobs() const {
return blobs.size();
}
// Returns the bounding box of the word as a TBOX.
TBOX bounding_box() const;
// Merges the blobs from start to end, not including end, and deletes
// the blobs between start and end.
void MergeBlobs(int start, int end);
#ifndef GRAPHICS_DISABLED
void plot(ScrollView *window);
#endif // !GRAPHICS_DISABLED
std::vector<TBLOB *> blobs; // Blobs in word.
bool latin_script; // This word is in a latin-based script.
};
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
// NOTE(review): presumably reports whether blob can be divided and writes the
// division point to *location — confirm against the definition.
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location);
// NOTE(review): presumably splits blob at location, moving part of it into
// other_blob — confirm against the definition.
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location);
} // namespace tesseract
#endif

View File

@ -0,0 +1,74 @@
/**********************************************************************
* File: blread.cpp (Formerly pdread.c)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "blread.h"
#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only)
#include "scanutils.h" // for tfscanf
#include <cstdio> // for fclose, fopen, FILE
namespace tesseract {
#define UNLV_EXT ".uzn" // unlv zone file
/**********************************************************************
* read_unlv_file
*
* Read a whole unlv zone file to make a list of blocks.
**********************************************************************/
bool read_unlv_file( // print list of sides
std::string &name, // basename of file
int32_t xsize, // image size
int32_t ysize, // image size
BLOCK_LIST *blocks // output list
) {
FILE *pdfp; // file pointer
BLOCK *block; // current block
int x; // current top-down coords
int y;
int width; // of current block
int height;
BLOCK_IT block_it = blocks; // block iterator
name += UNLV_EXT; // add extension
if ((pdfp = fopen(name.c_str(), "rb")) == nullptr) {
return false; // didn't read one
} else {
while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
// make rect block
block = new BLOCK(name.c_str(), true, 0, 0, static_cast<int16_t>(x),
static_cast<int16_t>(ysize - y - height), static_cast<int16_t>(x + width),
static_cast<int16_t>(ysize - y));
// on end of list
block_it.add_to_end(block);
}
fclose(pdfp);
}
tprintf("UZN file %s loaded.\n", name.c_str());
return true;
}
void FullPageBlock(int width, int height, BLOCK_LIST *blocks) {
BLOCK_IT block_it(blocks);
auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
block_it.add_to_end(block);
}
} // namespace tesseract

View File

@ -0,0 +1,40 @@
/**********************************************************************
* File: blread.h (Formerly pdread.h)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLREAD_H
#define BLREAD_H
#include <cstdint> // for int32_t
#include <string> // for std::string
namespace tesseract {
class BLOCK_LIST;
// Reads a whole UNLV zone file and adds one block per zone to blocks.
// Returns false if the file cannot be opened.
bool read_unlv_file( // read zone file
std::string &name, // basename of file; the zone-file extension is appended to it
int32_t xsize, // image size
int32_t ysize, // image size
BLOCK_LIST *blocks // output list
);
// Adds one block covering the whole width x height page to blocks.
void FullPageBlock(int width, int height, BLOCK_LIST *blocks);
} // namespace tesseract
#endif

View File

@ -0,0 +1,282 @@
/**********************************************************************
* File: boxread.cpp
* Description: Read data from a box file.
* Author: Ray Smith
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "boxread.h"
#include "errcode.h" // for ERRCODE, TESSEXIT
#include "fileerr.h" // for CANTOPENFILE
#include "rect.h" // for TBOX
#include "tprintf.h" // for tprintf
#include <tesseract/unichar.h> // for UNICHAR
#include "helpers.h" // for chomp_string
#include <climits> // for INT_MAX
#include <cstring> // for strchr, strcmp
#include <fstream> // for std::ifstream
#include <locale> // for std::locale::classic
#include <sstream> // for std::stringstream
#include <string> // for std::string
namespace tesseract {
// Special char code used to identify multi-blob labels.
static const char *kMultiBlobLabelCode = "WordStr";
// Returns the box file name corresponding to the given image_filename.
// A trailing ".bin.png" or ".nrm.png" is removed as a whole (only when the
// name is longer than that suffix); otherwise the last extension of the file
// name is removed. ".box" is then appended.
// Only a '.' in the final path component counts as an extension, so a dot in
// a directory name ("dir.v1/image") no longer truncates the path. Both '/'
// and '\\' are treated as path separators.
static std::string BoxFileName(const char *image_filename) {
  std::string box_filename = image_filename;
  const size_t length = box_filename.length();
  const std::string last = (length > 8) ? box_filename.substr(length - 8) : "";
  if (last == ".bin.png" || last == ".nrm.png") {
    box_filename.resize(length - 8);
  } else {
    const size_t lastdot = box_filename.find_last_of('.');
    const size_t lastsep = box_filename.find_last_of("/\\");
    // The dot must exist and come after the last separator (if any).
    const bool dot_in_basename =
        lastdot != std::string::npos && (lastsep == std::string::npos || lastdot > lastsep);
    if (dot_in_basename) {
      box_filename.resize(lastdot);
    }
  }
  box_filename += ".box";
  return box_filename;
}
// Open the boxfile based on the given image filename.
FILE *OpenBoxFile(const char *fname) {
std::string filename = BoxFileName(fname);
FILE *box_file = nullptr;
if (!(box_file = fopen(filename.c_str(), "rb"))) {
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", filename.c_str());
}
return box_file;
}
// Reads all boxes from the given filename.
// Reads a specific target_page number if >= 0, or all pages otherwise.
// Skips blanks if skip_blanks is true.
// The UTF-8 label of the box is put in texts, and the full box definition as
// a string is put in box_texts, with the corresponding page number in pages.
// Each of the output vectors is optional (may be nullptr).
// Returns false if no boxes are found.
bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
                  std::vector<std::string> *texts, std::vector<std::string> *box_texts,
                  std::vector<int> *pages) {
  // Slurp the whole box file; a file that cannot be opened simply yields no
  // data and is reported as "no boxes".
  std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
  const std::string contents((std::istreambuf_iterator<char>(input)),
                             std::istreambuf_iterator<char>());
  if (contents.empty()) {
    return false;
  }
  // c_str() supplies the NUL-terminated text the in-memory parser expects.
  return ReadMemBoxes(target_page, skip_blanks, contents.c_str(),
                      /*continue_on_failure*/ true, boxes, texts, box_texts, pages);
}
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
//   target_page         - page to keep if >= 0; a negative value keeps all pages.
//   skip_blanks         - drop boxes whose label is a single space or tab.
//   box_data            - NUL-terminated box file contents, one box per line.
//   continue_on_failure - skip unparsable lines instead of giving up.
//   boxes/texts/box_texts/pages - optional outputs (may be nullptr); one entry
//                         is appended to each non-null vector per accepted box.
// Each box_texts entry is rebuilt from the parsed label, box and the page the
// box was actually read from, so it carries the real page number even when
// all pages are requested.
// Returns true if at least one box was accepted. With continue_on_failure
// false, returns false at the first unparsable line; entries appended before
// that line are left in the output vectors.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
                  std::vector<TBOX> *boxes, std::vector<std::string> *texts,
                  std::vector<std::string> *box_texts, std::vector<int> *pages) {
  std::string box_str(box_data);
  std::vector<std::string> lines = split(box_str, '\n');
  if (lines.empty()) {
    return false;
  }
  int num_boxes = 0;
  for (const auto &line : lines) {
    int page = 0;
    std::string utf8_str;
    TBOX box;
    if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
      if (continue_on_failure) {
        continue;
      }
      return false;
    }
    if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
      continue;
    }
    if (target_page >= 0 && page != target_page) {
      continue;
    }
    if (boxes != nullptr) {
      boxes->push_back(box);
    }
    if (texts != nullptr) {
      texts->push_back(utf8_str);
    }
    if (box_texts != nullptr) {
      std::string full_text;
      // Use the parsed page, not target_page: they are equal whenever a
      // specific page is requested, but target_page is negative when all
      // pages are read and would otherwise be written into the box string.
      MakeBoxFileStr(utf8_str.c_str(), box, page, full_text);
      box_texts->push_back(full_text);
    }
    if (pages != nullptr) {
      pages->push_back(page);
    }
    ++num_boxes;
  }
  return num_boxes > 0;
}
// TODO(rays) convert all uses of ReadNextBox to use the new ReadAllBoxes.
// Box files are used ONLY DURING TRAINING, but by both processes of
// creating tr files with tesseract, and unicharset_extractor.
// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box) {
  // A negative target page makes the page-aware overload accept every page.
  const int any_page = -1;
  return ReadNextBox(any_page, line_number, box_file, utf8_str, bounding_box);
}
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
TBOX *bounding_box) {
int page = 0;
char buff[kBoxReadBufSize]; // boxfile read buffer
char *buffptr = buff;
while (fgets(buff, sizeof(buff) - 1, box_file)) {
(*line_number)++;
buffptr = buff;
const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
buffptr += 3; // Skip unicode file designation.
}
// Check for blank lines in box file
if (*buffptr == '\n' || *buffptr == '\0') {
continue;
}
// Skip blank boxes.
if (*buffptr == ' ' || *buffptr == '\t') {
continue;
}
if (*buffptr != '\0') {
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
tprintf("Box file format error on line %i; ignored\n", *line_number);
continue;
}
if (target_page >= 0 && target_page != page) {
continue; // Not on the appropriate page.
}
return true; // Successfully read a box.
}
}
fclose(box_file);
return false; // EOF
}
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
// The box file is assumed to contain box definitions, one per line, of the
// following format for blob-level boxes:
//   <UTF8 str> <left> <bottom> <right> <top> <page id>
// and for word/line-level boxes:
//   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
// See applybox.cpp for more information.
// The page id is optional and defaults to 0. All four coordinates are
// required: the line is rejected if any of them is missing or not a number,
// or if right < left or top < bottom. On entry utf8_str is cleared and
// *bounding_box is reset to an empty TBOX; they are only filled in on success.
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
                     TBOX *bounding_box) {
  *bounding_box = TBOX(); // Initialize it to empty.
  utf8_str = "";
  char uch[kBoxReadBufSize];
  const char *buffptr = boxfile_str;
  // Read the unichar without messing up on Tibetan.
  // According to issue 253 the utf-8 surrogates 85 and A0 are treated
  // as whitespace by sscanf, so it is more reliable to just find
  // ascii space and tab.
  int uch_len = 0;
  // Skip unicode file designation, if present.
  const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
  if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
    buffptr += 3;
  }
  // Allow a single blank as the UTF-8 string. Check for empty string and
  // then blindly eat the first character.
  if (*buffptr == '\0') {
    return false;
  }
  do {
    uch[uch_len++] = *buffptr++;
  } while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
           uch_len < kBoxReadBufSize - 1);
  uch[uch_len] = '\0';
  if (*buffptr != '\0') {
    ++buffptr; // Step over the separator that ended the label.
  }
  // Sentinels: a coordinate the stream never reaches keeps its sentinel and
  // trips the ordering test below.
  int x_min = INT_MAX;
  int y_min = INT_MAX;
  int x_max = INT_MIN;
  int y_max = INT_MIN;
  *page_number = 0;
  std::stringstream stream(buffptr);
  stream.imbue(std::locale::classic());
  stream >> x_min >> y_min >> x_max >> y_max;
  // A coordinate that fails to convert is written as 0, which can still pass
  // the ordering test, so check the stream state before reading the
  // (optional) page number.
  const bool coords_parsed = !stream.fail();
  stream >> *page_number;
  if (!coords_parsed || x_max < x_min || y_max < y_min) {
    tprintf("Bad box coordinates in boxfile string! %s\n", boxfile_str);
    return false;
  }
  // Test for long space-delimited string label.
  if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != nullptr) {
    strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
    uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
    chomp_string(uch);
    uch_len = static_cast<int>(strlen(uch));
  }
  // Validate UTF8 by making unichars with it.
  int used = 0;
  while (used < uch_len) {
    tesseract::UNICHAR ch(uch + used, uch_len - used);
    int new_used = ch.utf8_len();
    if (new_used == 0) {
      // Print the offending byte as unsigned so that bytes >= 0x80 are not
      // sign-extended into 0xffffffXX.
      tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n", uch + used,
              static_cast<unsigned char>(uch[used]), used + 1);
      return false;
    }
    used += new_used;
  }
  utf8_str = uch;
  // x_min <= x_max and y_min <= y_max are guaranteed by the check above.
  bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
  return true; // Successfully read a box.
}
// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str) {
  // Field order of a box file line after the label.
  const int fields[] = {box.left(), box.bottom(), box.right(), box.top(), page_num};
  box_str = unichar_str;
  for (const int field : fields) {
    box_str += ' ';
    box_str += std::to_string(field);
  }
}
} // namespace tesseract

View File

@ -0,0 +1,89 @@
/**********************************************************************
* File: boxread.h
* Description: Read data from a box file.
* Author: Ray Smith
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCUTIL_BOXREAD_H_
#define TESSERACT_CCUTIL_BOXREAD_H_
#include <cstdio> // for FILE
#include <string> // for std::string
#include <vector> // for std::vector
#include <tesseract/export.h> // for TESS_API
namespace tesseract {
class TBOX;
// Size of buffer used to read a line from a box file.
const int kBoxReadBufSize = 1024;
// Open the boxfile based on the given image filename.
// Returns nullptr if the box file cannot be opened.
// NOTE(review): the definition in boxread.cpp raises CANTOPENFILE with the
// TESSEXIT action when fopen fails, so a nullptr return may never be seen by
// the caller — confirm what TESSEXIT does and update this comment.
TESS_API
FILE *OpenBoxFile(const char *filename);
// Reads all boxes from the given filename.
// Reads a specific target_page number if >= 0, or all pages otherwise.
// Skips blanks if skip_blanks is true (a blank is a box whose label is a
// single space or a single tab).
// The UTF-8 label of the box is put in texts, and the full box definition as
// a string is put in box_texts, with the corresponding page number in pages.
// Each of the output vectors is optional (may be nullptr).
// Returns false if no boxes are found, which includes the case where the box
// file is missing or empty.
bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
std::vector<std::string> *texts, std::vector<std::string> *box_texts,
std::vector<int> *pages);
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
// box_data must be NUL-terminated, with one box definition per line.
// continue_on_failure allows reading to continue even if an invalid box is
// encountered and will return true if it succeeds in reading some boxes.
// It otherwise gives up and returns false on encountering an invalid box.
TESS_API
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
std::vector<TBOX> *boxes, std::vector<std::string> *texts,
std::vector<std::string> *box_texts, std::vector<int> *pages);
// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// Because the file is closed on eof, the caller must not fclose box_file
// again after a false return.
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// *line_number is incremented for every line read, including skipped ones.
// If there are page numbers in the file, it reads them all.
TESS_API
bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
TESS_API
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
TBOX *bounding_box);
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
// utf8_str is cleared and *bounding_box reset to an empty TBOX before parsing
// starts, so both are overwritten even when the parse fails.
TESS_API
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
TBOX *bounding_box);
// Creates a box file string from a unichar string, TBOX and page number.
// box_str is overwritten with
// "<unichar_str> <left> <bottom> <right> <top> <page_num>".
TESS_API
void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str);
} // namespace tesseract
#endif // TESSERACT_CCUTIL_BOXREAD_H_

View File

@ -0,0 +1,205 @@
///////////////////////////////////////////////////////////////////////
// File: boxword.cpp
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "boxword.h"
#include "blobs.h"
#include "host.h" // for NearlyEqual
#include "normalis.h"
#include "ocrblock.h"
#include "pageres.h"
namespace tesseract {
// Clip output boxes to input blob boxes for bounds that are within this
// tolerance. Otherwise, the blob may be chopped and we have to just use
// the word bounding box.
const int kBoxClipTolerance = 2;
// Constructs an empty BoxWord: length_ is zero and the box list and word
// bounding box are default-constructed.
BoxWord::BoxWord() : length_(0) {}
// Copy constructor. Delegates to CopyFrom, which assigns bbox_, length_ and
// boxes_, so no member initializers are used here.
BoxWord::BoxWord(const BoxWord &src) {
CopyFrom(src);
}
// Copy assignment. Delegates to CopyFrom and returns *this so that
// assignments can be chained.
BoxWord &BoxWord::operator=(const BoxWord &src) {
CopyFrom(src);
return *this;
}
void BoxWord::CopyFrom(const BoxWord &src) {
bbox_ = src.bbox_;
length_ = src.length_;
boxes_.clear();
boxes_.reserve(length_);
for (int i = 0; i < length_; ++i) {
boxes_.push_back(src.boxes_[i]);
}
}
// Factory that allocates (with new) and returns a BoxWord holding one box per
// blob of tessword. Each box is the bounding box of the blob's outline points
// after the blob's DENORM has transformed them back to original image
// coordinates. The word bounding box is computed before returning.
BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) {
  auto *result = new BoxWord();
  const int num_blobs = tessword->NumBlobs();
  result->length_ = num_blobs;
  result->boxes_.reserve(num_blobs);
  for (int blob_index = 0; blob_index < num_blobs; ++blob_index) {
    TBLOB *blob = tessword->blobs[blob_index];
    TBOX blob_box;
    for (TESSLINE *outline = blob->outlines; outline != nullptr; outline = outline->next) {
      EDGEPT *pt = outline->loop;
      // Walk the circular list of edge points once.
      do {
        // A point is skipped only when both it and its predecessor are hidden.
        const bool contributes = !pt->IsHidden() || !pt->prev->IsHidden();
        if (contributes) {
          TPOINT denormed;
          blob->denorm().DenormTransform(nullptr, pt->pos, &denormed);
          ICOORD image_pos(denormed.x, denormed.y);
          // Grow the blob box to include this single point.
          blob_box += TBOX(image_pos, image_pos);
        }
        pt = pt->next;
      } while (pt != outline->loop);
    }
    result->boxes_.push_back(blob_box);
  }
  result->ComputeBoundingBox();
  return result;
}
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
for (int i = 0; i < length_; ++i) {
TBOX box = boxes_[i];
// Expand by a single pixel, as the poly approximation error is 1 pixel.
box = TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
// Now find the original box that matches.
TBOX original_box;
C_BLOB_IT b_it(original_word->cblob_list());
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX blob_box = b_it.data()->bounding_box();
if (block != nullptr) {
blob_box.rotate(block->re_rotation());
}
if (blob_box.major_overlap(box)) {
original_box += blob_box;
}
}
if (!original_box.null_box()) {
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance)) {
box.set_left(original_box.left());
}
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance)) {
box.set_right(original_box.right());
}
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
box.set_top(original_box.top());
}
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance)) {
box.set_bottom(original_box.bottom());
}
}
original_box = original_word->bounding_box();
if (block != nullptr) {
original_box.rotate(block->re_rotation());
}
boxes_[i] = box.intersection(original_box);
}
ComputeBoundingBox();
}
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void BoxWord::MergeBoxes(int start, int end) {
start = ClipToRange(start, 0, length_);
end = ClipToRange(end, 0, length_);
if (end <= start + 1) {
return;
}
for (int i = start + 1; i < end; ++i) {
boxes_[start] += boxes_[i];
}
int shrinkage = end - 1 - start;
length_ -= shrinkage;
for (int i = start + 1; i < length_; ++i) {
boxes_[i] = boxes_[i + shrinkage];
}
boxes_.resize(length_);
}
// Inserts box before position index, or appends it when index is not less
// than the current length. Updates length_ and recomputes the word bounding
// box.
void BoxWord::InsertBox(int index, const TBOX &box) {
  const auto position = index < length_ ? boxes_.begin() + index : boxes_.end();
  boxes_.insert(position, box);
  length_ = boxes_.size();
  ComputeBoundingBox();
}
// Replaces the box at the given index with the new box and recomputes the
// word bounding box.
// index is used directly as a subscript into boxes_; no range check is made
// here, so the caller must pass a valid index.
void BoxWord::ChangeBox(int index, const TBOX &box) {
boxes_[index] = box;
ComputeBoundingBox();
}
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
// index must satisfy 0 <= index < length_; this is enforced by ASSERT_HOST.
void BoxWord::DeleteBox(int index) {
ASSERT_HOST(0 <= index && index < length_);
boxes_.erase(boxes_.begin() + index);
// Keep the cached count in step with the vector.
--length_;
ComputeBoundingBox();
}
// Removes every box and resets the word bounding box to a default-constructed
// TBOX, leaving the BoxWord empty.
void BoxWord::DeleteAllBoxes() {
  boxes_.clear();
  length_ = 0;
  bbox_ = TBOX();
}
// Computes the bounding box of the word.
void BoxWord::ComputeBoundingBox() {
bbox_ = TBOX();
for (int i = 0; i < length_; ++i) {
bbox_ += boxes_[i];
}
}
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
TBOX blob_box = other.blobs[i]->bounding_box();
if (blob_box == boxes_[i]) {
cb(i);
}
}
}
} // namespace tesseract.

View File

@ -0,0 +1,97 @@
///////////////////////////////////////////////////////////////////////
// File: boxword.h
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
#define TESSERACT_CSTRUCT_BOXWORD_H_
#include "rect.h" // for TBOX
#include <functional> // for std::function
namespace tesseract {
class BLOCK;
class WERD;
struct TWERD;
// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
// length_ caches the number of boxes; the member functions keep it equal to
// the number of elements in boxes_.
class BoxWord {
public:
// Creates an empty BoxWord.
BoxWord();
// Copy constructor. Declared explicit, so copy-initialization of the form
// "BoxWord a = b;" is not permitted; use "BoxWord a(b);".
explicit BoxWord(const BoxWord &src);
~BoxWord() = default;
BoxWord &operator=(const BoxWord &src);
// Makes *this a copy of src.
void CopyFrom(const BoxWord &src);
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
static BoxWord *CopyFromNormalized(TWERD *tessword);
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void ClipToOriginalWord(const BLOCK *block, WERD *original_word);
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void MergeBoxes(int start, int end);
// Inserts a new box before the given index.
// Recomputes the bounding box.
void InsertBox(int index, const TBOX &box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX &box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes();
// This and other putatively are the same, so call the callback
// for each blob index where the bounding boxes match.
// The callback is passed by value as a std::function.
void ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const;
// Returns the bounding box of the whole word.
const TBOX &bounding_box() const {
return bbox_;
}
// Returns the number of boxes.
int length() const {
return length_;
}
// Returns the box at the given index. The index is not range-checked.
const TBOX &BlobBox(int index) const {
return boxes_[index];
}
private:
// Recomputes bbox_ from the individual boxes.
void ComputeBoundingBox();
// Bounding box of the whole word.
TBOX bbox_;
// Number of boxes in boxes_.
int length_;
// One bounding box per blob.
std::vector<TBOX> boxes_;
};
} // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H_

View File

@ -0,0 +1,36 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccstruct.h"
namespace tesseract {
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
// The three fractions add up to 1.0 (0.25 + 0.5 + 0.25).
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
// XHeight / (XHeight + Ascender) = 0.5 / 0.75, i.e. two thirds.
// Computed from the two constants defined just above in this file.
const double CCStruct::kXHeightCapRatio =
CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
// Destructor.
// It is defined here, so the compiler can create a single vtable
// instead of weak vtables in every compilation unit.
CCStruct::~CCStruct() = default;
} // namespace tesseract

View File

@ -0,0 +1,41 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.h
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
#include "ccutil.h" // for CCUtil
namespace tesseract {
// Extends CCUtil with a set of shared typographic constants. The constant
// values are defined out of line; the trailing comments repeat them for
// reference. The destructor is also defined out of line.
class TESS_API CCStruct : public CCUtil {
public:
CCStruct() = default;
~CCStruct() override;
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
static const double kDescenderFraction; // = 0.25;
static const double kXHeightFraction; // = 0.5;
static const double kAscenderFraction; // = 0.25;
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,297 @@
/**********************************************************************
* File: coutln.h
* Description: Code for the C_OUTLINE class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef COUTLN_H
#define COUTLN_H
#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
#include "mod128.h" // for DIR128, DIRBITS
#include "points.h" // for ICOORD, FCOORD
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include <tesseract/export.h> // for DLLSYM
#include <cstdint> // for int16_t, int32_t
#include <bitset> // for std::bitset<16>
struct Pix;
namespace tesseract {
class CRACKEDGE;
class DENORM;
#define INTERSECTING INT16_MAX // no winding number
// mask to get step
#define STEP_MASK 3
// Flag identifiers for C_OUTLINE. Each value is used as a bit index by
// C_OUTLINE::flag() and C_OUTLINE::set_flag().
enum C_OUTLINE_FLAGS {
COUT_INVERSE // White on black blob
};
// Simple struct to hold the 3 values needed to compute a more precise edge
// position and direction. The offset_numerator is the difference between the
// grey threshold and the mean pixel value. pixel_diff is the difference between
// the pixels in the edge. Consider the following row of pixels: p1 p2 p3 p4 p5
// Say the image was thresholded at threshold t, making p1, p2, p3 black
// and p4, p5 white (p1, p2, p3 < t, and p4, p5 >= t), but suppose that
// max(p[i+1] - p[i]) is p3 - p2. Then the extrapolated position of the edge,
// based on the maximum gradient, is at the crack between p2 and p3 plus the
// offset (t - (p2+p3)/2)/(p3 - p2). We store the pixel difference p3-p2
// denominator in pixel_diff and the offset numerator, relative to the original
// binary edge (t - (p2+p3)/2) - (p3 -p2) in offset_numerator.
// The sign of offset_numerator and pixel_diff are manipulated to ensure
// that the pixel_diff, which will be used as a weight, is always positive.
// The direction stores the quantized feature direction for the given step
// computed from the edge gradient. (Using binary_angle_plus_pi.)
// If the pixel_diff is zero, it means that the direction of the gradient
// is in conflict with the step direction, so this step is to be ignored.
struct EdgeOffset {
// Signed numerator of the sub-pixel offset; divided by pixel_diff when used.
int8_t offset_numerator;
// Pixel difference across the edge, used as denominator and weight.
// Zero means this step is to be ignored.
uint8_t pixel_diff;
// Quantized feature direction for the step.
uint8_t direction;
};
class C_OUTLINE; // forward declaration
ELISTIZEH(C_OUTLINE)
// An outline stored as a start position plus a loop of unit steps. Steps are
// chain codes (0-3) packed two bits each, four to a byte, in the steps array.
// An outline may also carry optional per-step EdgeOffset data (offsets) and a
// list of child outlines.
class C_OUTLINE : public ELIST_LINK {
public:
// Creates an outline with no steps and no edge offsets.
C_OUTLINE() {
stepcount = 0;
offsets = nullptr;
}
C_OUTLINE( // constructor
CRACKEDGE *startpt, // from edge detector
ICOORD bot_left, // bounding box, bottom-left corner
ICOORD top_right, int16_t length); // bounding box, top-right; length of loop
C_OUTLINE(ICOORD startpt, // start of loop
DIR128 *new_steps, // steps in loop
int16_t length); // length of loop
// outline to copy
C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation); // and rotate
// Build a fake outline, given just a bounding box and append to the list.
static void FakeOutline(const TBOX &box, C_OUTLINE_LIST *outlines);
~C_OUTLINE() { // destructor
// offsets is either nullptr or an array; delete[] handles both.
delete[] offsets;
}
bool flag( // test flag
C_OUTLINE_FLAGS mask) const { // flag to test
return flags[mask];
}
void set_flag( // set flag value
C_OUTLINE_FLAGS mask, // flag to set
bool value) { // value to set
flags.set(mask, value);
}
C_OUTLINE_LIST *child() { // get child list
return &children;
}
// access function
const TBOX &bounding_box() const {
return box;
}
// Stores a chain code at the given step index. Only the low two bits of
// stepdir are kept; the other three steps sharing the byte are preserved.
void set_step( // set a step
int16_t stepindex, // index of step
int8_t stepdir) { // chain code
// Bit position of this step within its byte: 0, 2, 4 or 6.
int shift = stepindex % 4 * 2;
uint8_t mask = 3 << shift;
steps[stepindex / 4] = ((stepdir << shift) & mask) | (steps[stepindex / 4] & ~mask);
// squeeze 4 into byte
}
// As above, but takes a DIR128 and reduces it to a chain code by dropping
// its low (DIRBITS - 2) bits.
void set_step( // set a step
int16_t stepindex, // index of step
DIR128 stepdir) { // direction
// clean it
int8_t chaindir = stepdir.get_dir() >> (DIRBITS - 2);
// difference
set_step(stepindex, chaindir);
// squeeze 4 into byte
}
int32_t pathlength() const { // get path length
return stepcount;
}
// Return step at a given index as a DIR128.
// The 2-bit chain code is shifted up by (DIRBITS - 2) bits.
DIR128 step_dir(int index) const {
return DIR128(
static_cast<int16_t>(((steps[index / 4] >> (index % 4 * 2)) & STEP_MASK) << (DIRBITS - 2)));
}
// Return the step vector for the given outline position.
ICOORD step(int index) const { // index of step
return step_coords[chain_code(index)];
}
// get start position
const ICOORD &start_pos() const {
return start;
}
// Returns the position at the given index on the outline.
// NOT to be used lightly, as it has to iterate the outline to find out.
ICOORD position_at_index(int index) const {
ICOORD pos = start;
// Sum the first index steps, starting from the start position.
for (int i = 0; i < index; ++i) {
pos += step(i);
}
return pos;
}
// Returns the sub-pixel accurate position given the integer position pos
// at the given index on the outline. pos may be a return value of
// position_at_index, or computed by repeatedly adding step to the
// start_pos() in the usual way.
FCOORD sub_pixel_pos_at_index(const ICOORD &pos, int index) const {
const ICOORD &step_to_next(step(index));
// Start from the midpoint of the step that leaves pos.
FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f, pos.y() + step_to_next.y() / 2.0f);
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
float offset = offsets[index].offset_numerator;
offset /= offsets[index].pixel_diff;
// The offset is applied perpendicular to the step: a step in x moves
// the y coordinate, and any other step moves the x coordinate.
if (step_to_next.x() != 0) {
f_pos.set_y(f_pos.y() + offset);
} else {
f_pos.set_x(f_pos.x() + offset);
}
}
return f_pos;
}
// Returns the step direction for the given index or -1 if there is none.
// There is none when no offsets are stored or the step's pixel_diff is 0.
int direction_at_index(int index) const {
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
return offsets[index].direction;
}
return -1;
}
// Returns the edge strength for the given index.
// If there are no recorded edge strengths, returns 1 (assuming the image
// is binary). Returns 0 if the gradient direction conflicts with the
// step direction, indicating that this position could be skipped.
int edge_strength_at_index(int index) const {
if (offsets != nullptr) {
return offsets[index].pixel_diff;
}
return 1;
}
// Return the step as a chain code (0-3) related to the standard feature
// direction of binary_angle_plus_pi by:
// chain_code * 64 = feature direction.
int chain_code(int index) const { // index of step
return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK;
}
int32_t area() const; // Returns area of self and 1st level children.
int32_t perimeter() const; // Total perimeter of self and 1st level children.
int32_t outer_area() const; // Returns area of self only.
int32_t count_transitions( // count maxima
int32_t threshold); // size threshold
bool operator<( // containment test
const C_OUTLINE &other) const;
// Defined in terms of operator< with the operands swapped.
bool operator>( // containment test
C_OUTLINE &other) const {
return other < *this; // use the < to do it
}
int16_t winding_number( // get winding number
ICOORD testpt) const; // around this point
// get direction
int16_t turn_direction() const;
void reverse(); // reverse direction
void move( // reposition outline
const ICOORD vec); // by vector
// Returns true if *this and its children are legally nested.
// The outer area of a child should have the opposite sign to the
// parent. If not, it means we have discarded an outline in between
// (probably due to excessive length).
bool IsLegallyNested() const;
// If this outline is smaller than the given min_size, delete this and
// remove from its list, via *it, after checking that *it points to this.
// Otherwise, if any children of this are too small, delete them.
// On entry, *it must be an iterator pointing to this. If this gets deleted
// then this is extracted from *it, so an iteration can continue.
void RemoveSmallRecursive(int min_size, C_OUTLINE_IT *it);
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
// pix is 8-bit. Does nothing otherwise.
void ComputeEdgeOffsets(int threshold, Image pix);
// Adds sub-pixel resolution EdgeOffsets for the outline using only
// a binary image source.
void ComputeBinaryOffsets();
// Renders the outline to the given pix, with left and top being
// the coords of the upper-left corner of the pix.
void render(int left, int top, Image pix) const;
// Renders just the outline to the given pix (no fill), with left and top
// being the coords of the upper-left corner of the pix.
void render_outline(int left, int top, Image pix) const;
#ifndef GRAPHICS_DISABLED
void plot( // draw one
ScrollView *window, // window to draw in
ScrollView::Color colour) const; // colour to draw it
// Draws the outline in the given colour, normalized using the given denorm,
// making use of sub-pixel accurate information if available.
void plot_normed(const DENORM &denorm, ScrollView::Color colour, ScrollView *window) const;
#endif // !GRAPHICS_DISABLED
C_OUTLINE &operator=(const C_OUTLINE &source);
// Returns a new outline, allocated with new, that is assigned from *src
// using operator=.
static C_OUTLINE *deep_copy(const C_OUTLINE *src) {
auto *outline = new C_OUTLINE;
*outline = *src;
return outline;
}
static ICOORD chain_step(int chaindir);
// The maximum length of any outline. The stepcount is stored as 16 bits,
// but it is probably not a good idea to increase this constant by much
// and switch to 32 bits, as it plays an important role in keeping huge
// outlines invisible, which prevents bad speed behavior.
static const int kMaxOutlineLength = 16000;
private:
// Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
// by the step, increment, and vertical step ? x : y position * increment
// at step s Mod stepcount respectively. Used to add or subtract the
// direction and position to/from accumulators of a small neighbourhood.
void increment_step(int s, int increment, ICOORD *pos, int *dir_counts, int *pos_totals) const;
// Number of bytes needed to hold stepcount packed steps (four per byte,
// rounded up).
int step_mem() const {
return (stepcount + 3) / 4;
}
TBOX box; // bounding box
ICOORD start; // start coord
int16_t stepcount; // no of steps
std::bitset<16> flags; // flags about outline
std::vector<uint8_t> steps; // step array
EdgeOffset *offsets; // Higher precision edge.
C_OUTLINE_LIST children; // child elements
static ICOORD step_coords[4]; // step vector for each of the 4 chain codes
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,42 @@
/**********************************************************************
* File: crakedge.h (Formerly: crkedge.h)
* Description: Structures for the Crack following edge detector.
* Author: Ray Smith
* Created: Fri Mar 22 16:06:38 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CRAKEDGE_H
#define CRAKEDGE_H
#include "mod128.h"
#include "points.h"
namespace tesseract {
// One node of a doubly-linked chain of crack edges: a position, the step
// taken there (as x/y components and as a chain code), and links to the
// neighbouring nodes.
// NOTE(review): the defaulted constructor has no member initializers, so the
// step fields and the prev/next pointers must be assigned before use.
class CRACKEDGE {
public:
CRACKEDGE() = default;
ICOORD pos; /*position of crack */
int8_t stepx; // edge step, x component
int8_t stepy; // edge step, y component
int8_t stepdir; // chaincode
CRACKEDGE *prev; /*previous point */
CRACKEDGE *next; /*next point */
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,58 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "image.h"
#include <allheaders.h>
namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file.
// The object creates a Pixa and an L_Bmf and destroys both in its destructor.
// It is therefore non-copyable: a copy would hold the same two pointers and
// destroy them a second time.
class DebugPixa {
public:
  // Creates an empty image collection and, unless
  // TESSERACT_DISABLE_DEBUG_FONTS is defined, the caption fonts.
  // TODO(rays) add another constructor with size control.
  DebugPixa() {
    pixa_ = pixaCreate(0);
#ifdef TESSERACT_DISABLE_DEBUG_FONTS
    fonts_ = nullptr;
#else
    fonts_ = bmfCreate(nullptr, 14);
#endif
  }
  DebugPixa(const DebugPixa &) = delete;
  DebugPixa &operator=(const DebugPixa &) = delete;
  // Destroys the image collection and the fonts. Nothing is written here;
  // call WritePDF beforehand to save the images.
  ~DebugPixa() {
    pixaDestroy(&pixa_);
    bmfDestroy(&fonts_);
  }
  // Adds a captioned copy of the given pix to the collection. The caption is
  // placed with L_ADD_BELOW.
  void AddPix(const Image pix, const char *caption) {
    int depth = pixGetDepth(pix);
    // Caption colour value chosen by image depth: 1 below 8 bits, 0x80 at
    // exactly 8 bits and 0x00ff0000 above 8 bits.
    int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
    Image pix_debug =
        pixAddSingleTextblock(pix, fonts_, caption, color, L_ADD_BELOW, nullptr);
    // L_INSERT: the captioned image is inserted into pixa_ directly.
    pixaAddPix(pixa_, pix_debug, L_INSERT);
  }
  // Writes all collected images to a PDF called filename and then empties the
  // collection. Does nothing if no images have been added.
  void WritePDF(const char *filename) {
    if (pixaGetCount(pixa_) > 0) {
      pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
      pixaClear(pixa_);
    }
  }

private:
  // The collection of images to put in the PDF.
  Pixa *pixa_;
  // The fonts used to draw text captions.
  L_Bmf *fonts_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_

View File

@ -0,0 +1,302 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp
// Description: Deterministic least median squares line fitting.
// Author: Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "detlinefit.h"
#include "helpers.h" // for IntCastRounded
#include "statistc.h"
#include "tprintf.h"
#include <algorithm>
#include <cfloat> // for FLT_MAX
namespace tesseract {
// How many points at each end of the point list are tried as line end points.
const int kNumEndPoints = 3;
// Once at least this many points are present, a badly fitted line is scored
// by its number of mis-fitted points instead of by distance.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// Largest real distance that is used as the error before the count of
// mis-fitted points takes over; that count is later square-rooted as if it
// were a squared distance.
const int kMaxRealDistance = 2;
// Constructs a fitter with no points; the squared line length starts at zero.
DetLineFit::DetLineFit() : square_length_(0.0) {}
// Discards every added point together with any distances computed from them.
void DetLineFit::Clear() {
  distances_.clear();
  pts_.clear();
}
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD &pt) {
pts_.emplace_back(pt, 0);
}
// Adds a point together with an associated half-width.
// If a point overlaps the previous point by more than half the width, and
// its distance is further than the previous point, then the more distant
// point is ignored in the distance calculation.
// Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD &pt, int halfwidth) {
pts_.emplace_back(pt, halfwidth);
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
// skip_first and skip_last are clamped to [0, number of points - 1].
// *pt1 and *pt2 are always written:
//  - with no points both are (0, 0) and the result is 0.0;
//  - with 1 or 2 points they are the first usable start and end point and
//    the result is 0.0;
//  - otherwise they are the best-fitting pair of candidate end points. If no
//    two candidates differ, they are the first start and end candidates and
//    the result is -1.0.
double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2) {
  // Do something sensible with no points.
  if (pts_.empty()) {
    pt1->set_x(0);
    pt1->set_y(0);
    *pt2 = *pt1;
    return 0.0;
  }
  int pt_count = pts_.size();
  // Clamp the skip counts so that at least one point is left at each end and
  // a negative count cannot index outside pts_.
  skip_first = std::min(std::max(skip_first, 0), pt_count - 1);
  skip_last = std::min(std::max(skip_last, 0), pt_count - 1);
  // Collect up to kNumEndPoints candidates from the start of the list...
  ICOORD *starts[kNumEndPoints];
  int start_count = 0;
  int end_i = std::min(skip_first + kNumEndPoints, pt_count);
  for (int i = skip_first; i < end_i; ++i) {
    starts[start_count++] = &pts_[i].pt;
  }
  // ...and up to kNumEndPoints candidates working back from the end.
  ICOORD *ends[kNumEndPoints];
  int end_count = 0;
  end_i = std::max(0, pt_count - kNumEndPoints - skip_last);
  for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
    ends[end_count++] = &pts_[i].pt;
  }
  // 1 or 2 points need special treatment.
  if (pt_count <= 2) {
    *pt1 = *starts[0];
    if (pt_count > 1) {
      *pt2 = *ends[0];
    } else {
      *pt2 = *pt1;
    }
    return 0.0;
  }
  // Give the outputs a defined value in case every candidate pair coincides
  // and the search below never accepts a line.
  *pt1 = *starts[0];
  *pt2 = *ends[0];
  // Although with between 2 and 2*kNumEndPoints-1 points, there will be
  // overlap in the starts, ends sets, this is OK and taken care of by the
  // if (*start != *end) test below, which also tests for equal input points.
  double best_uq = -1.0;
  // Iterate each pair of points and find the best fitting line.
  for (int i = 0; i < start_count; ++i) {
    ICOORD *start = starts[i];
    for (int j = 0; j < end_count; ++j) {
      ICOORD *end = ends[j];
      if (*start != *end) {
        ComputeDistances(*start, *end);
        // Compute the upper quartile error from the line.
        double dist = EvaluateLineFit();
        if (dist < best_uq || best_uq < 0.0) {
          best_uq = dist;
          *pt1 = *start;
          *pt2 = *end;
        }
      }
    }
  }
  // Finally compute the square root to return the true distance.
  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
// Constrained fit with a supplied direction vector. Chooses as *line_pt the
// supplied point that has the median cross product with direction, after
// ComputeConstrainedDistances has been run with [min_dist, max_dist].
// Returns the square root of EvaluateLineFit() over that same reduced set of
// points, or 0.0 with *line_pt set to (0, 0) when there is nothing to fit.
// If debug is true, the inputs and the result are printed with tprintf.
// *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist,
                                  bool debug, ICOORD *line_pt) {
  ComputeConstrainedDistances(direction, min_dist, max_dist);
  // Do something sensible with no points or computed distances.
  if (pts_.empty() || distances_.empty()) {
    line_pt->set_x(0);
    line_pt->set_y(0);
    return 0.0;
  }
  // Partition the distances so that the median element is in its sorted place.
  const auto median_index = distances_.size() / 2;
  const auto median_it = distances_.begin() + median_index;
  std::nth_element(distances_.begin(), median_it, distances_.end());
  *line_pt = median_it->data();
  if (debug) {
    tprintf("Constrained fit to dir %g, %g = %d, %d :%zu distances:\n", direction.x(), direction.y(),
            line_pt->x(), line_pt->y(), distances_.size());
    const int num_distances = distances_.size();
    for (int i = 0; i < num_distances; ++i) {
      auto &entry = distances_[i];
      tprintf("%d: %d, %d -> %g\n", i, entry.data().x(), entry.data().y(), entry.key());
    }
    tprintf("Result = %zu\n", median_index);
  }
  // Center distances on the fitted point.
  const double dist_origin = direction * *line_pt;
  for (auto &entry : distances_) {
    entry.key() -= dist_origin;
  }
  return sqrt(EvaluateLineFit());
}
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
// "Enough" means that at least kMinPointsForErrorCount distances are
// currently stored in distances_.
bool DetLineFit::SufficientPointsForIndependentFit() const {
return distances_.size() >= kMinPointsForErrorCount;
}
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double DetLineFit::Fit(float *m, float *c) {
ICOORD start, end;
double error = Fit(&start, &end);
if (end.x() != start.x()) {
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
*c = start.y() - *m * start.x();
} else {
*m = 0.0f;
*c = 0.0f;
}
return error;
}
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float *c) {
// Do something sensible with no points.
if (pts_.empty()) {
*c = 0.0f;
return 0.0;
}
double cos = 1.0 / sqrt(1.0 + m * m);
FCOORD direction(cos, m * cos);
ICOORD line_pt;
double error = ConstrainedFit(direction, -FLT_MAX, FLT_MAX, false, &line_pt);
*c = line_pt.y() - line_pt.x() * m;
return error;
}
// Computes and returns the squared evaluation metric for a line fit.
// Normally this is the squared upper quartile error. When there are at least
// kMinPointsForErrorCount distances and that error exceeds kMaxRealDistance
// squared, the number of mis-fitted points is returned instead, as this
// gives a better measure of fit for badly fitted lines where more than a
// quarter are badly fitted.
double DetLineFit::EvaluateLineFit() {
  const double quartile_error = ComputeUpperQuartileError();
  const bool badly_fitted = distances_.size() >= kMinPointsForErrorCount &&
                            quartile_error > kMaxRealDistance * kMaxRealDistance;
  if (!badly_fitted) {
    return quartile_error;
  }
  const double threshold = kMaxRealDistance * sqrt(square_length_);
  return NumberOfMisfittedPoints(threshold);
}
// Replaces every stored distance with its absolute value and returns the
// squared upper-quartile error distance. Returns 0.0 when there are no
// distances or square_length_ is not positive.
double DetLineFit::ComputeUpperQuartileError() {
  if (distances_.empty()) {
    return 0.0;
  }
  // Fold negative errors onto the positive side.
  for (auto &entry : distances_) {
    if (entry.key() < 0) {
      entry.key() = -entry.key();
    }
  }
  // Partition so that the element three quarters of the way up is in place.
  const int num_errors = distances_.size();
  const auto quartile_index = 3 * num_errors / 4;
  const auto quartile_it = distances_.begin() + quartile_index;
  std::nth_element(distances_.begin(), quartile_it, distances_.end());
  const double dist = quartile_it->key();
  // The true distance is the square root of the dist squared / square_length.
  // Don't bother with the square root. Just return the square distance.
  if (square_length_ > 0.0) {
    return dist * dist / square_length_;
  }
  return 0.0;
}
// Counts the entries of distances_ whose key is strictly greater than
// threshold and returns that count.
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
  int misfit_count = 0;
  for (const auto &entry : distances_) {
    if (entry.key() > threshold) {
      ++misfit_count;
    }
  }
  return misfit_count;
}
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD &start, const ICOORD &end) {
distances_.clear();
ICOORD line_vector = end;
line_vector -= start;
square_length_ = line_vector.sqlength();
int line_length = IntCastRounded(sqrt(square_length_));
// Compute the distance of each point from the line.
int prev_abs_dist = 0;
int prev_dot = 0;
for (int i = 0; i < pts_.size(); ++i) {
ICOORD pt_vector = pts_[i].pt;
pt_vector -= start;
int dot = line_vector % pt_vector;
// Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector;
int abs_dist = dist < 0 ? -dist : dist;
if (abs_dist > prev_abs_dist && i > 0) {
// Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth) {
continue;
}
}
distances_.emplace_back(dist, pts_[i].pt);
prev_abs_dist = abs_dist;
prev_dot = dot;
}
}
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD &direction, double min_dist,
double max_dist) {
distances_.clear();
square_length_ = direction.sqlength();
// Compute the distance of each point from the line.
for (auto &pt : pts_) {
FCOORD pt_vector = pt.pt;
// Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist) {
distances_.emplace_back(dist, pt.pt);
}
}
}
} // namespace tesseract.

View File

@ -0,0 +1,157 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "kdpair.h"
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
// Fits a line to a sequence of ICOORD points added via Add(). The points and
// their optional halfwidths are kept in pts_; each fit recomputes distances_
// and square_length_.
class DetLineFit {
public:
DetLineFit();
~DetLineFit() = default;
// Delete all Added points.
void Clear();
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD &pt);
// Associates a half-width with the given point. If a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void Add(const ICOORD &pt, int halfwidth);
// Fits a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
// Equivalent to Fit(0, 0, pt1, pt2), i.e. no points are skipped.
double Fit(ICOORD *pt1, ICOORD *pt2) {
return Fit(0, 0, pt1, pt2);
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2);
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug,
ICOORD *line_pt);
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool SufficientPointsForIndependentFit() const;
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double Fit(float *m, float *c);
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double ConstrainedFit(double m, float *c);
private:
// Simple struct to hold an ICOORD point and a halfwidth representing half
// the "width" (supposedly approximately parallel to the direction of the
// line) of each point, such that distant points can be discarded when they
// overlap nearer points. (Think i dot and other diacritics or noise.)
struct PointWidth {
// Default: point (0, 0) with zero halfwidth.
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
PointWidth(const ICOORD &pt0, int halfwidth0) : pt(pt0), halfwidth(halfwidth0) {}
ICOORD pt;
int halfwidth;
};
// Type holds the distance of each point from the fitted line and the point
// itself. Use of double allows integer distances from ICOORDs to be stored
// exactly, and also the floating point results from ConstrainedFit.
using DistPointPair = KDPairInc<double, ICOORD>;
// Computes and returns the squared evaluation metric for a line fit.
double EvaluateLineFit();
// Computes the absolute values of the precomputed distances_,
// and returns the squared upper-quartile error distance.
// Not const: it overwrites and reorders distances_.
double ComputeUpperQuartileError();
// Returns the number of sample points that have an error more than threshold.
int NumberOfMisfittedPoints(double threshold) const;
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point,
// and overlap the previous point by at least half.
void ComputeDistances(const ICOORD &start, const ICOORD &end);
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the given distance
// range, storing the actual (signed) cross products in distances_.
void ComputeConstrainedDistances(const FCOORD &direction, double min_dist, double max_dist);
// Stores all the source points in the order they were given and their
// halfwidths, if any.
std::vector<PointWidth> pts_;
// Stores the computed perpendicular distances of (some of) the pts_ from a
// given vector (assuming it goes through the origin, making it a line).
// Since the distances may be a subset of the input points, and get
// re-ordered by std::nth_element when the upper-quartile error is computed,
// the original point is stored alongside the distance.
std::vector<DistPointPair> distances_; // Distances of points.
// The squared length of the vector used to compute distances_.
double square_length_;
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_

View File

@ -0,0 +1,99 @@
/**********************************************************************
* File: dppoint.cpp
* Description: Simple generic dynamic programming class.
* Author: Ray Smith
* Created: Wed Mar 25 19:08:01 PDT 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "dppoint.h"
#include "errcode.h"
#include "tprintf.h"
namespace tesseract {
// Runs the dynamic program over the array `points` of length `size`, using
// cost_func to evaluate each candidate predecessor.
// Steps backwards are limited to being between min_step and max_step
// inclusive.
// Returns the tail of the best path, chosen from the last min_step points,
// or nullptr for degenerate arguments (size <= 0, max_step < min_step or
// min_step >= size).
DPPoint *DPPoint::Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
                        DPPoint *points) {
  const bool degenerate = size <= 0 || max_step < min_step || min_step >= size;
  if (degenerate) {
    return nullptr; // Degenerate, but not necessarily an error.
  }
  ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true.
  if (debug) {
    tprintf("min = %d, max=%d\n", min_step, max_step);
  }
  // Evaluate the total cost at each point.
  for (int i = 0; i < size; ++i) {
    DPPoint &current = points[i];
    for (int step = min_step; step <= max_step; ++step) {
      // A step reaching before the start of the array has no predecessor.
      DPPoint *candidate = nullptr;
      if (step <= i) {
        candidate = &points[i - step];
      }
      const int64_t candidate_cost = (current.*cost_func)(candidate);
      // Find only the first minimum if going over twice the min.
      const bool past_first_minimum = current.best_prev_ != nullptr && step > min_step * 2 &&
                                      candidate_cost > current.total_cost_;
      if (past_first_minimum) {
        break;
      }
    }
    // The cost function excludes the local cost, so add it now.
    current.total_cost_ += current.local_cost_;
    if (debug) {
      tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n", i, current.local_cost_,
              current.total_cost_, current.total_steps_);
    }
  }
  // Now find the end of the best path and return it. The last point wins
  // ties because only a strictly lower cost replaces it.
  DPPoint *best = points + (size - 1);
  for (int end = size - 2; end >= size - min_step; --end) {
    if (points[end].total_cost_ < best->total_cost_) {
      best = points + end;
    }
  }
  return best;
}
// A CostFunc that takes the variance of step into account in the cost.
// prev is the candidate predecessor of this point, or nullptr (or this
// itself) when there is none, in which case the point is treated as the
// start of a path with zero cost.
// Otherwise the candidate cost is (sig_xsq - sig_x * sig_x / n) / n plus
// prev's total cost, where n, sig_x and sig_xsq are prev's running step
// count, sum of steps and sum of squared steps extended by the step from
// prev to this.
// The members of this are updated via UpdateIfBetter, and the candidate cost
// is returned whether or not it was accepted.
int64_t DPPoint::CostWithVariance(const DPPoint *prev) {
  if (prev == nullptr || prev == this) {
    UpdateIfBetter(0, 1, nullptr, 0, 0, 0);
    return 0;
  }
  // Step size in array elements, kept 64 bit so that squaring cannot
  // overflow.
  const int64_t delta = this - prev;
  const int32_t n = prev->n_ + 1;
  const int32_t sig_x = prev->sig_x_ + static_cast<int32_t>(delta);
  const int64_t sig_xsq = prev->sig_xsq_ + delta * delta;
  // Widen before squaring: sig_x * sig_x in 32 bits overflows (undefined
  // behavior) once the sum of steps exceeds 46340.
  const int64_t wide_sig_x = sig_x;
  int64_t cost = (sig_xsq - wide_sig_x * wide_sig_x / n) / n;
  cost += prev->total_cost_;
  UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
  return cost;
}
// Adopts the given path data (cost, step count, predecessor and variance
// sums) when cost is strictly lower than the current total_cost_.
// Otherwise leaves this point untouched.
void DPPoint::UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint *prev, int32_t n,
                             int32_t sig_x, int64_t sig_xsq) {
  if (cost >= total_cost_) {
    return; // Not an improvement.
  }
  total_cost_ = cost;
  total_steps_ = steps;
  best_prev_ = prev;
  n_ = n;
  sig_x_ = sig_x;
  sig_xsq_ = sig_xsq;
}
} // namespace tesseract.

View File

@ -0,0 +1,105 @@
/**********************************************************************
* File: dppoint.h
* Description: Simple generic dynamic programming class.
* Author: Ray Smith
* Created: Wed Mar 25 18:57:01 PDT 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H_
#define TESSERACT_CCSTRUCT_DPPOINT_H_
#include <cstdint>
namespace tesseract {
// A simple class to provide a dynamic programming solution to a class of
// 1st-order problems in which the cost is dependent only on the current
// step and the best cost to that step, with a possible special case
// of using the variance of the steps, and only the top choice is required.
// Useful for problems such as finding the optimal cut points in a fixed-pitch
// (vertical or horizontal) situation.
// Skeletal Example:
// DPPoint* array = new DPPoint[width];
// for (int i = 0; i < width; i++) {
// array[i].AddLocalCost(cost_at_i)
// }
// DPPoint* best_end = DPPoint::Solve(..., array);
// while (best_end != nullptr) {
// int cut_index = best_end - array;
// best_end = best_end->best_prev();
// }
// delete [] array;
class DPPoint {
public:
  // Signature of a cost function. It evaluates the total cost at this
  // (excluding this's local_cost) for the given predecessor and, if that
  // beats this's total_cost, replaces the appropriate values in this.
  using CostFunc = int64_t (DPPoint::*)(const DPPoint *);

  // A fresh point has no local cost, no predecessor, a path length of 1 and
  // the largest possible total cost, so any real cost beats it.
  DPPoint() = default;

  // Solve the dynamic programming problem for the given array of points, with
  // the given size and cost function.
  // Steps backwards are limited to being between min_step and max_step
  // inclusive.
  // The return value is the tail of the best path.
  static DPPoint *Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
                        DPPoint *points);

  // A CostFunc that takes the variance of step into account in the cost.
  int64_t CostWithVariance(const DPPoint *prev);

  // Accessors.
  int total_cost() const {
    return total_cost_;
  }
  int Pathlength() const {
    return total_steps_;
  }
  const DPPoint *best_prev() const {
    return best_prev_;
  }
  // Accumulates new_cost into the cost of this point on its own.
  void AddLocalCost(int new_cost) {
    local_cost_ += new_cost;
  }

private:
  // Code common to different cost functions.
  // Update the other members if the cost is lower.
  void UpdateIfBetter(int64_t cost, int32_t steps, const DPPoint *prev, int32_t n, int32_t sig_x,
                      int64_t sig_xsq);

  // Cost of this point on its own.
  int32_t local_cost_ = 0;
  // Sum of all costs in best path to here.
  // During cost calculations local_cost is excluded.
  int32_t total_cost_ = INT32_MAX;
  // Number of steps in best path to here.
  int32_t total_steps_ = 1;
  // Pointer to prev point in best path from here.
  const DPPoint *best_prev_ = nullptr;
  // Information for computing the variance part of the cost.
  // Number of steps in best path to here for variance.
  int32_t n_ = 0;
  // Sum of step sizes for computing variance.
  int32_t sig_x_ = 0;
  // Sum of squares of steps for computing variance.
  int64_t sig_xsq_ = 0;
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DPPOINT_H_

View File

@ -0,0 +1,227 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.cpp
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "fontinfo.h"
#include "bitvector.h"
#include "unicity_table.h"
namespace tesseract {
// Writes this FontInfo to fp: first the basic info via write_info, then the
// spacing info via write_spacing_info. Stops at the first failure and
// returns false in that case.
bool FontInfo::Serialize(FILE *fp) const {
  return write_info(fp, *this) && write_spacing_info(fp, *this);
}
// Reads this FontInfo from fp: first the basic info via read_info, then the
// spacing info via read_spacing_info. Stops at the first failure and returns
// false in that case.
bool FontInfo::DeSerialize(TFile *fp) {
  return read_info(fp, this) && read_spacing_info(fp, this);
}
// Installs FontInfoDeleteCallback as the clear callback of the table.
FontInfoTable::FontInfoTable() {
  set_clear_callback(std::bind(FontInfoDeleteCallback, std::placeholders::_1));
}
// Defaulted destructor, defined out of line in this translation unit.
FontInfoTable::~FontInfoTable() = default;
// Writes to the given file. Returns false in case of error.
bool FontInfoTable::Serialize(FILE *fp) const {
return this->SerializeClasses(fp);
}
// Truncates the table to zero entries, then reads it back from fp by
// delegating to DeSerializeClasses. Returns false in case of error.
bool FontInfoTable::DeSerialize(TFile *fp) {
  truncate(0);
  const bool loaded = DeSerializeClasses(fp);
  return loaded;
}
// Returns true if any font referenced by font_set has exactly the same
// properties value as the font at index font_id in this table.
bool FontInfoTable::SetContainsFontProperties(int font_id,
                                              const std::vector<ScoredFont> &font_set) const {
  const uint32_t wanted = at(font_id).properties;
  for (const auto &candidate : font_set) {
    if (at(candidate.fontinfo_id).properties == wanted) {
      return true;
    }
  }
  return false;
}
// Returns true if the given set of fonts includes multiple properties.
bool FontInfoTable::SetContainsMultipleFontProperties(
const std::vector<ScoredFont> &font_set) const {
if (font_set.empty()) {
return false;
}
int first_font = font_set[0].fontinfo_id;
uint32_t properties = at(first_font).properties;
for (int f = 1; f < font_set.size(); ++f) {
if (at(font_set[f].fontinfo_id).properties != properties) {
return true;
}
}
return false;
}
// Moves any non-empty FontSpacingInfo entries from other to this.
// For every entry of other that has a spacing_vec:
//  - if get_index finds no matching entry in this table (negative index),
//    the whole FontInfo is bit-copied in and its name pointer is taken over;
//  - otherwise the matching entry's spacing_vec is replaced by the one from
//    other, after freeing the old vector together with the FontSpacingInfo
//    objects it owns.
// In both cases other's spacing_vec pointer is reset to nullptr so that the
// data has a single owner afterwards.
void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable *other) {
  using namespace std::placeholders; // for _1, _2
  set_clear_callback(std::bind(FontInfoDeleteCallback, _1));
  for (int i = 0; i < other->size(); ++i) {
    std::vector<FontSpacingInfo *> *spacing_vec = other->at(i).spacing_vec;
    if (spacing_vec == nullptr) {
      continue; // Nothing to move for this font.
    }
    int target_index = get_index(other->at(i));
    if (target_index < 0) {
      // Bit copy the FontInfo and steal all the pointers.
      push_back(other->at(i));
      other->at(i).name = nullptr;
    } else {
      // The old vector owns its elements (see FontInfoDeleteCallback), so
      // release them too instead of leaking them with the vector.
      std::vector<FontSpacingInfo *> *stale = at(target_index).spacing_vec;
      if (stale != nullptr) {
        for (FontSpacingInfo *info : *stale) {
          delete info;
        }
        delete stale;
      }
      at(target_index).spacing_vec = spacing_vec;
    }
    other->at(i).spacing_vec = nullptr;
  }
}
// Transfers every FontInfo of this table into target.
// target is cleared first and given FontInfoDeleteCallback as its clear
// callback. Each entry is bit-copied, then the name and spacing_vec pointers
// of the source entry are reset to nullptr so that target is the only owner.
void FontInfoTable::MoveTo(UnicityTable<FontInfo> *target) {
  target->clear();
  target->set_clear_callback(std::bind(FontInfoDeleteCallback, std::placeholders::_1));
  for (int i = 0; i < size(); ++i) {
    FontInfo &source = at(i);
    // Bit copy the FontInfo and steal all the pointers.
    target->push_back(source);
    source.name = nullptr;
    source.spacing_vec = nullptr;
  }
}
// Callbacks for GenericVector.
// Frees everything owned by f: each FontSpacingInfo in spacing_vec, the
// vector itself, and the name array. f is passed by value, so only the
// pointed-to memory is affected; the caller's FontInfo keeps its (now
// dangling) pointers.
void FontInfoDeleteCallback(FontInfo f) {
  if (f.spacing_vec != nullptr) {
    for (FontSpacingInfo *info : *f.spacing_vec) {
      delete info;
    }
    delete f.spacing_vec;
  }
  delete[] f.name;
}
/*---------------------------------------------------------------------------*/
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
// Reads the name and properties of a font from f into fi.
// The stored layout is a 32 bit name length, the name characters (without
// terminator) and the properties word.
// A newly allocated, zero-terminated copy of the name is stored in fi->name,
// which fi owns from then on. The buffer is zero-filled up front, so
// fi->name is a valid C string even when a later read fails.
// Returns false if any read fails or the stored length is unusable.
bool read_info(TFile *f, FontInfo *fi) {
  uint32_t size;
  if (!f->DeSerialize(&size)) {
    return false;
  }
  // size + 1 would wrap around to 0 and produce an undersized buffer.
  if (size == UINT32_MAX) {
    return false;
  }
  // Value-initialized: all bytes are zero, including the terminator.
  char *font_name = new char[static_cast<size_t>(size) + 1]();
  fi->name = font_name;
  if (!f->DeSerialize(font_name, size)) {
    return false;
  }
  font_name[size] = '\0';
  return f->DeSerialize(&fi->properties);
}
// Writes the name and properties of fi to f: a 32 bit name length, the name
// characters without terminator, then the properties word.
// Returns false as soon as one of the writes fails.
bool write_info(FILE *f, const FontInfo &fi) {
  int32_t size = strlen(fi.name);
  if (!tesseract::Serialize(f, &size)) {
    return false;
  }
  if (!tesseract::Serialize(f, &fi.name[0], size)) {
    return false;
  }
  return tesseract::Serialize(f, &fi.properties);
}
// Reads the spacing information of a font from f into fi.
// The stored layout is an entry count followed, per entry, by x_gap_before,
// x_gap_after and a kerning size. A negative kerning size marks a nullptr
// entry; a positive one is followed by the kerned ids and gaps.
// A count of zero leaves fi untouched. Returns false on any read failure;
// entries added before the failure stay in fi.
bool read_spacing_info(TFile *f, FontInfo *fi) {
  int32_t vec_size, kern_size;
  if (!f->DeSerialize(&vec_size)) {
    return false;
  }
  ASSERT_HOST(vec_size >= 0);
  if (vec_size == 0) {
    return true;
  }
  fi->init_spacing(vec_size);
  for (int i = 0; i < vec_size; ++i) {
    auto *fs = new FontSpacingInfo();
    const bool header_ok = f->DeSerialize(&fs->x_gap_before) &&
                           f->DeSerialize(&fs->x_gap_after) && f->DeSerialize(&kern_size);
    if (!header_ok) {
      delete fs;
      return false;
    }
    if (kern_size < 0) { // indication of a nullptr entry in fi->spacing_vec
      delete fs;
      continue;
    }
    if (kern_size > 0) {
      const bool kerning_ok =
          f->DeSerialize(fs->kerned_unichar_ids) && f->DeSerialize(fs->kerned_x_gaps);
      if (!kerning_ok) {
        delete fs;
        return false;
      }
    }
    // fi takes ownership of fs.
    fi->add_spacing(i, fs);
  }
  return true;
}
// Writes the spacing information of fi to f.
// The layout is an entry count (0 when fi has no spacing_vec) followed, per
// entry, by x_gap_before, x_gap_after and the kerning size. For a nullptr
// entry both gaps are written as -1 and the kerning size as -1. A positive
// kerning size is followed by the kerned ids and the kerned gaps.
// Returns false as soon as one of the writes fails.
bool write_spacing_info(FILE *f, const FontInfo &fi) {
  int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
  if (!tesseract::Serialize(f, &vec_size)) {
    return false;
  }
  // Two placeholder gaps for nullptr entries. This has to be a real
  // two-element array: serializing 2 values from a single scalar would read
  // past the end of the object.
  int16_t x_gap_invalid[2] = {-1, -1};
  for (int i = 0; i < vec_size; ++i) {
    FontSpacingInfo *fs = fi.spacing_vec->at(i);
    int32_t kern_size = (fs == nullptr) ? -1 : fs->kerned_x_gaps.size();
    if (fs == nullptr) {
      // Writing two invalid x-gaps.
      if (!tesseract::Serialize(f, &x_gap_invalid[0], 2) ||
          !tesseract::Serialize(f, &kern_size)) {
        return false;
      }
      continue; // kern_size is -1, so there is no kerning data to write.
    }
    if (!tesseract::Serialize(f, &fs->x_gap_before) ||
        !tesseract::Serialize(f, &fs->x_gap_after) || !tesseract::Serialize(f, &kern_size)) {
      return false;
    }
    if (kern_size > 0 &&
        (!Serialize(f, fs->kerned_unichar_ids) || !Serialize(f, fs->kerned_x_gaps))) {
      return false;
    }
  }
  return true;
}
bool write_set(FILE *f, const FontSet &fs) {
int size = fs.size();
return tesseract::Serialize(f, &size) && tesseract::Serialize(f, &fs[0], size);
}
} // namespace tesseract.

View File

@ -0,0 +1,205 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.h
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
#define TESSERACT_CCSTRUCT_FONTINFO_H_
#include "errcode.h"
#include <tesseract/unichar.h>
#include "genericvector.h"
#include <cstdint> // for uint16_t, uint32_t
#include <cstdio> // for FILE
#include <vector>
namespace tesseract {
template <typename T>
class UnicityTable;
// Pairs a font with a score. The scores come from the low-level integer
// matcher, so they are in the uint16_t range. Fonts are an index to
// fontinfo_table.
// These get copied around a lot, so best to keep them small.
struct ScoredFont {
  // Default: no font (-1) and a zero score.
  ScoredFont() = default;
  ScoredFont(int font_id, uint16_t classifier_score)
      : fontinfo_id(font_id), score(classifier_score) {}

  // Index into fontinfo table, but inside the classifier, may be a shapetable
  // index.
  int32_t fontinfo_id = -1;
  // Raw score from the low-level classifier.
  uint16_t score = 0;
};
// Struct for information about spacing between characters in a particular font.
// No member has a default initializer, so the two gaps are only zeroed when
// the struct is value-initialized (e.g. new FontSpacingInfo()).
struct FontSpacingInfo {
// Gap contributed when this character follows another one: FontInfo's
// two-id get_spacing adds it to the previous character's x_gap_after.
int16_t x_gap_before;
// Gap contributed when this character precedes another one.
int16_t x_gap_after;
// Ids of following characters that have an explicit kerned gap.
// Parallel to kerned_x_gaps: entry i here pairs with entry i there.
std::vector<UNICHAR_ID> kerned_unichar_ids;
// Gap used instead of x_gap_after + x_gap_before for the matching id.
std::vector<int16_t> kerned_x_gaps;
};
/*
 * Describes one font: its name, a bit field of properties (italic, bold,
 * fixed pitch, serif, fraktur) and optional per-character spacing data.
 * The struct has a defaulted destructor, so name and spacing_vec are not
 * released automatically; FontInfoDeleteCallback frees them.
 */
struct FontInfo {
  FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {}
  ~FontInfo() = default;

  // Two fonts are equal when their names are equal. A FontInfo without a
  // name (nullptr, e.g. default constructed or after its pointers were
  // stolen) only equals another FontInfo without a name; this avoids passing
  // nullptr to strcmp.
  bool operator==(const FontInfo &rhs) const {
    if (name == nullptr || rhs.name == nullptr) {
      return name == rhs.name;
    }
    return strcmp(name, rhs.name) == 0;
  }

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE *fp) const;
  // Reads from the given file. Returns false in case of error.
  bool DeSerialize(TFile *fp);

  // Reserves unicharset_size spots in spacing_vec.
  // Allocates a new vector; a previously assigned spacing_vec is not freed.
  void init_spacing(int unicharset_size) {
    spacing_vec = new std::vector<FontSpacingInfo *>();
    spacing_vec->resize(unicharset_size);
  }

  // Adds the given pointer to FontSpacingInfo to spacing_vec member
  // (FontInfo class takes ownership of the pointer).
  // Note: init_spacing should be called before calling this function.
  void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
    ASSERT_HOST(static_cast<size_t>(uch_id) < spacing_vec->size());
    (*spacing_vec)[uch_id] = spacing_info;
  }

  // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID, or
  // nullptr when there is no spacing data or uch_id is out of range.
  const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
    return (spacing_vec == nullptr || spacing_vec->size() <= static_cast<size_t>(uch_id))
               ? nullptr
               : (*spacing_vec)[uch_id];
  }

  // Fills spacing with the value of the x gap expected between the two given
  // UNICHAR_IDs. Returns true on success, false if spacing data is missing
  // for either id (spacing is then left untouched).
  bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const {
    const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
    const FontSpacingInfo *fsi = this->get_spacing(uch_id);
    if (prev_fsi == nullptr || fsi == nullptr) {
      return false;
    }
    // Look for an explicit kerning entry for this pair.
    size_t i = 0;
    for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
      if (prev_fsi->kerned_unichar_ids[i] == uch_id) {
        break;
      }
    }
    if (i < prev_fsi->kerned_unichar_ids.size()) {
      *spacing = prev_fsi->kerned_x_gaps[i];
    } else {
      // No kerning entry: combine the generic gaps of both characters.
      *spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
    }
    return true;
  }

  // Property bit accessors.
  bool is_italic() const {
    return (properties & 1) != 0;
  }
  bool is_bold() const {
    return (properties & 2) != 0;
  }
  bool is_fixed_pitch() const {
    return (properties & 4) != 0;
  }
  bool is_serif() const {
    return (properties & 8) != 0;
  }
  bool is_fraktur() const {
    return (properties & 16) != 0;
  }

  // Zero-terminated font name, or nullptr.
  char *name;
  // Bit field: 1 = italic, 2 = bold, 4 = fixed pitch, 8 = serif,
  // 16 = fraktur.
  uint32_t properties;
  // The universal_id is a field reserved for the initialization process
  // to assign a unique id number to all fonts loaded for the current
  // combination of languages. This id will then be returned by
  // ResultIterator::WordFontAttributes.
  int32_t universal_id;
  // Horizontal spacing between characters (indexed by UNICHAR_ID).
  std::vector<FontSpacingInfo *> *spacing_vec;
};
// Every class (character) owns a FontSet that represents all the fonts that can
// render this character.
// Since almost all the characters from the same script share the same set of
// fonts, the sets are shared over multiple classes (see
// Classify::fontset_table_). Thus, a class only store an id to a set.
// Because some fonts cannot render just one character of a set, there are a
// lot of FontSet that differ only by one font. Rather than storing directly
// the FontInfo in the FontSet structure, it's better to share FontInfos among
// FontSets (Classify::fontinfo_table_).
using FontSet = std::vector<int>;
// Class that adds a bit of functionality on top of GenericVector to
// implement a table of FontInfo that replaces UnicityTable<FontInfo>.
// TODO(rays) change all references once all existing traineddata files
// are replaced.
class FontInfoTable : public GenericVector<FontInfo> {
public:
TESS_API // when you remove inheritance from GenericVector, move this on
// class level
FontInfoTable();
TESS_API
~FontInfoTable();
// Writes to the given file. Returns false in case of error.
TESS_API
bool Serialize(FILE *fp) const;
// Reads from the given file. Returns false in case of error.
TESS_API
bool DeSerialize(TFile *fp);
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
TESS_API
bool SetContainsFontProperties(int font_id, const std::vector<ScoredFont> &font_set) const;
// Returns true if the given set of fonts includes multiple properties.
TESS_API
bool SetContainsMultipleFontProperties(const std::vector<ScoredFont> &font_set) const;
// Moves any non-empty FontSpacingInfo entries from other to this.
TESS_API
void MoveSpacingInfoFrom(FontInfoTable *other);
// Moves this to the target unicity table.
TESS_API
void MoveTo(UnicityTable<FontInfo> *target);
};
// Deletion callbacks for GenericVector.
void FontInfoDeleteCallback(FontInfo f);
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(TFile *f, FontInfo *fi);
bool write_info(FILE *f, const FontInfo &fi);
bool read_spacing_info(TFile *f, FontInfo *fi);
bool write_spacing_info(FILE *f, const FontInfo &fi);
bool write_set(FILE *f, const FontSet &fs);
} // namespace tesseract.
#endif /* THIRD_PARTY_TESSERACT_CCSTRUCT_FONTINFO_H_ */

View File

@ -0,0 +1,60 @@
///////////////////////////////////////////////////////////////////////
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "image.h"
#include <allheaders.h>
namespace tesseract {
Image Image::clone() const {
return pixClone(pix_);
}
Image Image::copy() const {
return pixCopy(nullptr, pix_);
}
void Image::destroy() {
pixDestroy(&pix_);
}
bool Image::isZero() const {
l_int32 r = 0;
pixZero(pix_, &r);
return r == 1;
}
Image Image::operator|(Image i) const {
return pixOr(nullptr, pix_, i);
}
Image &Image::operator|=(Image i) {
pixOr(pix_, pix_, i);
return *this;
}
Image Image::operator&(Image i) const {
return pixAnd(nullptr, pix_, i);
}
Image &Image::operator&=(Image i) {
pixAnd(pix_, pix_, i);
return *this;
}
}

View File

@ -0,0 +1,53 @@
///////////////////////////////////////////////////////////////////////
// File: image.h
// Description: Image wrapper.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_IMAGE_H_
#define TESSERACT_CCSTRUCT_IMAGE_H_
#include <tesseract/export.h>
struct Pix;
namespace tesseract {
// Thin wrapper around a Leptonica Pix pointer.
// The class declares no destructor, so an Image going out of scope does not
// release the Pix; call destroy() explicitly. Copying an Image copies only the
// pointer.
class TESS_API Image {
public:
// The wrapped image, or nullptr if none is attached.
Pix *pix_ = nullptr;
public:
Image() = default;
// Intentionally not explicit: allows a Pix* to be used where an Image is
// expected.
Image(Pix *pix) : pix_(pix) {}
// service
// Implicit conversion so an Image can be passed to functions taking a Pix*.
operator Pix *() const { return pix_; }
// Explicit access to the address of the wrapped pointer.
explicit operator Pix **() { return &pix_; }
Pix *operator->() const { return pix_; }
// api
Image clone() const; // increases refcount
Image copy() const; // does full copy
// Calls pixDestroy on the wrapped pointer.
void destroy();
// Returns true if pixZero reports the image as all zero.
bool isZero() const;
// ops
// The binary forms call pixOr/pixAnd with a null destination and return the
// result; the compound forms use this image as the destination.
Image operator|(Image) const;
Image &operator|=(Image);
Image operator&(Image) const;
Image &operator&=(Image);
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_IMAGE_H_

View File

@ -0,0 +1,742 @@
///////////////////////////////////////////////////////////////////////
// File: imagedata.cpp
// Description: Class to hold information about a single multi-page tiff
// training file and its corresponding boxes or text file.
// Author: Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "imagedata.h"
#include "boxread.h" // for ReadMemBoxes
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::CYAN, ScrollView::NONE
#include "tprintf.h" // for tprintf
#include "helpers.h" // for IntCastRounded, TRand, ClipToRange, Modulo
#include "serialis.h" // for TFile
#include <allheaders.h> // for pixDestroy, pixGetHeight, pixGetWidth, lept_...
#include <cinttypes> // for PRId64
namespace tesseract {
// Number of documents to read ahead while training. Doesn't need to be very
// large. Used by DocumentCache::GetPageRoundRobin as the upper bound on the
// number of following documents for which a background load is requested.
const int kMaxReadAhead = 8;
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) {
SetPix(pix);
}
// Destructor. Only when built with TESSERACT_IMAGEDATA_AS_PIX does the object
// hold a Pix (internal_pix_) that has to be destroyed here; in the default
// build there is nothing to release explicitly.
ImageData::~ImageData() {
#ifdef TESSERACT_IMAGEDATA_AS_PIX
internal_pix_.destroy();
#endif
}
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
// The caller owns the returned object. Returns nullptr (after logging) if
// there is neither usable box text nor any truth text.
// An empty image buffer (imagedatasize <= 0 or imagedata == nullptr) leaves
// the stored image data empty instead of indexing into an empty vector.
ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
                            const char *imagedata, int imagedatasize, const char *truth_text,
                            const char *box_text) {
  auto *image_data = new ImageData();
  image_data->imagefilename_ = name;
  image_data->page_number_ = page_number;
  image_data->language_ = lang;
  // Save the imagedata.
  // TODO: optimize resize (no init).
  if (imagedata != nullptr && imagedatasize > 0) {
    image_data->image_data_.resize(imagedatasize);
    // data() is valid here because the vector is known to be non-empty.
    memcpy(image_data->image_data_.data(), imagedata, imagedatasize);
  }
  if (!image_data->AddBoxes(box_text)) {
    if (truth_text == nullptr || truth_text[0] == '\0') {
      tprintf("Error: No text corresponding to page %d from image %s!\n", page_number, name);
      delete image_data;
      return nullptr;
    }
    image_data->transcription_ = truth_text;
    // If we have no boxes, the transcription is in the 0th box_texts_.
    image_data->box_texts_.emplace_back(truth_text);
    // We will create a box for the whole image on PreScale, to save unpacking
    // the image now.
  } else if (truth_text != nullptr && truth_text[0] != '\0' &&
             image_data->transcription_ != truth_text) {
    // Save the truth text as it is present and disagrees with the box text.
    image_data->transcription_ = truth_text;
  }
  return image_data;
}
// Writes every field of *this to the given file, in a fixed order.
// Returns false as soon as any write fails.
bool ImageData::Serialize(TFile *fp) const {
  // Short-circuit evaluation stops at the first field that fails to write.
  const bool fields_written = fp->Serialize(imagefilename_) && fp->Serialize(&page_number_) &&
                              fp->Serialize(image_data_) && fp->Serialize(language_) &&
                              fp->Serialize(transcription_) && fp->Serialize(boxes_) &&
                              fp->Serialize(box_texts_);
  if (!fields_written) {
    return false;
  }
  // The orientation flag is stored as a single signed byte.
  int8_t vertical_flag = vertical_text_;
  return fp->Serialize(&vertical_flag);
}
// Reads every field of *this from the given file, in the order Serialize
// wrote them. Returns false as soon as any read fails.
bool ImageData::DeSerialize(TFile *fp) {
  // Short-circuit evaluation stops at the first field that fails to read.
  const bool fields_read = fp->DeSerialize(imagefilename_) && fp->DeSerialize(&page_number_) &&
                           fp->DeSerialize(image_data_) && fp->DeSerialize(language_) &&
                           fp->DeSerialize(transcription_) && fp->DeSerialize(boxes_) &&
                           fp->DeSerialize(box_texts_);
  if (!fields_read) {
    return false;
  }
  // The orientation flag is stored as a single signed byte.
  int8_t vertical_flag = 0;
  if (!fp->DeSerialize(&vertical_flag)) {
    return false;
  }
  vertical_text_ = vertical_flag != 0;
  return true;
}
// As DeSerialize, but only seeks past the data - hence a static method.
// The fields are consumed in the same order DeSerialize reads them.
bool ImageData::SkipDeSerialize(TFile *fp) {
  // imagefilename_
  if (!fp->DeSerializeSkip()) {
    return false;
  }
  // page_number_ is read into a throw-away variable.
  int32_t ignored_page_number;
  if (!fp->DeSerialize(&ignored_page_number)) {
    return false;
  }
  // image_data_, language_, transcription_, then boxes_ (TBOX-sized items).
  const bool skipped = fp->DeSerializeSkip() && fp->DeSerializeSkip() && fp->DeSerializeSkip() &&
                       fp->DeSerializeSkip(sizeof(TBOX));
  if (!skipped) {
    return false;
  }
  // box_texts_: a count followed by that many entries, each skipped in turn.
  int32_t text_count;
  if (!fp->DeSerialize(&text_count)) {
    return false;
  }
  for (int32_t remaining = text_count; remaining > 0; --remaining) {
    if (!fp->DeSerializeSkip()) {
      return false;
    }
  }
  // vertical_text_ flag.
  int8_t vertical_flag = 0;
  return fp->DeSerialize(&vertical_flag);
}
// Stores the given Pix in *this.
// Default build: saves the Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory (see SetPixInternal).
// With TESSERACT_IMAGEDATA_AS_PIX: the handle itself is kept in internal_pix_
// and is not destroyed here.
// NOTE(review): in the AS_PIX build a previously held internal_pix_ is
// overwritten without being destroyed - confirm callers never set twice.
void ImageData::SetPix(Image pix) {
#ifdef TESSERACT_IMAGEDATA_AS_PIX
internal_pix_ = pix;
#else
SetPixInternal(pix, &image_data_);
#endif
}
// Returns the Pix image for *this. Must be pixDestroyed after use.
// Default build: the image is decoded from image_data_ on every call.
// With TESSERACT_IMAGEDATA_AS_PIX: returns a clone or a copy of internal_pix_
// depending on GRAPHICS_DISABLED (see the comments in each branch).
Image ImageData::GetPix() const {
#ifdef TESSERACT_IMAGEDATA_AS_PIX
# ifdef GRAPHICS_DISABLED
/* The only caller of this is the scaling functions to prescale the
* source. Thus we can just return a new pointer to the same data. */
return internal_pix_.clone();
# else
/* pixCopy always does an actual copy, so the caller can modify the
* changed data. */
return internal_pix_.copy();
# endif
#else
return GetPixInternal(image_data_);
#endif
}
// Produces the image scaled to target_height and fills in whichever of the
// optional outputs are non-null.
//   target_height: wanted height; 0 means min(original height, max_height).
//   scale_factor:  receives the factor that was applied.
//   scaled_width / scaled_height: receive the size of the scaled image.
//   boxes: receives the stored boxes scaled by the same factor, or a single
//          box covering the whole image if there are no stored boxes.
// Returns the scaled Pix, which must be pixDestroyed after use, or nullptr if
// scaling failed (scaled_width/scaled_height then hold the estimated size).
Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
                          int *scaled_height, std::vector<TBOX> *boxes) const {
  Image source = GetPix();
  ASSERT_HOST(source != nullptr);
  const int source_width = pixGetWidth(source);
  const int source_height = pixGetHeight(source);
  if (target_height == 0) {
    target_height = std::min(source_height, max_height);
  }
  const float im_factor = static_cast<float>(target_height) / source_height;
  // Provisional size estimates; replaced by the real size once scaling works.
  if (scaled_width != nullptr) {
    *scaled_width = IntCastRounded(im_factor * source_width);
  }
  if (scaled_height != nullptr) {
    *scaled_height = target_height;
  }
  // Get the scaled image.
  Image scaled = pixScale(source, im_factor, im_factor);
  if (scaled == nullptr) {
    tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", source_width,
            source_height, im_factor);
    source.destroy();
    return nullptr;
  }
  if (scaled_width != nullptr) {
    *scaled_width = pixGetWidth(scaled);
  }
  if (scaled_height != nullptr) {
    *scaled_height = pixGetHeight(scaled);
  }
  source.destroy();
  if (boxes != nullptr) {
    // Get the boxes, scaled like the image.
    boxes->clear();
    for (TBOX scaled_box : boxes_) {
      scaled_box.scale(im_factor);
      boxes->push_back(scaled_box);
    }
    if (boxes->empty()) {
      // Make a single box for the whole image.
      TBOX whole_image(0, 0, im_factor * source_width, target_height);
      boxes->push_back(whole_image);
    }
  }
  if (scale_factor != nullptr) {
    *scale_factor = im_factor;
  }
  return scaled;
}
// Returns the size of the encoded image buffer; other members are not counted.
int ImageData::MemoryUsed() const {
  const auto encoded_bytes = image_data_.size();
  return static_cast<int>(encoded_bytes);
}
#ifndef GRAPHICS_DISABLED
// Draws the image in a new ScrollView window together with either its boxes
// and their texts or, if there are no boxes, the full transcription. Calls
// Wait() on the window before returning. Does nothing if there is no image.
void ImageData::Display() const {
  const int kTextSize = 64;
  // Draw the image.
  Image pix = GetPix();
  if (pix == nullptr) {
    return;
  }
  const int width = pixGetWidth(pix);
  const int height = pixGetHeight(pix);
  auto *win =
      new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
                     2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
  win->Draw(pix, 0, height - 1);
  pix.destroy();
  // Draw the boxes.
  win->Pen(ScrollView::RED);
  win->Brush(ScrollView::NONE);
  // Shrink the text if twice the first box's height is below the default.
  int text_size = kTextSize;
  if (!boxes_.empty()) {
    const int doubled_height = boxes_[0].height() * 2;
    if (doubled_height < text_size) {
      text_size = doubled_height;
    }
  }
  win->TextAttributes("Arial", text_size, false, false, false);
  if (boxes_.empty()) {
    // The full transcription.
    win->Pen(ScrollView::CYAN);
    win->Text(0, height + kTextSize * 2, transcription_.c_str());
  } else {
    for (size_t b = 0; b < boxes_.size(); ++b) {
      boxes_[b].plot(win);
      win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].c_str());
    }
  }
  win->Update();
  win->Wait();
}
#endif
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
const std::vector<int> &box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) {
continue;
}
transcription_ += texts[i];
boxes_.push_back(boxes[i]);
box_texts_.push_back(texts[i]);
}
}
#ifndef TESSERACT_IMAGEDATA_AS_PIX
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
// If neither encoding succeeds, an error is logged and *image_data is left
// empty rather than being filled from an uninitialised buffer.
void ImageData::SetPixInternal(Image pix, std::vector<char> *image_data) {
  // Initialised so that a failed pixWriteMem cannot leave them indeterminate.
  l_uint8 *data = nullptr;
  size_t size = 0;
  l_int32 ret = pixWriteMem(&data, &size, pix, IFF_PNG);
  if (ret) {
    ret = pixWriteMem(&data, &size, pix, IFF_PNM);
  }
  pix.destroy();
  if (ret != 0 || data == nullptr || size == 0) {
    tprintf("Error: failed to encode image as PNG or PNM!\n");
    image_data->clear();
    if (data != nullptr) {
      lept_free(data);
    }
    return;
  }
  // TODO: optimize resize (no init).
  image_data->resize(size);
  memcpy(image_data->data(), data, size);
  lept_free(data);
}
// Decodes image_data with pixReadMem and returns the resulting Pix, which
// must be pixDestroyed after use. Returns a null image for an empty buffer.
Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
  if (image_data.empty()) {
    return nullptr;
  }
  // Convert the array to an image.
  const auto *encoded = reinterpret_cast<const unsigned char *>(image_data.data());
  Image decoded = pixReadMem(encoded, image_data.size());
  return decoded;
}
#endif
// Parses box_text as the contents of a box file and adds the boxes found for
// page_number_. Returns false if box_text is null or empty, or if
// ReadMemBoxes fails (the latter is also logged).
bool ImageData::AddBoxes(const char *box_text) {
  const bool have_text = box_text != nullptr && box_text[0] != '\0';
  if (!have_text) {
    return false;
  }
  std::vector<TBOX> boxes;
  std::vector<std::string> texts;
  std::vector<int> box_pages;
  const bool parsed = ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
                                   /*continue_on_failure*/ true, &boxes, &texts, nullptr,
                                   &box_pages);
  if (!parsed) {
    tprintf("Error: No boxes for page %d from image %s!\n", page_number_, imagefilename_.c_str());
    return false;
  }
  AddBoxes(boxes, texts, box_pages);
  return true;
}
DocumentData::DocumentData(const std::string &name)
: document_name_(name)
, pages_offset_(-1)
, total_pages_(-1)
, memory_used_(0)
, max_memory_(0)
, reader_(nullptr) {}
// Joins the loader thread if it is joinable, then deletes all cached pages
// while holding both mutexes.
DocumentData::~DocumentData() {
  // The join must come before the locks are taken below.
  if (thread.joinable()) {
    thread.join();
  }
  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
  std::lock_guard<std::mutex> general_guard(general_mutex_);
  for (ImageData *cached_page : pages_) {
    delete cached_page;
  }
}
// Records the document settings and then loads pages from the given lstmf
// filename into the cache, starting at start_page. The reader is used to
// read the file. Returns the result of ReCachePages().
bool DocumentData::LoadDocument(const char *filename, int start_page, int64_t max_memory,
                                FileReader reader) {
  SetDocument(filename, max_memory, reader);
  // SetDocument resets the offset to -1, so it has to be assigned afterwards.
  pages_offset_ = start_page;
  const bool loaded = ReCachePages();
  return loaded;
}
// Records the file name, memory limit and reader for the document and resets
// the page offset to -1, without loading anything. Holds both mutexes while
// the members are updated.
void DocumentData::SetDocument(const char *filename, int64_t max_memory, FileReader reader) {
  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
  std::lock_guard<std::mutex> general_guard(general_mutex_);
  document_name_ = filename;
  reader_ = reader;
  max_memory_ = max_memory;
  pages_offset_ = -1;
}
// Serializes the cached pages and writes them to filename through writer.
// Returns false (after logging) if serializing or writing fails.
bool DocumentData::SaveDocument(const char *filename, FileWriter writer) {
  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
  TFile fp;
  fp.OpenWrite(nullptr);
  // CloseWrite is only attempted when serialization succeeded.
  const bool saved = fp.Serialize(pages_) && fp.CloseWrite(filename, writer);
  if (!saved) {
    tprintf("Serialize failed: %s\n", filename);
    return false;
  }
  return true;
}
// Appends page to the cached pages and adds its MemoryUsed() to the running
// memory total. page must not be null, since it is dereferenced here.
void DocumentData::AddPageToDocument(ImageData *page) {
  std::lock_guard<std::mutex> pages_guard(pages_mutex_);
  pages_.push_back(page);
  const int64_t updated_total = memory_used() + page->MemoryUsed();
  set_memory_used(updated_total);
}
// If the given index is not currently loaded, loads it using a separate
// thread.
// Returns immediately if the page is already available, or if pages_offset_
// already equals index (a load for that page has been requested). Otherwise
// the cached pages are deleted, pages_offset_ is set to index, and a new
// thread running ReCachePages is started.
// NOTE(review): the previous loader thread is joined while pages_mutex_ is
// held, and ReCachePages itself locks pages_mutex_. If that earlier thread has
// not yet acquired the mutex, this join looks like it could block forever -
// confirm whether callers can reach that interleaving.
void DocumentData::LoadPageInBackground(int index) {
ImageData *page = nullptr;
if (IsPageAvailable(index, &page)) {
return;
}
std::lock_guard<std::mutex> lock(pages_mutex_);
if (pages_offset_ == index) {
return;
}
pages_offset_ = index;
// Note: this loop variable shadows the outer `page` pointer.
for (auto page : pages_) {
delete page;
}
pages_.clear();
// A std::thread must be joined before a new thread can be assigned to it.
if (thread.joinable()) {
thread.join();
}
thread = std::thread(&tesseract::DocumentData::ReCachePages, this);
}
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks (yielding between attempts) until the page is
// available. The result may be nullptr: IsPageAvailable reports an empty
// document or a negative index as "available" with a null page.
const ImageData *DocumentData::GetPage(int index) {
  ImageData *page = nullptr;
  for (;;) {
    if (IsPageAvailable(index, &page)) {
      break;
    }
    // If there is no background load scheduled, schedule one now.
    bool needs_loading;
    {
      std::lock_guard<std::mutex> pages_guard(pages_mutex_);
      needs_loading = pages_offset_ != index;
    }
    if (needs_loading) {
      LoadPageInBackground(index);
    }
    // We can't directly load the page, or the background load will delete it
    // while the caller is using it, so give it a chance to work.
    std::this_thread::yield();
  }
  return page;
}
// Returns true if the requested page is available, and provides a pointer,
// which may be nullptr if the document is empty. May block, even though it
// doesn't guarantee to return true.
//   index: requested page; taken modulo the total page count. A negative
//          index is reported as available with a null page.
//   page:  receives the page pointer when true is returned.
// Returns false while the document is unloaded (NumPages() < 0) or the page
// lies outside the currently cached window.
bool DocumentData::IsPageAvailable(int index, ImageData **page) {
  std::lock_guard<std::mutex> lock(pages_mutex_);
  const int num_pages = NumPages();
  if (num_pages == 0 || index < 0) {
    *page = nullptr; // Empty Document.
    return true;
  }
  if (num_pages > 0) {
    index = Modulo(index, num_pages);
    // Do the window test on a signed position relative to the first cached
    // page. Adding the (signed) offset to the unsigned pages_.size() would
    // wrap when pages_offset_ is negative and could index an empty vector.
    const int64_t relative = static_cast<int64_t>(index) - pages_offset_;
    if (relative >= 0 && relative < static_cast<int64_t>(pages_.size())) {
      *page = pages_[relative]; // Page is available already.
      return true;
    }
  }
  return false;
}
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata.
int64_t DocumentData::UnCache() {
std::lock_guard<std::mutex> lock(pages_mutex_);
int64_t memory_saved = memory_used();
for (auto page : pages_) {
delete page;
}
pages_.clear();
pages_offset_ = -1;
set_total_pages(-1);
set_memory_used(0);
tprintf("Unloaded document %s, saving %" PRId64 " memory\n", document_name_.c_str(),
memory_saved);
return memory_saved;
}
// Shuffles the cached pages by performing one random swap per page.
// The generator is seeded from the document name, so different documents get
// shuffled differently, but the same name always gives the same sequence.
// Note: pages_mutex_ is not taken here.
void DocumentData::Shuffle() {
  TRand random;
  random.set_seed(document_name_.c_str());
  const int page_count = pages_.size();
  for (int swaps_left = page_count; swaps_left > 0; --swaps_left) {
    // Draw the two indices in separate statements to keep the draw order fixed.
    const int first = random.IntRand() % page_count;
    const int second = random.IntRand() % page_count;
    std::swap(pages_[first], pages_[second]);
  }
}
// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_
// starting at index pages_offset_.
// Any previously cached pages are deleted first. Pages before pages_offset_
// and pages beyond the memory budget are skipped in the file rather than
// loaded. Returns true if at least one page slot ended up in the cache.
// A page slot stored as null in the file is kept as a null entry in pages_;
// it has no filename to fill in and contributes no memory.
bool DocumentData::ReCachePages() {
  std::lock_guard<std::mutex> lock(pages_mutex_);
  // Read the file.
  set_total_pages(0);
  set_memory_used(0);
  int loaded_pages = 0;
  for (auto *stale_page : pages_) {
    delete stale_page;
  }
  pages_.clear();
  TFile fp;
  if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) ||
      loaded_pages <= 0) {
    tprintf("Deserialize header failed: %s\n", document_name_.c_str());
    return false;
  }
  pages_offset_ %= loaded_pages;
  // Skip pages before the first one we want, and load the rest until max
  // memory and skip the rest after that.
  int page;
  for (page = 0; page < loaded_pages; ++page) {
    uint8_t non_null;
    if (!fp.DeSerialize(&non_null)) {
      break;
    }
    if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) {
      if (non_null && !ImageData::SkipDeSerialize(&fp)) {
        break;
      }
    } else {
      ImageData *image_data = nullptr;
      if (non_null) {
        image_data = new ImageData;
        if (!image_data->DeSerialize(&fp)) {
          delete image_data;
          break;
        }
        // Pages saved without a name inherit the document name and index.
        if (image_data->imagefilename().empty()) {
          image_data->set_imagefilename(document_name_);
          image_data->set_page_number(page);
        }
        set_memory_used(memory_used() + image_data->MemoryUsed());
      }
      // Null slots are kept so that cached indices still match the file.
      pages_.push_back(image_data);
    }
  }
  if (page < loaded_pages) {
    // The loop stopped early: drop everything that was read so far.
    tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(), page,
            loaded_pages);
    for (auto *partial_page : pages_) {
      delete partial_page;
    }
    pages_.clear();
  } else if (loaded_pages > 1) {
    // Avoid lots of messages for training with single line images.
    tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), loaded_pages,
            pages_offset_ + 1, pages_offset_ + pages_.size(), document_name_.c_str());
  }
  set_total_pages(loaded_pages);
  return !pages_.empty();
}
// A collection of DocumentData that knows roughly how much memory it is using.
// The constructor only records the memory budget; documents are added later
// through LoadDocuments / AddToCache.
DocumentCache::DocumentCache(int64_t max_memory) : max_memory_(max_memory) {}
// Deletes every document held by the cache.
DocumentCache::~DocumentCache() {
  for (DocumentData *owned_document : documents_) {
    delete owned_document;
  }
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
// Returns true if page 0 could be fetched afterwards, which verifies the
// list of filenames. Returns false if that fetch fails or if the cache holds
// no documents (for example because filenames is empty).
bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
                                  CachingStrategy cache_strategy, FileReader reader) {
  cache_strategy_ = cache_strategy;
  int64_t fair_share_memory = 0;
  // In the round-robin case, each DocumentData handles restricting its content
  // to its fair share of memory. In the sequential case, DocumentCache
  // determines which DocumentDatas are held entirely in memory.
  // The emptiness check prevents a division by zero for an empty list.
  if (cache_strategy_ == CS_ROUND_ROBIN && !filenames.empty()) {
    fair_share_memory = max_memory_ / filenames.size();
  }
  // Iterate by reference: there is no need to copy each filename.
  for (const auto &filename : filenames) {
    auto *document = new DocumentData(filename);
    document->SetDocument(filename.c_str(), fair_share_memory, reader);
    AddToCache(document);
  }
  if (!documents_.empty()) {
    // Try to get the first page now to verify the list of filenames.
    if (GetPageBySerial(0) != nullptr) {
      return true;
    }
    tprintf("Load of page 0 failed!\n");
  }
  return false;
}
// Adds document to the cache.
// The pointer is stored as given; ~DocumentCache later deletes it.
// Always returns true.
bool DocumentCache::AddToCache(DocumentData *data) {
documents_.push_back(data);
return true;
}
// Returns the first document whose name equals document_name, or nullptr if
// there is none.
DocumentData *DocumentCache::FindDocument(const std::string &document_name) const {
  DocumentData *match = nullptr;
  for (DocumentData *candidate : documents_) {
    if (candidate->document_name() != document_name) {
      continue;
    }
    match = candidate;
    break;
  }
  return match;
}
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
// strategy, could take a long time.
int DocumentCache::TotalPages() {
if (cache_strategy_ == CS_SEQUENTIAL) {
// In sequential mode, we assume each doc has the same number of pages
// whether it is true or not.
if (num_pages_per_doc_ == 0) {
GetPageSequential(0);
}
return num_pages_per_doc_ * documents_.size();
}
int total_pages = 0;
for (auto *document : documents_) {
// We have to load a page to make NumPages() valid.
document->GetPage(0);
total_pages += document->NumPages();
}
return total_pages;
}
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
// Serial s maps to document s % num_docs and page s / num_docs. After the
// requested page is fetched, background loads are requested for the pages of
// up to kMaxReadAhead following serial numbers.
// Requires at least one document, as GetPageSequential does.
const ImageData *DocumentCache::GetPageRoundRobin(int serial) {
  int num_docs = documents_.size();
  // Without this check, an empty cache would divide by zero below.
  ASSERT_HOST(num_docs > 0);
  int doc_index = serial % num_docs;
  const ImageData *doc = documents_[doc_index]->GetPage(serial / num_docs);
  for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
    doc_index = (serial + offset) % num_docs;
    int page = (serial + offset) / num_docs;
    documents_[doc_index]->LoadPageInBackground(page);
  }
  return doc;
}
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
// Serial s maps to document (s / num_pages_per_doc_) % num_docs and page
// s % num_pages_per_doc_, where num_pages_per_doc_ is taken from the first
// document. After the page is fetched, documents are un-cached if the total
// memory reaches max_memory_, and a background load of the next document is
// requested if it is not cached and memory allows.
// Asserts that there is at least one document and that the first document is
// not empty.
const ImageData *DocumentCache::GetPageSequential(int serial) {
int num_docs = documents_.size();
ASSERT_HOST(num_docs > 0);
if (num_pages_per_doc_ == 0) {
// Use the pages in the first doc as the number of pages in each doc.
documents_[0]->GetPage(0);
num_pages_per_doc_ = documents_[0]->NumPages();
if (num_pages_per_doc_ == 0) {
tprintf("First document cannot be empty!!\n");
ASSERT_HOST(num_pages_per_doc_ > 0);
}
// Get rid of zero now if we don't need it.
if (serial / num_pages_per_doc_ % num_docs > 0) {
documents_[0]->UnCache();
}
}
int doc_index = serial / num_pages_per_doc_ % num_docs;
const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
// Count up total memory. Background loading makes it more complicated to
// keep a running count.
int64_t total_memory = 0;
for (auto *document : documents_) {
total_memory += document->memory_used();
}
if (total_memory >= max_memory_) {
// Find something to un-cache.
// If there are more than 3 in front, then serial is from the back reader
// of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
// we create a hole between them and then un-caching the backmost occupied
// will work for both.
int num_in_front = CountNeighbourDocs(doc_index, 1);
for (int offset = num_in_front - 2; offset > 1 && total_memory >= max_memory_; --offset) {
int next_index = (doc_index + offset) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
// If that didn't work, the best solution is to un-cache from the back. If
// we take away the document that a 2nd reader is using, it will put it
// back and make a hole between.
// num_behind is only acted on when negative: the loop below counts upwards
// from it towards zero.
int num_behind = CountNeighbourDocs(doc_index, -1);
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; ++offset) {
int next_index = (doc_index + offset + num_docs) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
}
// Request the next document in the background if it is not cached and
// memory allows.
int next_index = (doc_index + 1) % num_docs;
if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
documents_[next_index]->LoadPageInBackground(0);
}
return doc;
}
// Helper counts the number of adjacent cached neighbours of index looking in
// direction dir, ie index+dir, index+2*dir etc.
// The result is signed like dir: it is the last offset (a multiple of dir)
// that still referred to a cached document, so 0 means the immediate
// neighbour is not cached.
// If every other document is cached, num_docs is returned, which is positive
// whatever the sign of dir.
// NOTE(review): a positive result for dir == -1 makes the "un-cache from the
// back" loop in GetPageSequential a no-op - confirm that this is intended.
int DocumentCache::CountNeighbourDocs(int index, int dir) {
int num_docs = documents_.size();
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
// Adding num_docs keeps the operand of % non-negative for negative offsets.
int offset_index = (index + offset + num_docs) % num_docs;
if (!documents_[offset_index]->IsCached()) {
return offset - dir;
}
}
return num_docs;
}
} // namespace tesseract.

View File

@ -0,0 +1,362 @@
///////////////////////////////////////////////////////////////////////
// File: imagedata.h
// Description: Class to hold information about a single image and its
// corresponding boxes or text file.
// Author: Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
#define TESSERACT_IMAGE_IMAGEDATA_H_
#include "image.h"
#include "points.h" // for FCOORD
#include <mutex> // for std::mutex
#include <thread> // for std::thread
struct Pix;
namespace tesseract {
class TFile;
class ScrollView;
class TBOX;
// Amount of padding to apply in output pixels in feature mode.
// (A namespace-scope const int has internal linkage, so defining it in this
// header is safe across translation units.)
const int kFeaturePadding = 2;
// Number of pixels to pad around text boxes.
const int kImagePadding = 4;
// Enum to determine the caching and data sequencing strategy.
// DocumentCache::LoadDocuments stores the chosen value, and
// DocumentCache::TotalPages branches on it.
enum CachingStrategy {
// Reads all of one file before moving on to the next. Requires samples to be
// shuffled across files. Uses the count of samples in the first file as
// the count in all the files to achieve high-speed random access. As a
// consequence, if subsequent files are smaller, they get entries used more
// than once, and if subsequent files are larger, some entries are not used.
// Best for larger data sets that don't fit in memory.
CS_SEQUENTIAL,
// Reads one sample from each file in rotation. Does not require shuffled
// samples, but is extremely disk-intensive. Samples in smaller files also
// get used more often than samples in larger files.
// Best for smaller data sets that mostly fit in memory.
CS_ROUND_ROBIN,
};
// Class to hold information on a single image:
// Filename, cached image as a Pix*, character boxes, text transcription.
// The text transcription is the ground truth UTF-8 text for the image.
// Character boxes are optional and indicate the desired segmentation of
// the text into recognition units.
// In the default build the image is held in encoded form in image_data_;
// only with TESSERACT_IMAGEDATA_AS_PIX is a Pix kept directly.
class TESS_API ImageData {
public:
// Creates an empty object with page number -1 and horizontal text.
ImageData();
// Takes ownership of the pix.
ImageData(bool vertical, Image pix);
~ImageData();
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
// Returns nullptr if there is neither usable box text nor truth text.
static ImageData *Build(const char *name, int page_number, const char *lang,
const char *imagedata, int imagedatasize, const char *truth_text,
const char *box_text);
// Writes to the given file. Returns false in case of error.
bool Serialize(TFile *fp) const;
// Reads from the given file. Returns false in case of error.
bool DeSerialize(TFile *fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(TFile *fp);
// Other accessors.
const std::string &imagefilename() const {
return imagefilename_;
}
void set_imagefilename(const std::string &name) {
imagefilename_ = name;
}
int page_number() const {
return page_number_;
}
void set_page_number(int num) {
page_number_ = num;
}
// The encoded (PNG/PNM) image bytes.
const std::vector<char> &image_data() const {
return image_data_;
}
const std::string &language() const {
return language_;
}
void set_language(const std::string &lang) {
language_ = lang;
}
const std::string &transcription() const {
return transcription_;
}
const std::vector<TBOX> &boxes() const {
return boxes_;
}
const std::vector<std::string> &box_texts() const {
return box_texts_;
}
// No bounds check is made on index.
const std::string &box_text(int index) const {
return box_texts_[index];
}
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
void SetPix(Image pix);
// Returns the Pix image for *this. Must be pixDestroyed after use.
Image GetPix() const;
// Gets anything and everything with a non-nullptr pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned.
// Also returns (if not nullptr) the width and height of the scaled image.
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not nullptr) is set to the scale factor that was
// applied to the image to achieve the target_height.
Image PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
int *scaled_height, std::vector<TBOX> *boxes) const;
// Returns the size of the encoded image data.
int MemoryUsed() const;
// Draws the data in a new window.
// Note: the definition is compiled only when GRAPHICS_DISABLED is not defined.
void Display() const;
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
const std::vector<int> &box_pages);
private:
// Note: the two static helpers below are defined only when
// TESSERACT_IMAGEDATA_AS_PIX is not defined.
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
static void SetPixInternal(Image pix, std::vector<char> *image_data);
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
static Image GetPixInternal(const std::vector<char> &image_data);
// Parses the text string as a box file and adds any discovered boxes that
// match the page number. Returns false on error.
bool AddBoxes(const char *box_text);
private:
std::string imagefilename_; // File to read image from.
int32_t page_number_; // Page number if multi-page tif or -1.
// see https://github.com/tesseract-ocr/tesseract/pull/2965
// EP: reconsider for tess6.0/opencv
#ifdef TESSERACT_IMAGEDATA_AS_PIX
Image internal_pix_;
#endif
std::vector<char> image_data_; // PNG/PNM file data.
std::string language_; // Language code for image.
std::string transcription_; // UTF-8 ground truth of image.
std::vector<TBOX> boxes_; // If non-empty boxes of the image.
std::vector<std::string> box_texts_; // String for text in each box.
bool vertical_text_; // Image has been rotated from vertical.
};
// A collection of ImageData that knows roughly how much memory it is using.
class DocumentData {
public:
TESS_API
explicit DocumentData(const std::string &name);
TESS_API
~DocumentData();
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
TESS_API
bool LoadDocument(const char *filename, int start_page, int64_t max_memory, FileReader reader);
// Sets up the document, without actually loading it.
void SetDocument(const char *filename, int64_t max_memory, FileReader reader);
// Writes all the pages to the given filename. Returns false on error.
TESS_API
bool SaveDocument(const char *filename, FileWriter writer);
// Adds the given page data to this document, counting up memory.
TESS_API
void AddPageToDocument(ImageData *page);
// Returns the document name.
// NOTE(review): general_mutex_ is released when this function returns, but
// the result is a reference to the member, so the caller reads the string
// without the lock. SetDocument can assign document_name_ - confirm that no
// caller uses the reference concurrently with SetDocument.
const std::string &document_name() const {
std::lock_guard<std::mutex> lock(general_mutex_);
return document_name_;
}
// Returns total_pages_, read under general_mutex_. The value is negative
// while the document is not cached (see IsCached).
int NumPages() const {
  std::lock_guard<std::mutex> general_guard(general_mutex_);
  const int total = total_pages_;
  return total;
}
// Returns the number of page slots currently held in pages_.
// Note: unlike the neighbouring accessors, this takes no mutex.
size_t PagesSize() const {
return pages_.size();
}
// Returns the running memory count, read under general_mutex_.
int64_t memory_used() const {
  std::lock_guard<std::mutex> general_guard(general_mutex_);
  const int64_t bytes = memory_used_;
  return bytes;
}
// If the given index is not currently loaded, loads it using a separate
// thread. Note: there are 4 cases:
// Document uncached: IsCached() returns false, total_pages_ < 0.
// Required page is available: IsPageAvailable returns true. In this case,
// total_pages_ > 0 and
// pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
// Pages are loaded, but the required one is not.
// The requested page is being loaded by LoadPageInBackground. In this case,
// index == pages_offset_. Once the loading starts, the pages lock is held
// until it completes, at which point IsPageAvailable will unblock and return
// true.
void LoadPageInBackground(int index);
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks until the background load is completed.
TESS_API
const ImageData *GetPage(int index);
// Returns true if the requested page is available, and provides a pointer,
// which may be nullptr if the document is empty. May block, even though it
// doesn't guarantee to return true.
bool IsPageAvailable(int index, ImageData **page);
// Takes ownership of the given page index. The page is made nullptr in *this.
ImageData *TakePage(int index) {
std::lock_guard<std::mutex> lock(pages_mutex_);
ImageData *page = pages_[index];
pages_[index] = nullptr;
return page;
}
// Returns true if the document is currently loaded or in the process of
// loading.
bool IsCached() const {
return NumPages() >= 0;
}
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata. Returns the memory saved.
int64_t UnCache();
// Shuffles all the pages in the document.
void Shuffle();
private:
// Sets the value of total_pages_ behind a mutex.
void set_total_pages(int total) {
std::lock_guard<std::mutex> lock(general_mutex_);
total_pages_ = total;
}
void set_memory_used(int64_t memory_used) {
std::lock_guard<std::mutex> lock(general_mutex_);
memory_used_ = memory_used;
}
// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_
// starting at index pages_offset_.
bool ReCachePages();
private:
// A name for this document.
std::string document_name_;
// A group of pages that corresponds in some loose way to a document.
std::vector<ImageData *> pages_;
// Page number of the first index in pages_.
int pages_offset_;
// Total number of pages in document (may exceed size of pages_.)
int total_pages_;
// Total of all pix sizes in the document.
int64_t memory_used_;
// Max memory to use at any time.
int64_t max_memory_;
// Saved reader from LoadDocument to allow re-caching.
FileReader reader_;
// Mutex that protects pages_ and pages_offset_ against multiple parallel
// loads, and provides a wait for page.
std::mutex pages_mutex_;
// Mutex that protects other data members that callers want to access without
// waiting for a load operation.
mutable std::mutex general_mutex_;
// Thread which loads document.
std::thread thread;
};
// A collection of DocumentData that knows roughly how much memory it is using.
// Note that while it supports background read-ahead, it assumes that a single
// thread is accessing documents, ie it is not safe for multiple threads to
// access different documents in parallel, as one may de-cache the other's
// content.
class DocumentCache {
public:
TESS_API
explicit DocumentCache(int64_t max_memory);
TESS_API
~DocumentCache();
// Deletes all existing documents from the cache.
void Clear() {
for (auto *document : documents_) {
delete document;
}
documents_.clear();
num_pages_per_doc_ = 0;
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
TESS_API
bool LoadDocuments(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
FileReader reader);
// Adds document to the cache.
bool AddToCache(DocumentData *data);
// Finds and returns a document by name.
DocumentData *FindDocument(const std::string &document_name) const;
// Returns a page by serial number using the current cache_strategy_ to
// determine the mapping from serial number to page.
const ImageData *GetPageBySerial(int serial) {
if (cache_strategy_ == CS_SEQUENTIAL) {
return GetPageSequential(serial);
} else {
return GetPageRoundRobin(serial);
}
}
const std::vector<DocumentData *> &documents() const {
return documents_;
}
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
// strategy, could take a long time.
TESS_API
int TotalPages();
private:
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
TESS_API
const ImageData *GetPageRoundRobin(int serial);
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
TESS_API
const ImageData *GetPageSequential(int serial);
// Helper counts the number of adjacent cached neighbour documents_ of index
// looking in direction dir, ie index+dir, index+2*dir etc.
int CountNeighbourDocs(int index, int dir);
// A group of pages that corresponds in some loose way to a document.
std::vector<DocumentData *> documents_;
// Strategy to use for caching and serializing data samples.
CachingStrategy cache_strategy_ = CS_SEQUENTIAL;
// Number of pages in the first document, used as a divisor in
// GetPageSequential to determine the document index.
int num_pages_per_doc_ = 0;
// Max memory allowed in this cache.
int64_t max_memory_ = 0;
};
} // namespace tesseract
#endif // TESSERACT_IMAGE_IMAGEDATA_H_

View File

@ -0,0 +1,259 @@
/**********************************************************************
* File: linlsq.cpp (Formerly llsq.c)
* Description: Linear Least squares fitting code.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "linlsq.h"
#include <cmath> // for std::sqrt
#include <cstdio>
#include "errcode.h"
namespace tesseract {
// Error raised by LLSQ::remove when it is called on an accumulator whose
// total weight is not positive.
constexpr ERRCODE EMPTY_LLSQ("Can't delete from an empty LLSQ");
/**********************************************************************
* LLSQ::clear
*
* Function to initialize a LLSQ.
**********************************************************************/
void LLSQ::clear() {
  // Reset the weight and every running sum to zero in one go.
  total_weight = sigx = sigy = sigxx = sigxy = sigyy = 0.0;
}
/**********************************************************************
* LLSQ::add
*
* Add an element to the accumulator.
**********************************************************************/
void LLSQ::add(double x, double y) {
  // Second-order sums.
  sigxx += x * x;
  sigxy += x * y;
  sigyy += y * y;
  // First-order sums.
  sigx += x;
  sigy += y;
  // The point counts with a weight of one.
  total_weight += 1.0;
}
// Adds an element with a specified weight.
void LLSQ::add(double x, double y, double weight) {
  // Every sum receives the point's contribution scaled by its weight.
  sigxx += x * x * weight;
  sigxy += x * y * weight;
  sigyy += y * y * weight;
  sigx += x * weight;
  sigy += y * weight;
  total_weight += weight;
}
// Adds a whole LLSQ.
void LLSQ::add(const LLSQ &other) {
  // Merging two accumulators is a member-wise sum.
  sigxx += other.sigxx;
  sigxy += other.sigxy;
  sigyy += other.sigyy;
  sigx += other.sigx;
  sigy += other.sigy;
  total_weight += other.total_weight;
}
/**********************************************************************
* LLSQ::remove
*
* Delete an element from the accumulator.
**********************************************************************/
void LLSQ::remove(double x, double y) {
  // Removing from an accumulator with no positive weight is reported as an
  // error before anything is subtracted.
  if (total_weight <= 0.0) {
    EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
  }
  // Undo the contribution of a unit-weight point.
  total_weight -= 1.0;
  sigx -= x;
  sigy -= y;
  sigxx -= x * x;
  sigxy -= x * y;
  sigyy -= y * y;
}
/**********************************************************************
* LLSQ::m
*
* Return the gradient of the line fit.
**********************************************************************/
double LLSQ::m() const {
  const double covar = covariance();
  const double x_var = x_variance();
  // With zero x variance the gradient is reported as 0.
  if (x_var == 0.0) {
    return 0.0;
  }
  return covar / x_var;
}
/**********************************************************************
* LLSQ::c
*
* Return the constant of the line fit.
**********************************************************************/
double LLSQ::c(double m) const {
  // Intercept for the supplied gradient m; 0 when nothing has been
  // accumulated.
  return total_weight > 0.0 ? (sigy - m * sigx) / total_weight : 0.0;
}
/**********************************************************************
* LLSQ::rms
*
* Return the rms error of the fit.
**********************************************************************/
double LLSQ::rms(double m, double c) const {
  // Nothing accumulated: report zero error.
  if (!(total_weight > 0)) {
    return 0.0;
  }
  // Total squared error of the line y = m*x + c, expanded in terms of the
  // running sums.
  const double total_sq_error =
      sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (total_weight * c - 2 * sigy);
  // A radicand that is not >= 0 is reported as zero error instead of being
  // passed to sqrt.
  if (!(total_sq_error >= 0)) {
    return 0.0;
  }
  return std::sqrt(total_sq_error / total_weight); // sqrt of mean
}
/**********************************************************************
* LLSQ::pearson
*
* Return the pearson product moment correlation coefficient.
**********************************************************************/
double LLSQ::pearson() const {
  const double covar = covariance();
  // Zero covariance gives a correlation of 0.
  if (covar == 0.0) {
    return 0.0;
  }
  const double var_product = x_variance() * y_variance();
  // Without a positive variance product the correlation is reported as 0.
  if (!(var_product > 0.0)) {
    return 0.0;
  }
  return covar / std::sqrt(var_product);
}
// Returns the x,y means as an FCOORD.
FCOORD LLSQ::mean_point() const {
  // With no positive weight there is no mean; return the origin.
  if (!(total_weight > 0.0)) {
    return FCOORD(0.0f, 0.0f);
  }
  return FCOORD(sigx / total_weight, sigy / total_weight);
}
// Returns the sqrt of the mean squared error measured perpendicular from the
// line through mean_point() in the direction dir.
//
// Derivation:
// Lemma: Let v and x_i (i=1..N) be k-dimensional vectors (1xk matrices).
// Let % be dot product and ' be transpose. Note that:
// Sum[i=1..N] (v % x_i)^2
// = v * [x_1' x_2' ... x_N'] * [x_1' x_2' .. x_N']' * v'
// If x_i have average 0 we have:
// = v * (N * COVARIANCE_MATRIX(X)) * v'
// Expanded for the case that k = 2, where we treat the dimensions
// as x_i and y_i, this is:
// = v * (N * [VAR(X), COV(X,Y); COV(X,Y) VAR(Y)]) * v'
// Now, we are trying to calculate the mean squared error, where v is
// perpendicular to our line of interest:
// Mean squared error
// = E [ (v % (x_i - x_avg))) ^2 ]
// = Sum (v % (x_i - x_avg))^2 / N
// = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v'
// = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v'
// = code below
double LLSQ::rms_orth(const FCOORD &dir) const {
  // v is derived from dir with FCOORD's operator! and then normalised; the
  // derivation above treats it as the unit vector perpendicular to the line.
  FCOORD v = !dir;
  v.normalise();
  // Quadratic form v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v'.
  const double mean_sq_error = x_variance() * v.x() * v.x() +
                               2 * covariance() * v.x() * v.y() +
                               y_variance() * v.y() * v.y();
  // The variances are computed by subtracting nearly equal sums, so rounding
  // can push a (mathematically non-negative) result slightly below zero.
  // Report zero error in that case, as LLSQ::rms does, instead of letting
  // sqrt produce NaN.
  if (!(mean_sq_error > 0.0)) {
    return 0.0;
  }
  return std::sqrt(mean_sq_error);
}
// Returns the direction of the fitted line as a unit vector, using the
// least mean squared perpendicular distance. The line runs through the
// mean_point, i.e. a point p on the line is given by:
// p = mean_point() + lambda * vector_fit() for some real number lambda.
// Note that the result (0<=x<=1, -1<=y<=1) is directionally ambiguous
// and may be negated without changing its meaning.
// Fitting a line m + 𝜆v to a set of N points Pi = (xi, yi), where
// m is the mean point (𝝁, 𝝂) and
// v is the direction vector (cos𝜃, sin𝜃)
// The perpendicular distance of each Pi from the line is:
// (Pi - m) x v, where x is the scalar cross product.
// Total squared error is thus:
// E = ∑((xi - 𝝁)sin𝜃 - (yi - 𝝂)cos𝜃)²
// = ∑(xi - 𝝁)²sin²𝜃 - 2∑(xi - 𝝁)(yi - 𝝂)sin𝜃 cos𝜃 + ∑(yi - 𝝂)²cos²𝜃
// = NVar(xi)sin²𝜃 - 2NCovar(xi, yi)sin𝜃 cos𝜃 + NVar(yi)cos²𝜃 (Eq 1)
// where Var(xi) is the variance of xi,
// and Covar(xi, yi) is the covariance of xi, yi.
// Taking the derivative wrt 𝜃 and setting to 0 to obtain the min/max:
// 0 = 2NVar(xi)sin𝜃 cos𝜃 -2NCovar(xi, yi)(cos²𝜃 - sin²𝜃) -2NVar(yi)sin𝜃 cos𝜃
// => Covar(xi, yi)(cos²𝜃 - sin²𝜃) = (Var(xi) - Var(yi))sin𝜃 cos𝜃
// Using double angles:
// 2Covar(xi, yi)cos2𝜃 = (Var(xi) - Var(yi))sin2𝜃 (Eq 2)
// So 𝜃 = 0.5 atan2(2Covar(xi, yi), Var(xi) - Var(yi)) (Eq 3)
// Because it involves 2𝜃 , Eq 2 has 2 solutions 90 degrees apart, but which
// is the min and which is the max? From Eq1:
// E/N = Var(xi)sin²𝜃 - 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)cos²𝜃
// and 90 degrees away, using sin/cos equivalences:
// E'/N = Var(xi)cos²𝜃 + 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)sin²𝜃
// The second error is smaller (making it the minimum) iff
// E'/N < E/N ie:
// (Var(xi) - Var(yi))(cos²𝜃 - sin²𝜃) < -4Covar(xi, yi)sin𝜃 cos𝜃
// Using double angles:
// (Var(xi) - Var(yi))cos2𝜃 < -2Covar(xi, yi)sin2𝜃 (InEq 1)
// But atan2(2Covar(xi, yi), Var(xi) - Var(yi)) picks 2𝜃 such that:
// sgn(cos2𝜃) = sgn(Var(xi) - Var(yi)) and sgn(sin2𝜃) = sgn(Covar(xi, yi))
// so InEq1 can *never* be true, making the atan2 result *always* the min!
// In the degenerate case, where Covar(xi, yi) = 0 AND Var(xi) = Var(yi),
// the 2 solutions have equal error and the inequality is still false.
// Therefore the solution really is as trivial as Eq 3.
// This is equivalent to returning the Principal Component in PCA, or the
// eigenvector corresponding to the largest eigenvalue in the covariance
// matrix. However, atan2 is much simpler! The one reference I found that
// uses this formula is http://web.mit.edu/18.06/www/Essays/tlsfit.pdf but
// that is still a much more complex derivation. It seems Pearson had already
// found this simple solution in 1901.
// http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559
FCOORD LLSQ::vector_fit() const {
  const double x_var = x_variance();
  const double y_var = y_variance();
  const double covar = covariance();
  // Eq 3 of the derivation above: half the angle given by atan2.
  const double theta = 0.5 * std::atan2(2.0 * covar, x_var - y_var);
  // Unit direction vector (cos theta, sin theta).
  return FCOORD(std::cos(theta), std::sin(theta));
}
} // namespace tesseract

View File

@ -0,0 +1,142 @@
/**********************************************************************
* File: linlsq.h (Formerly llsq.h)
* Description: Linear Least squares fitting code.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_
#define TESSERACT_CCSTRUCT_LINLSQ_H_
#include "points.h" // for FCOORD
#include <algorithm> // for std::nth_element
#include <cstdint> // for int32_t
namespace tesseract {
class TESS_API LLSQ {
public:
LLSQ() { // constructor
clear(); // set to zeros
}
void clear(); // initialize
// Adds an element with a weight of 1.
void add(double x, double y);
// Adds an element with a specified weight.
void add(double x, double y, double weight);
// Adds a whole LLSQ.
void add(const LLSQ &other);
// Deletes an element with a weight of 1.
void remove(double x, double y);
int32_t count() const { // no of elements
return static_cast<int>(total_weight + 0.5);
}
double m() const; // get gradient
double c(double m) const; // get constant
double rms(double m, double c) const; // get error
double pearson() const; // get correlation coefficient.
// Returns the x,y means as an FCOORD.
FCOORD mean_point() const;
// Returns the average sum of squared perpendicular error from a line
// through mean_point() in the direction dir.
double rms_orth(const FCOORD &dir) const;
// Returns the direction of the fitted line as a unit vector, using the
// least mean squared perpendicular distance. The line runs through the
// mean_point, i.e. a point p on the line is given by:
// p = mean_point() + lambda * vector_fit() for some real number lambda.
// Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
// and may be negated without changing its meaning, since a line is only
// unique to a range of pi radians.
// Modernists prefer to think of this as an Eigenvalue problem, but
// Pearson had the simple solution in 1901.
//
// Note that this is equivalent to returning the Principal Component in PCA,
// or the eigenvector corresponding to the largest eigenvalue in the
// covariance matrix.
FCOORD vector_fit() const;
// Returns the covariance.
double covariance() const {
if (total_weight > 0.0) {
return (sigxy - sigx * sigy / total_weight) / total_weight;
} else {
return 0.0;
}
}
double x_variance() const {
if (total_weight > 0.0) {
return (sigxx - sigx * sigx / total_weight) / total_weight;
} else {
return 0.0;
}
}
double y_variance() const {
if (total_weight > 0.0) {
return (sigyy - sigy * sigy / total_weight) / total_weight;
} else {
return 0.0;
}
}
private:
double total_weight; // no of elements or sum of weights.
double sigx; // sum of x
double sigy; // sum of y
double sigxx; // sum x squared
double sigxy; // sum of xy
double sigyy; // sum y squared
};
// Returns the median value of the vector, given that the values are
// circular, with the given modulus. Values may be signed or unsigned,
// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi).
// NOTE that the array is shuffled, but the time taken is linear.
// An assumption is made that most of the values are spread over no more than
// half the range, but wrap-around is accounted for if the median is near
// the wrap-around point.
// Cannot be a member of vector, as it makes heavy use of LLSQ.
// T must be an integer or float/double type.
// Returns T() for an empty vector.
template <typename T>
T MedianOfCircularValues(T modulus, std::vector<T> &v) {
  // There is no median of nothing; indexing v below would be out of bounds.
  if (v.empty()) {
    return T();
  }
  LLSQ stats;
  const T halfrange = static_cast<T>(modulus / 2);
  for (const auto &value : v) {
    stats.add(value, value + halfrange);
  }
  const bool offset_needed = stats.y_variance() < stats.x_variance();
  if (offset_needed) {
    // Iterate by reference: the shift must be applied to the stored values,
    // not to per-iteration copies.
    for (auto &value : v) {
      value += halfrange;
    }
  }
  const auto median_index = v.size() / 2;
  // Partial sort that puts the median element at median_index.
  std::nth_element(v.begin(), v.begin() + median_index, v.end());
  if (offset_needed) {
    // Undo the temporary shift, again in place.
    for (auto &value : v) {
      value -= halfrange;
    }
  }
  return v[median_index];
}
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_LINLSQ_H_

View File

@ -0,0 +1,170 @@
/******************************************************************************
*
* File: matrix.cpp (Formerly matrix.c)
* Description: Ratings matrix code. (Used by associator)
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "matrix.h"
#include "ratngs.h"
#include "tprintf.h"
#include "unicharset.h"
namespace tesseract {
// Destructor.
// Defaulted out-of-line in this source file (rather than in the header) so
// the compiler can create a single vtable instead of weak vtables in every
// compilation unit.
MATRIX::~MATRIX() = default;
// Returns true if there are any real classification results.
bool MATRIX::Classified(int col, int row, int wildcard_id) const {
if (get(col, row) == NOT_CLASSIFIED) {
return false;
}
BLOB_CHOICE_IT b_it(get(col, row));
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOB_CHOICE *choice = b_it.data();
if (choice->IsClassified()) {
return true;
}
}
return false;
}
// Expands the existing matrix in-place to make the band wider, without
// losing any existing data.
void MATRIX::IncreaseBandSize(int bandwidth) {
  // Keep the current dimension; only the band width changes.
  const int current_dim = dimension();
  ResizeWithCopy(current_dim, bandwidth);
}
// Returns a bigger MATRIX with a new column and row in the matrix in order
// to split the blob at the given (ind,ind) diagonal location.
// Entries are relocated to the new MATRIX using the transformation defined
// by MATRIX_COORD::MapForSplit.
// Transfers the pointer data to the new MATRIX and deletes *this.
MATRIX *MATRIX::ConsumeAndMakeBigger(int ind) {
  const int dim = dimension();
  const int old_band_width = bandwidth();
  // Check to see if bandwidth needs expanding: one extra band slot is added
  // if any of the inspected columns has a non-empty last band entry.
  int new_band_width = old_band_width;
  for (int col = ind; col >= 0 && col > ind - old_band_width; --col) {
    if (array_[col * old_band_width + old_band_width - 1] != empty_) {
      ++new_band_width;
      break;
    }
  }
  auto *bigger = new MATRIX(dim + 1, new_band_width);
  // Walk the existing band and move every non-null cell to its mapped
  // location in the new matrix.
  for (int col = 0; col < dim; ++col) {
    for (int row = col; row < dim && row < col + old_band_width; ++row) {
      MATRIX_COORD coord(col, row);
      coord.MapForSplit(ind);
      BLOB_CHOICE_LIST *choices = get(col, row);
      if (choices == nullptr) {
        continue;
      }
      // Correct matrix location on each choice.
      BLOB_CHOICE_IT choice_it(choices);
      for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
        choice_it.data()->set_matrix_cell(coord.col, coord.row);
      }
      ASSERT_HOST(coord.Valid(*bigger));
      bigger->put(coord.col, coord.row, choices);
    }
  }
  // The list pointers now live in the new matrix; this object is consumed.
  delete this;
  return bigger;
}
// Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
// on the lists, but not any LanguageModelState that may be attached to the
// BLOB_CHOICEs.
MATRIX *MATRIX::DeepCopy() const {
  const int dim = dimension();
  const int band_width = bandwidth();
  auto *clone = new MATRIX(dim, band_width);
  // Visit every cell of the band and duplicate the lists that exist.
  for (int col = 0; col < dim; ++col) {
    for (int row = col; row < dim && row < col + band_width; ++row) {
      BLOB_CHOICE_LIST *source_list = get(col, row);
      if (source_list == nullptr) {
        continue;
      }
      auto *list_copy = new BLOB_CHOICE_LIST;
      list_copy->deep_copy(source_list, &BLOB_CHOICE::deep_copy);
      clone->put(col, row, list_copy);
    }
  }
  return clone;
}
// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) const {
tprintf("Ratings Matrix (top 3 choices)\n");
int dim = dimension();
int band_width = bandwidth();
int row, col;
for (col = 0; col < dim; ++col) {
for (row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating == NOT_CLASSIFIED) {
continue;
}
BLOB_CHOICE_IT b_it(rating);
tprintf("col=%d row=%d ", col, row);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(b_it.data()->unichar_id()),
b_it.data()->rating(), b_it.data()->certainty());
}
tprintf("\n");
}
tprintf("\n");
}
tprintf("\n");
for (col = 0; col < dim; ++col) {
tprintf("\t%d", col);
}
tprintf("\n");
for (row = 0; row < dim; ++row) {
for (col = 0; col <= row; ++col) {
if (col == 0) {
tprintf("%d\t", row);
}
if (row >= col + band_width) {
tprintf(" \t");
continue;
}
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating != NOT_CLASSIFIED) {
BLOB_CHOICE_IT b_it(rating);
int counter = 0;
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
++counter;
if (counter == 3) {
break;
}
}
tprintf("\t");
} else {
tprintf(" \t");
}
}
tprintf("\n");
}
}
} // namespace tesseract

View File

@ -0,0 +1,728 @@
/******************************************************************************
* File: matrix.h
* Description: Generic 2-d array/matrix and banded triangular matrix class.
* Author: Ray Smith
* TODO(rays) Separate from ratings matrix, which it also contains:
*
* Description: Ratings matrix class (specialization of banded matrix).
* Segmentation search matrix of lists of BLOB_CHOICE.
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
#ifndef TESSERACT_CCSTRUCT_MATRIX_H_
#define TESSERACT_CCSTRUCT_MATRIX_H_
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for ReverseN, ClipToRange
#include "kdpair.h" // for KDPairInc
#include "points.h" // for ICOORD
#include "serialis.h" // for TFile
#include <algorithm> // for max, min
#include <cmath> // for sqrt, fabs, isfinite
#include <cstdint> // for int32_t
#include <cstdio> // for FILE
#include <cstring> // for memcpy
namespace tesseract {
// Forward declarations of types defined elsewhere.
class BLOB_CHOICE_LIST;
class UNICHARSET;
// Null BLOB_CHOICE_LIST pointer, typed so it can be compared directly against
// matrix cells.
#define NOT_CLASSIFIED static_cast<BLOB_CHOICE_LIST *>(nullptr)
// A generic class to hold a 2-D matrix with entries of type T, but can also
// act as a base class for other implementations, such as a triangular or
// banded matrix.
template <class T>
class GENERIC_2D_ARRAY {
public:
// Initializes the array size, and empty element, but cannot allocate memory
// for the subclasses or initialize because calls to the num_elements
// member will be routed to the base class implementation. Subclasses can
// either pass the memory in, or allocate after by calling Resize().
GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty, T *array)
: empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) {
size_allocated_ = dim1 * dim2;
}
// Original constructor for a full rectangular matrix DOES allocate memory
// and initialize it to empty.
GENERIC_2D_ARRAY(int dim1, int dim2, const T &empty) : empty_(empty), dim1_(dim1), dim2_(dim2) {
int new_size = dim1 * dim2;
array_ = new T[new_size];
size_allocated_ = new_size;
for (int i = 0; i < size_allocated_; ++i) {
array_[i] = empty_;
}
}
// Default constructor for array allocation. Use Resize to set the size.
GENERIC_2D_ARRAY()
: array_(nullptr), empty_(static_cast<T>(0)), dim1_(0), dim2_(0), size_allocated_(0) {}
GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T> &src)
: array_(nullptr), empty_(static_cast<T>(0)), dim1_(0), dim2_(0), size_allocated_(0) {
*this = src;
}
virtual ~GENERIC_2D_ARRAY() {
delete[] array_;
}
void operator=(const GENERIC_2D_ARRAY<T> &src) {
ResizeNoInit(src.dim1(), src.dim2());
int size = num_elements();
if (size > 0) {
memcpy(array_, src.array_, size * sizeof(array_[0]));
}
}
// Reallocates the array to the given size. Does not keep old data, but does
// not initialize the array either.
// The allocated memory is expanded on the end by pad, allowing deliberate
// access beyond the bounds of the array.
void ResizeNoInit(int size1, int size2, int pad = 0) {
int new_size = size1 * size2 + pad;
if (new_size > size_allocated_) {
delete[] array_;
array_ = new T[new_size];
size_allocated_ = new_size;
}
dim1_ = size1;
dim2_ = size2;
// Fill the padding data so it isn't uninitialized.
for (int i = size1 * size2; i < new_size; ++i) {
array_[i] = empty_;
}
}
// Reallocate the array to the given size. Does not keep old data.
void Resize(int size1, int size2, const T &empty) {
empty_ = empty;
ResizeNoInit(size1, size2);
Clear();
}
// Reallocate the array to the given size, keeping old data.
void ResizeWithCopy(int size1, int size2) {
if (size1 != dim1_ || size2 != dim2_) {
int new_size = size1 * size2;
T *new_array = new T[new_size];
for (int col = 0; col < size1; ++col) {
for (int row = 0; row < size2; ++row) {
int old_index = col * dim2() + row;
int new_index = col * size2 + row;
if (col < dim1_ && row < dim2_) {
new_array[new_index] = array_[old_index];
} else {
new_array[new_index] = empty_;
}
}
}
delete[] array_;
array_ = new_array;
dim1_ = size1;
dim2_ = size2;
size_allocated_ = new_size;
}
}
// Sets all the elements of the array to the empty value.
void Clear() {
int total_size = num_elements();
for (int i = 0; i < total_size; ++i) {
array_[i] = empty_;
}
}
// Writes to the given file. Returns false in case of error.
// Only works with bitwise-serializeable types!
bool Serialize(FILE *fp) const {
if (!SerializeSize(fp)) {
return false;
}
if (!tesseract::Serialize(fp, &empty_)) {
return false;
}
int size = num_elements();
return tesseract::Serialize(fp, &array_[0], size);
}
bool Serialize(TFile *fp) const {
if (!SerializeSize(fp)) {
return false;
}
if (!fp->Serialize(&empty_)) {
return false;
}
int size = num_elements();
return fp->Serialize(&array_[0], size);
}
// Reads from the given file. Returns false in case of error.
// Only works with bitwise-serializeable types!
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE *fp) {
if (!DeSerializeSize(swap, fp)) {
return false;
}
if (!tesseract::DeSerialize(fp, &empty_)) {
return false;
}
if (swap) {
ReverseN(&empty_, sizeof(empty_));
}
int size = num_elements();
if (!tesseract::DeSerialize(fp, &array_[0], size)) {
return false;
}
if (swap) {
for (int i = 0; i < size; ++i) {
ReverseN(&array_[i], sizeof(array_[i]));
}
}
return true;
}
bool DeSerialize(TFile *fp) {
return DeSerializeSize(fp) && fp->DeSerialize(&empty_) &&
fp->DeSerialize(&array_[0], num_elements());
}
// Writes to the given file. Returns false in case of error.
// Assumes a T::Serialize(FILE*) const function.
bool SerializeClasses(FILE *fp) const {
if (!SerializeSize(fp)) {
return false;
}
if (!empty_.Serialize(fp)) {
return false;
}
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].Serialize(fp)) {
return false;
}
}
return true;
}
// Reads from the given file. Returns false in case of error.
// Assumes a T::DeSerialize(bool swap, FILE*) function.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE *fp) {
if (!DeSerializeSize(swap, fp)) {
return false;
}
if (!empty_.DeSerialize(swap, fp)) {
return false;
}
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].DeSerialize(swap, fp)) {
return false;
}
}
return true;
}
// Provide the dimensions of this rectangular matrix.
int dim1() const {
return dim1_;
}
int dim2() const {
return dim2_;
}
// Returns the number of elements in the array.
// Banded/triangular matrices may override.
virtual int num_elements() const {
return dim1_ * dim2_;
}
// Expression to select a specific location in the matrix. The matrix is
// stored COLUMN-major, so the left-most index is the most significant.
// This allows [][] access to use indices in the same order as (,).
virtual int index(int column, int row) const {
return (column * dim2_ + row);
}
// Put a list element into the matrix at a specific location.
void put(ICOORD pos, const T &thing) {
array_[this->index(pos.x(), pos.y())] = thing;
}
void put(int column, int row, const T &thing) {
array_[this->index(column, row)] = thing;
}
// Get the item at a specified location from the matrix.
T get(ICOORD pos) const {
return array_[this->index(pos.x(), pos.y())];
}
T get(int column, int row) const {
return array_[this->index(column, row)];
}
// Reference access to the cell at (column, row); read-only overload.
const T &operator()(int column, int row) const {
  const int offset = this->index(column, row);
  return array_[offset];
}
// Reference access to the cell at (column, row); writable overload.
T &operator()(int column, int row) {
  const int offset = this->index(column, row);
  return array_[offset];
}
// Supports array[column][row] access by returning a pointer to the cell at
// (column, 0). NOTE that the indices are in the same left-to-right order as
// for the () operator.
T *operator[](int column) {
  return array_ + this->index(column, 0);
}
const T *operator[](int column) const {
  return array_ + this->index(column, 0);
}
// Adds addend to *this, element-by-element.
void operator+=(const GENERIC_2D_ARRAY<T> &addend) {
  if (dim2_ != addend.dim2_) {
    // The minor dimensions differ, so the flat layouts do not line up: go
    // through the (column, row) accessors. addend is read at every
    // coordinate of *this.
    for (int col = 0; col < dim1_; ++col) {
      for (int row = 0; row < dim2_; ++row) {
        (*this)(col, row) += addend(col, row);
      }
    }
    return;
  }
  // Same minor dimension: walk the flat storage directly, limited to the
  // smaller of the two element counts.
  const int count = std::min(num_elements(), addend.num_elements());
  for (int idx = 0; idx < count; ++idx) {
    array_[idx] += addend.array_[idx];
  }
}
// Subtracts the argument from *this, element-by-element. (The parameter is
// called minuend, but it is the quantity being subtracted.)
void operator-=(const GENERIC_2D_ARRAY<T> &minuend) {
  if (dim2_ != minuend.dim2_) {
    // The minor dimensions differ, so the flat layouts do not line up: go
    // through the (column, row) accessors. minuend is read at every
    // coordinate of *this.
    for (int col = 0; col < dim1_; ++col) {
      for (int row = 0; row < dim2_; ++row) {
        (*this)(col, row) -= minuend(col, row);
      }
    }
    return;
  }
  // Same minor dimension: walk the flat storage directly, limited to the
  // smaller of the two element counts.
  const int count = std::min(num_elements(), minuend.num_elements());
  for (int idx = 0; idx < count; ++idx) {
    array_[idx] -= minuend.array_[idx];
  }
}
// Adds the scalar addend to every stored element.
void operator+=(const T &addend) {
  for (int idx = 0, count = num_elements(); idx < count; ++idx) {
    array_[idx] += addend;
  }
}
// Multiplies every stored element by the scalar factor.
void operator*=(const T &factor) {
  for (int idx = 0, count = num_elements(); idx < count; ++idx) {
    array_[idx] *= factor;
  }
}
// Clips *this to the given range.
void Clip(const T &rangemin, const T &rangemax) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] = ClipToRange(array_[i], rangemin, rangemax);
}
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T &rangemin, const T &rangemax) const {
int size = num_elements();
for (int i = 0; i < size; ++i) {
const T &value = array_[i];
if (value < rangemin || rangemax < value) {
return false;
}
}
return true;
}
// Normalizes the whole array in place: subtracts the mean from every element
// and, if the resulting standard deviation is positive, divides by it.
// Returns the standard deviation (0.0 if the array has no elements).
double Normalize() {
  const int count = num_elements();
  if (count <= 0) {
    return 0.0;
  }
  // Pass 1: mean of all elements.
  double mean = 0.0;
  for (int idx = 0; idx < count; ++idx) {
    mean += array_[idx];
  }
  mean /= count;
  // Pass 2: centre the data and accumulate the squared deviations.
  double sum_sq = 0.0;
  for (int idx = 0; idx < count; ++idx) {
    const double centred = array_[idx] - mean;
    array_[idx] = centred;
    sum_sq += centred * centred;
  }
  const double sd = sqrt(sum_sq / count);
  if (!(sd > 0.0)) {
    // Constant data: leave the centred values as they are.
    return sd;
  }
  // Pass 3: scale to unit standard deviation.
  for (int idx = 0; idx < count; ++idx) {
    array_[idx] /= sd;
  }
  return sd;
}
// Returns the largest element according to operator>, or empty_ if the array
// has no elements.
T Max() const {
  const int count = num_elements();
  if (count <= 0) {
    return empty_;
  }
  // Seed with the first element and scan the remainder.
  T best = array_[0];
  for (int idx = 1; idx < count; ++idx) {
    const T &candidate = array_[idx];
    if (candidate > best) {
      best = candidate;
    }
  }
  return best;
}
// Returns the largest fabs() of any element, converted back to T, or empty_
// if the array has no elements.
T MaxAbs() const {
  const int count = num_elements();
  if (count <= 0) {
    return empty_;
  }
  // Absolute values are never negative, so zero is a safe starting point.
  T largest = static_cast<T>(0);
  for (int idx = 0; idx < count; ++idx) {
    const T magnitude = static_cast<T>(fabs(array_[idx]));
    if (magnitude > largest) {
      largest = magnitude;
    }
  }
  return largest;
}
// Accumulates the element-wise sums of squares of src into *this.
void SumSquares(const GENERIC_2D_ARRAY<T> &src, const T &decay_factor) {
T update_factor = 1.0 - decay_factor;
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] = array_[i] * decay_factor + update_factor * src.array_[i] * src.array_[i];
}
}
// Applies an Adam-style update to every element:
//   this[i] += sum[i] / (sqrt(sqsum[i]) + epsilon).
// sum and sqsum are indexed over num_elements() of *this; no size check is
// made.
void AdamUpdate(const GENERIC_2D_ARRAY<T> &sum, const GENERIC_2D_ARRAY<T> &sqsum,
                const T &epsilon) {
  for (int idx = 0, count = num_elements(); idx < count; ++idx) {
    array_[idx] += sum.array_[idx] / (sqrt(sqsum.array_[idx]) + epsilon);
  }
}
// Checks with ASSERT_HOST that every element satisfies isfinite().
void AssertFinite() const {
  for (int idx = 0, count = num_elements(); idx < count; ++idx) {
    ASSERT_HOST(isfinite(array_[idx]));
  }
}
// REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
// num_dims-dimensional array/tensor with dimensions given by dims, (ordered
// from most significant to least significant, the same as standard C arrays)
// and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
// in between shifted towards the hole left by src_dim. Example:
// Current data content: array_=[0, 1, 2, ....119]
// perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
// but the current dimensions are irrelevant.
// num_dims = 4, dims=[5, 4, 3, 2]
// src_dim=3, dest_dim=1
// tensor=[[[[0, 1][2, 3][4, 5]]
// [[6, 7][8, 9][10, 11]]
// [[12, 13][14, 15][16, 17]]
// [[18, 19][20, 21][22, 23]]]
// [[[24, 25]...
// output dims =[5, 2, 4, 3]
// output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
// [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
// [[[24, 26, 28]...
// which is stored in the array_ as:
// [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
// NOTE: the 2 stored matrix dimensions are simply copied from *this. To
// change the dimensions after the transpose, use ResizeNoInit.
// Higher dimensions above 2 are strictly the responsibility of the caller.
// Parameters:
//   dims:     sizes of the num_dims tensor dimensions, most significant first.
//   src_dim:  index (into dims) of the dimension to move.
//   dest_dim: index (into dims) at which that dimension ends up.
//   result:   receives the rearranged data; it is resized to the stored
//             dim1_ x dim2_ of *this and its empty_ is copied from *this.
// NOTE(review): data is moved with memcpy, so this presumably expects T to be
// trivially copyable, and result to be a different object from *this --
// confirm against callers.
void RotatingTranspose(const int *dims, int num_dims, int src_dim, int dest_dim,
GENERIC_2D_ARRAY<T> *result) const {
int max_d = std::max(src_dim, dest_dim);
int min_d = std::min(src_dim, dest_dim);
// In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
// ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
// being contiguous blocks of data that will move together, and
// [d0, min_d -1] being replicas of the transpose operation.
// num_replicas represents the large dimensions unchanged by the operation.
// move_size represents the small dimensions unchanged by the operation.
// src_step represents the stride in the src between each adjacent group
// in the destination.
int num_replicas = 1, move_size = 1, src_step = 1;
// Product of the dimensions more significant than min_d.
for (int d = 0; d < min_d; ++d) {
num_replicas *= dims[d];
}
// Product of the dimensions less significant than max_d.
for (int d = max_d + 1; d < num_dims; ++d) {
move_size *= dims[d];
}
// Product of the dimensions less significant than src_dim...
for (int d = src_dim + 1; d < num_dims; ++d) {
src_step *= dims[d];
}
// ...including src_dim itself when it is being moved towards the front.
if (src_dim > dest_dim) {
src_step *= dims[src_dim];
}
// wrap_size is the size of a single replica, being the amount that is
// handled num_replicas times.
int wrap_size = move_size;
for (int d = min_d; d <= max_d; ++d) {
wrap_size *= dims[d];
}
// The 2-d dimensions stored in result are simply those of *this.
result->ResizeNoInit(dim1_, dim2_);
result->empty_ = empty_;
const T *src = array_;
T *dest = result->array_;
// dest is written strictly sequentially; src is read in strided order
// within the current replica.
for (int replica = 0; replica < num_replicas; ++replica) {
for (int start = 0; start < src_step; start += move_size) {
for (int pos = start; pos < wrap_size; pos += src_step) {
memcpy(dest, src + pos, sizeof(*dest) * move_size);
dest += move_size;
}
}
// Advance to the start of the next replica in the source.
src += wrap_size;
}
}
// Delete objects pointed to by array_[i].
void delete_matrix_pointers() {
int size = num_elements();
for (int i = 0; i < size; ++i) {
T matrix_cell = array_[i];
if (matrix_cell != empty_) {
delete matrix_cell;
}
}
}
protected:
// Helper that writes dim1_ followed by dim2_ to fp, each as a uint32_t.
// Returns false if either write fails.
bool SerializeSize(FILE *fp) const {
  uint32_t first = dim1_;
  uint32_t second = dim2_;
  return tesseract::Serialize(fp, &first) && tesseract::Serialize(fp, &second);
}
// Helper that writes dim1_ followed by dim2_ to the TFile, each as a
// uint32_t. Returns false if either write fails.
bool SerializeSize(TFile *fp) const {
  uint32_t first = dim1_;
  uint32_t second = dim2_;
  return fp->Serialize(&first) && fp->Serialize(&second);
}
// Helper that reads the two dimensions from fp and resizes the array to
// match, filling it with empty_.
// If swap is true, assumes a big/little-endian swap is needed.
// Returns false on a read error or if the dimensions are implausible, in
// which case the array is left untouched.
bool DeSerializeSize(bool swap, FILE *fp) {
  uint32_t size1, size2;
  if (!tesseract::DeSerialize(fp, &size1)) {
    return false;
  }
  if (!tesseract::DeSerialize(fp, &size2)) {
    return false;
  }
  if (swap) {
    ReverseN(&size1, sizeof(size1));
    ReverseN(&size2, sizeof(size2));
  }
  // Arbitrarily limit the size of each dimension to protect against bad data.
  if (size1 > UINT16_MAX || size2 > UINT16_MAX) {
    return false;
  }
  // Two in-range dimensions can still multiply to more than an int can hold
  // (65535 * 65535 > INT32_MAX). The element count is computed in int, so
  // reject such input rather than let it overflow.
  if (static_cast<uint64_t>(size1) * size2 > static_cast<uint64_t>(INT32_MAX)) {
    return false;
  }
  Resize(size1, size2, empty_);
  return true;
}
// Helper that reads the two dimensions from the TFile and resizes the array
// to match, filling it with empty_.
// Returns false on a read error or if the dimensions are implausible, in
// which case the array is left untouched.
bool DeSerializeSize(TFile *fp) {
  int32_t size1, size2;
  if (!fp->DeSerialize(&size1)) {
    return false;
  }
  if (!fp->DeSerialize(&size2)) {
    return false;
  }
  // The sizes are read as signed values, so corrupt data can yield negative
  // dimensions; these must be rejected as well as over-large ones.
  if (size1 < 0 || size1 > UINT16_MAX) {
    return false;
  }
  if (size2 < 0 || size2 > UINT16_MAX) {
    return false;
  }
  // Two in-range dimensions can still multiply to more than an int can hold
  // (65535 * 65535 > INT32_MAX). The element count is computed in int, so
  // reject such input rather than let it overflow.
  if (static_cast<int64_t>(size1) * size2 > static_cast<int64_t>(INT32_MAX)) {
    return false;
  }
  Resize(size1, size2, empty_);
  return true;
}
T *array_;
T empty_; // The unused cell.
int dim1_; // Size of the 1st dimension in indexing functions.
int dim2_; // Size of the 2nd dimension in indexing functions.
// The total size to which the array can be expanded before a realloc is
// needed. If Resize is used, memory is retained so it can be re-expanded
// without a further alloc, and this stores the allocated size.
int size_allocated_;
};
// A generic class to store a banded triangular matrix with entries of type T.
// In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is
// the number of bands, INCLUDING the diagonal. The storage is thus of size
// dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an
// assert will fail if row < col or row - col >= dim2.
template <class T>
class BandTriMatrix : public GENERIC_2D_ARRAY<T> {
public:
  // Allocate a piece of memory to hold a 2d-array of the given dimension.
  // Initialize all the elements of the array to empty instead of assuming
  // that a default constructor can be used.
  BandTriMatrix(int dim1, int dim2, const T &empty) : GENERIC_2D_ARRAY<T>(dim1, dim2, empty) {}
  // The default destructor will do.

  // Provide the dimensions of this matrix.
  // dimension is the size of the nominally square matrix.
  int dimension() const {
    return this->dim1_;
  }
  // bandwidth is the number of bands in the matrix, INCLUDING the diagonal.
  int bandwidth() const {
    return this->dim2_;
  }

  // Expression to select a specific location in the matrix. The matrix is
  // stored COLUMN-major, so the left-most index is the most significant.
  // This allows [][] access to use indices in the same order as (,).
  // Asserts that (column, row) lies on or below the diagonal and within the
  // band.
  int index(int column, int row) const override {
    ASSERT_HOST(row >= column);
    ASSERT_HOST(row - column < this->dim2_);
    return column * this->dim2_ + row - column;
  }

  // Appends array2 corner-to-corner to *this, making an array of dimension
  // equal to the sum of the individual dimensions and of bandwidth equal to
  // the larger of the two bandwidths.
  // array2 is not destroyed, but is left empty, as all elements are moved
  // to *this (each moved cell of array2 is overwritten with nullptr, so T is
  // expected to be a pointer type).
  void AttachOnCorner(BandTriMatrix<T> *array2) {
    int new_dim1 = this->dim1_ + array2->dim1_;
    int new_dim2 = std::max(this->dim2_, array2->dim2_);
    T *new_array = new T[new_dim1 * new_dim2];
    for (int col = 0; col < new_dim1; ++col) {
      for (int j = 0; j < new_dim2; ++j) {
        // j is the band offset from the diagonal, so the row is col + j.
        int new_index = col * new_dim2 + j;
        if (col < this->dim1_ && j < this->dim2_) {
          // Cell comes from the existing contents of *this.
          new_array[new_index] = this->get(col, col + j);
        } else if (col >= this->dim1_ && j < array2->dim2_) {
          // Cell comes from array2, whose columns start at this->dim1_.
          new_array[new_index] = array2->get(col - this->dim1_, col - this->dim1_ + j);
          array2->put(col - this->dim1_, col - this->dim1_ + j, nullptr);
        } else {
          // Band position present in neither source.
          new_array[new_index] = this->empty_;
        }
      }
    }
    delete[] this->array_;
    this->array_ = new_array;
    this->dim1_ = new_dim1;
    this->dim2_ = new_dim2;
    // Keep the recorded capacity in step with the buffer just installed;
    // otherwise it would still describe the buffer that was deleted.
    this->size_allocated_ = new_dim1 * new_dim2;
  }
};
// Banded triangular matrix whose cells are BLOB_CHOICE_LIST pointers. Cells
// are initialized to NOT_CLASSIFIED, which serves as the "empty" value.
class MATRIX : public BandTriMatrix<BLOB_CHOICE_LIST *> {
public:
// Creates a matrix of the given dimension (size of the nominally square
// matrix) and bandwidth (number of bands including the diagonal), with every
// cell set to NOT_CLASSIFIED.
MATRIX(int dimension, int bandwidth)
: BandTriMatrix<BLOB_CHOICE_LIST *>(dimension, bandwidth, NOT_CLASSIFIED) {}
// Defined out of line (not visible in this header section).
~MATRIX() override;
// Returns true if there are any real classification results.
bool Classified(int col, int row, int wildcard_id) const;
// Expands the existing matrix in-place to make the band wider, without
// losing any existing data.
void IncreaseBandSize(int bandwidth);
// Returns a bigger MATRIX with a new column and row in the matrix in order
// to split the blob at the given (ind,ind) diagonal location.
// Entries are relocated to the new MATRIX using the transformation defined
// by MATRIX_COORD::MapForSplit.
// Transfers the pointer data to the new MATRIX and deletes *this.
MATRIX *ConsumeAndMakeBigger(int ind);
// Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
// on the lists, but not any LanguageModelState that may be attached to the
// BLOB_CHOICEs.
MATRIX *DeepCopy() const;
// Print a shortened version of the contents of the matrix.
void print(const UNICHARSET &unicharset) const;
};
// A (col, row) position within a MATRIX.
struct MATRIX_COORD {
  // Deletion callback taking an untyped pointer: arg is cast to
  // MATRIX_COORD* and deleted.
  static void Delete(void *arg) {
    delete static_cast<MATRIX_COORD *>(arg);
  }
  // Default constructor required by GenericHeap.
  MATRIX_COORD() : col(0), row(0) {}
  MATRIX_COORD(int c, int r) : col(c), row(r) {}
  ~MATRIX_COORD() = default;

  // Returns true if this coordinate addresses a cell inside the band of m:
  // col in [0, dimension), row in [col, col + bandwidth) and row < dimension.
  bool Valid(const MATRIX &m) const {
    if (col < 0 || col >= m.dimension()) {
      return false;
    }
    if (row < col || row >= col + m.bandwidth()) {
      return false;
    }
    return row < m.dimension();
  }

  // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal
  // location.
  // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1),
  // making a new row at ind.
  // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+1,j+1),
  // making a new column at ind+1.
  void MapForSplit(int ind) {
    ASSERT_HOST(row >= col);
    col += (col > ind) ? 1 : 0;
    row += (row >= ind) ? 1 : 0;
    ASSERT_HOST(row >= col);
  }

  int col;
  int row;
};
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
using MatrixCoordPair = KDPairInc<float, MATRIX_COORD>;
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_MATRIX_H_

View File

@ -0,0 +1,77 @@
/**********************************************************************
* File: mod128.cpp (Formerly dir128.c)
* Description: Code to convert a DIR128 to an ICOORD.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mod128.h"
namespace tesseract {
// Direction lookup table: 256 values forming 128 consecutive (x, y) pairs.
// Pair k is approximately (1000 * cos(2*pi*k/128), 1000 * sin(2*pi*k/128)),
// e.g. pair 0 is (1000, 0), pair 16 is (707, 707) and pair 32 is (0, 1000).
static const int16_t idirtab[] = {
1000, 0, 998, 49, 995, 98, 989, 146, 980, 195, 970, 242, 956, 290, 941,
336, 923, 382, 903, 427, 881, 471, 857, 514, 831, 555, 803, 595, 773, 634,
740, 671, 707, 707, 671, 740, 634, 773, 595, 803, 555, 831, 514, 857, 471,
881, 427, 903, 382, 923, 336, 941, 290, 956, 242, 970, 195, 980, 146, 989,
98, 995, 49, 998, 0, 1000, -49, 998, -98, 995, -146, 989, -195, 980, -242,
970, -290, 956, -336, 941, -382, 923, -427, 903, -471, 881, -514, 857, -555, 831,
-595, 803, -634, 773, -671, 740, -707, 707, -740, 671, -773, 634, -803, 595, -831,
555, -857, 514, -881, 471, -903, 427, -923, 382, -941, 336, -956, 290, -970, 242,
-980, 195, -989, 146, -995, 98, -998, 49, -1000, 0, -998, -49, -995, -98, -989,
-146, -980, -195, -970, -242, -956, -290, -941, -336, -923, -382, -903, -427, -881, -471,
-857, -514, -831, -555, -803, -595, -773, -634, -740, -671, -707, -707, -671, -740, -634,
-773, -595, -803, -555, -831, -514, -857, -471, -881, -427, -903, -382, -923, -336, -941,
-290, -956, -242, -970, -195, -980, -146, -989, -98, -995, -49, -998, 0, -1000, 49,
-998, 98, -995, 146, -989, 195, -980, 242, -970, 290, -956, 336, -941, 382, -923,
427, -903, 471, -881, 514, -857, 555, -831, 595, -803, 634, -773, 671, -740, 707,
-707, 740, -671, 773, -634, 803, -595, 831, -555, 857, -514, 881, -471, 903, -427,
923, -382, 941, -336, 956, -290, 970, -242, 980, -195, 989, -146, 995, -98, 998,
-49};
// The same table viewed as an array of ICOORD, one per pair.
// NOTE(review): this presumably relies on ICOORD being laid out as exactly two
// consecutive int16_t (x, then y) -- ICOORD is defined elsewhere; confirm.
static const ICOORD *dirtab = reinterpret_cast<const ICOORD *>(idirtab);
/**********************************************************************
 * DIR128::DIR128
 *
 * Quantize the direction of an FCOORD to make a DIR128.
 *
 * A vector with a zero y component maps directly to 0 (x >= 0) or to
 * MODULUS / 2 (x < 0). Any other vector is located by a binary search of
 * dirtab, keeping the invariant that dirtab[low] * fc >= 0.
 * NOTE(review): operator* between the table entry and fc is defined
 * elsewhere; presumably it is the cross product -- confirm.
 **********************************************************************/
DIR128::DIR128(    // from fcoord
    const FCOORD fc // vector to quantize
) {
  // Horizontal vectors need no search.
  if (fc.y() == 0) {
    dir = (fc.x() >= 0) ? 0 : MODULUS / 2;
    return;
  }
  int lower = 0;
  int upper = MODULUS;
  // Halve the interval until lower and upper are adjacent.
  while (upper - lower > 1) {
    const int middle = (upper + lower) / 2;
    if (dirtab[middle] * fc >= 0) {
      lower = middle;
    } else {
      upper = middle;
    }
  }
  dir = lower;
}
} // namespace tesseract

View File

@ -0,0 +1,90 @@
/**********************************************************************
* File: mod128.h (Formerly dir128.h)
* Description: Header for class which implements modulo arithmetic.
* Author: Ray Smith
* Created: Tue Mar 26 17:48:13 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef MOD128_H
#define MOD128_H
#include "points.h"
namespace tesseract {
#define MODULUS 128 /*range of directions */
#define DIRBITS 7 // no of bits used
#define DIRSCALE 1000 // length of vector
// A direction stored as an int8_t, with arithmetic performed modulo MODULUS.
class DIR128 {
public:
  // Leaves dir uninitialized.
  DIR128() = default;

  // Constructs from an integer, reduced into the range [0, MODULUS).
  DIR128(int16_t value) : dir(Reduce(value)) {}

  // Quantizes the direction of the vector fc (defined out of line).
  DIR128(const FCOORD fc);

  // Assigns from an integer, reduced into the range [0, MODULUS).
  DIR128 &operator=(int16_t value) {
    dir = Reduce(value);
    return *this;
  }

  // Signed difference between two directions, brought into the range
  // [-MODULUS / 2, MODULUS / 2].
  int8_t operator-(const DIR128 &minus) const {
    int16_t difference = dir - minus.dir;
    if (difference > MODULUS / 2) {
      difference -= MODULUS;
    } else if (difference < -MODULUS / 2) {
      difference += MODULUS;
    }
    return static_cast<int8_t>(difference);
  }

  // Sum of two directions, reduced modulo MODULUS.
  DIR128 operator+(const DIR128 &add) const {
    return DIR128(static_cast<int16_t>(dir + add.dir));
  }

  // In-place version of operator+.
  DIR128 &operator+=(const DIR128 &add) {
    dir = Reduce(static_cast<int16_t>(dir + add.dir));
    return *this;
  }

  // Returns the stored direction value.
  int8_t get_dir() const {
    return dir;
  }

private:
  // Reduces value modulo MODULUS to a non-negative result.
  static int8_t Reduce(int16_t value) {
    value %= MODULUS;
    if (value < 0) {
      value += MODULUS;
    }
    return static_cast<int8_t>(value);
  }

  int8_t dir; // a direction
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,575 @@
/**********************************************************************
* File: normalis.cpp (Formerly denorm.c)
* Description: Code for the DENORM class.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "normalis.h"
#include <allheaders.h>
#include "blobs.h"
#include "helpers.h"
#include "matrix.h"
#include "ocrblock.h"
#include "unicharset.h"
#include "werd.h"
#include <cfloat> // for FLT_MAX
#include <cstdlib>
namespace tesseract {
// Tolerance, in pixels, used for the baseline and x-height on scripts that
// do not distinguish upper and lower case.
const int kSloppyTolerance = 4;
// Final tolerance, in pixels, added to the computed x-height range.
const float kFinalPixelTolerance = 0.125f;
// Default constructor: delegates all member setup to Init() (defined
// elsewhere in this file).
DENORM::DENORM() {
Init();
}
// Copy constructor: implemented in terms of operator=.
DENORM::DENORM(const DENORM &src) {
  // operator= starts by calling Clear() on *this, so every pointer this
  // object may own must hold a well-defined value first.
  // NOTE(review): Clear() is not visible here; it presumably releases x_map_
  // and y_map_ as well as rotation_, so all three are nulled -- confirm.
  rotation_ = nullptr;
  x_map_ = nullptr;
  y_map_ = nullptr;
  *this = src;
}
// Copy assignment. The rotation is deep copied; predecessor_, pix_ and block_
// are copied as plain pointers.
// NOTE(review): x_map_ and y_map_ are not assigned here, so the non-linear
// maps of src are not carried over to the copy -- confirm this is intended.
DENORM &DENORM::operator=(const DENORM &src) {
  // Self-assignment must be a no-op: Clear() below would otherwise discard
  // this object's own state before it is read back from src.
  if (this == &src) {
    return *this;
  }
  Clear();
  inverse_ = src.inverse_;
  predecessor_ = src.predecessor_;
  pix_ = src.pix_;
  block_ = src.block_;
  if (src.rotation_ == nullptr) {
    rotation_ = nullptr;
  } else {
    rotation_ = new FCOORD(*src.rotation_);
  }
  x_origin_ = src.x_origin_;
  y_origin_ = src.y_origin_;
  x_scale_ = src.x_scale_;
  y_scale_ = src.y_scale_;
  final_xshift_ = src.final_xshift_;
  final_yshift_ = src.final_yshift_;
  return *this;
}
// Destructor: delegates all cleanup to Clear() (defined elsewhere in this
// file).
DENORM::~DENORM() {
Clear();
}
// Initializes the denorm for a transformation. For details see the large
// comment in normalis.h.
// Arguments:
// block: if not nullptr, then this is the first transformation, and
// block->re_rotation() needs to be used after the Denorm
// transformation to get back to the image coords.
// rotation: if not nullptr, apply this rotation after translation to the
// origin and scaling. (Usually a classify rotation.)
// predecessor: if not nullptr, then predecessor has been applied to the
// input space and needs to be undone to complete the inverse.
// The above pointers are not owned by this DENORM and are assumed to live
// longer than this denorm, except rotation, which is deep copied on input.
//
// x_origin: The x origin which will be mapped to final_xshift in the result.
// y_origin: The y origin which will be mapped to final_yshift in the result.
// Added to result of row->baseline(x) if not nullptr.
//
// x_scale: scale factor for the x-coordinate.
// y_scale: scale factor for the y-coordinate. Ignored if segs is given.
// Note that these scale factors apply to the same x and y system as the
// x-origin and y-origin apply, ie after any block rotation, but before
// the rotation argument is applied.
//
// final_xshift: The x component of the final translation.
// final_yshift: The y component of the final translation.
void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
const DENORM *predecessor, float x_origin, float y_origin,
float x_scale, float y_scale, float final_xshift,
float final_yshift) {
Clear();
block_ = block;
if (rotation == nullptr) {
rotation_ = nullptr;
} else {
rotation_ = new FCOORD(*rotation);
}
predecessor_ = predecessor;
x_origin_ = x_origin;
y_origin_ = y_origin;
x_scale_ = x_scale;
y_scale_ = y_scale;
final_xshift_ = final_xshift;
final_yshift_ = final_yshift;
}
// Helper for SetupNonLinear computes an image of shortest run-lengths from
// the x/y edges provided.
// Based on "A nonlinear normalization method for handprinted Kanji character
// recognition -- line density equalization" by Hiromitsu Yamada et al.
// Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding
// ______________ input x_coords and y_coords.
// | _________ | <empty>
// | | _ | | 1, 6
// | | | | | | 1, 3, 4, 6
// | | | | | | 1, 3, 4, 6
// | | | | | | 1, 3, 4, 6
// | | |_| | | 1, 3, 4, 6
// | |_________| | 1, 6
// |_____________| <empty>
// E 1 1 1 1 1 E
// m 7 7 2 7 7 m
// p 6 p
// t 7 t
// y y
// The output image contains the min of the x and y run-length (distance
// between edges) at each coordinate in the image thus:
// ______________
// |7 1_1_1_1_1 7|
// |1|5 5 1 5 5|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|5_5_1_5_5|1|
// |7_1_1_1_1_1_7|
// Note that the input coords are all integer, so all partial pixels are dealt
// with elsewhere. Although it is nice for outlines to be properly connected
// and continuous, there is no requirement that they be as such, so they could
// have been derived from a flaky source, such as greyscale.
// This function works only within the provided box, and it is assumed that the
// input x_coords and y_coords have already been translated to have the bottom-
// left of box as the origin. Although an output, the minruns should have been
// pre-initialized to be the same size as box. Each element will contain the
// minimum of x and y run-length as shown above.
static void ComputeRunlengthImage(const TBOX &box,
                                  const std::vector<std::vector<int>> &x_coords,
                                  const std::vector<std::vector<int>> &y_coords,
                                  GENERIC_2D_ARRAY<int> *minruns) {
  const int width = box.width();
  const int height = box.height();
  ASSERT_HOST(minruns->dim1() == width);
  ASSERT_HOST(minruns->dim2() == height);
  // Pass 1: for each column, write the vertical run length (distance between
  // successive horizontal edges) into every pixel of that run.
  for (int col = 0; col < width; ++col) {
    int next_y = 0;
    for (int edge : y_coords[col]) {
      const int clipped_edge = ClipToRange(edge, 0, height);
      const int run = clipped_edge - next_y;
      // Every pixel between the previous edge and this one gets the run.
      for (; next_y < clipped_edge; ++next_y) {
        (*minruns)(col, next_y) = run;
      }
    }
    // Pretend there is a bounding box of edges all around the image.
    const int last_run = height - next_y;
    for (; next_y < height; ++next_y) {
      (*minruns)(col, next_y) = last_run;
    }
  }
  // Pass 2: for each row, lower each pixel to the horizontal run length where
  // that is smaller, leaving the MIN of the x and y run lengths.
  for (int line = 0; line < height; ++line) {
    int next_x = 0;
    for (int edge : x_coords[line]) {
      const int clipped_edge = ClipToRange(edge, 0, width);
      const int run = clipped_edge - next_x;
      for (; next_x < clipped_edge; ++next_x) {
        int &cell = (*minruns)(next_x, line);
        if (run < cell) {
          cell = run;
        }
      }
    }
    // Final run up to the right-hand edge of the box.
    const int last_run = width - next_x;
    for (; next_x < width; ++next_x) {
      int &cell = (*minruns)(next_x, line);
      if (last_run < cell) {
        cell = last_run;
      }
    }
  }
}
// Converts the run-length image (see above) to the edge density profiles used
// for scaling, thus:
// ______________
// |7 1_1_1_1_1 7| = 5.28
// |1|5 5 1 5 5|1| = 3.8
// |1|2 2|1|2 2|1| = 5
// |1|2 2|1|2 2|1| = 5
// |1|2 2|1|2 2|1| = 5
// |1|2 2|1|2 2|1| = 5
// |1|5_5_1_5_5|1| = 3.8
// |7_1_1_1_1_1_7| = 5.28
// 6 4 4 8 4 4 6
// . . . . . . .
// 2 4 4 0 4 4 2
// 8 8
// Each profile is the sum of the reciprocals of the pixels in the image in
// the appropriate row or column, and these are then normalized to sum to 1.
// On output hx, hy contain an extra element, which will eventually be used
// to guarantee that the top/right edge of the box (and anything beyond) always
// gets mapped to the maximum target coordinate.
// Parameters:
//   box:     supplies the width and height of the area covered by minruns.
//   minruns: run-length image, indexed (x, y); a run of 0 is treated as 1.
//   hx, hy:  outputs of size width + 1 and height + 1. Any previous contents
//            are discarded.
static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<int> &minruns,
                                       std::vector<float> &hx, std::vector<float> &hy) {
  int width = box.width();
  int height = box.height();
  // The profiles are built by accumulation, so they must start from zero.
  // assign() (unlike resize()) also resets elements that were already there.
  hx.assign(width + 1, 0.0f);
  hy.assign(height + 1, 0.0f);
  double total = 0.0;
  for (int iy = 0; iy < height; ++iy) {
    for (int ix = 0; ix < width; ++ix) {
      int run = minruns(ix, iy);
      // Guard against division by zero.
      if (run == 0) {
        run = 1;
      }
      float density = 1.0f / run;
      hx[ix] += density;
      hy[iy] += density;
    }
    // Row iy is complete at this point, so its sum can go into the total.
    total += hy[iy];
  }
  // Normalize each profile to sum to 1.
  if (total > 0.0) {
    for (int ix = 0; ix < width; ++ix) {
      hx[ix] /= total;
    }
    for (int iy = 0; iy < height; ++iy) {
      hy[iy] /= total;
    }
  }
  // There is an extra element in each array, so initialize to 1.
  hx[width] = 1.0f;
  hy[height] = 1.0f;
}
// Sets up the DENORM to execute a non-linear transformation based on
// preserving an even distribution of stroke edges. The transformation
// operates only within the given box.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
// The second-level vectors must all be sorted in ascending order.
// See comments on the helper functions above for more details.
void DENORM::SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
                            float target_height, float final_xshift, float final_yshift,
                            const std::vector<std::vector<int>> &x_coords,
                            const std::vector<std::vector<int>> &y_coords) {
  Clear();
  predecessor_ = predecessor;
  // x_map_ and y_map_ hold, for each integer offset from the box origin, the
  // output coordinate after scaling to target_width / target_height.
  x_map_ = new std::vector<float>;
  y_map_ = new std::vector<float>;
  std::vector<float> &x_map = *x_map_;
  std::vector<float> &y_map = *y_map_;
  // Build the image of minimum run lengths for the box.
  const int width = box.width();
  const int height = box.height();
  GENERIC_2D_ARRAY<int> minruns(width, height, 0);
  ComputeRunlengthImage(box, x_coords, y_coords, &minruns);
  // Edge density is the sum of the inverses of the run lengths; project it
  // onto each axis. The maps temporarily hold these density profiles.
  ComputeEdgeDensityProfiles(box, minruns, x_map, y_map);
  // Turn each density profile into coordinates by scaling to the target size
  // and accumulating downwards from the far edge, which is pinned to the
  // target size.
  x_map[width] = target_width;
  for (int x = width; x-- > 0;) {
    x_map[x] = x_map[x + 1] - x_map[x] * target_width;
  }
  y_map[height] = target_height;
  for (int y = height; y-- > 0;) {
    y_map[y] = y_map[y + 1] - y_map[y] * target_height;
  }
  // The maps are indexed relative to the bottom-left corner of the box.
  x_origin_ = box.left();
  y_origin_ = box.bottom();
  final_xshift_ = final_xshift;
  final_yshift_ = final_yshift;
}
// Transforms the given coords one step forward to normalized space, without
// using any block rotation or predecessor.
void DENORM::LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const {
FCOORD src_pt(pt.x, pt.y);
FCOORD float_result;
LocalNormTransform(src_pt, &float_result);
transformed->x = IntCastRounded(float_result.x());
transformed->y = IntCastRounded(float_result.y());
}
void DENORM::LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const {
FCOORD translated(pt.x() - x_origin_, pt.y() - y_origin_);
if (x_map_ != nullptr && y_map_ != nullptr) {
int x = ClipToRange(IntCastRounded(translated.x()), 0, static_cast<int>(x_map_->size() - 1));
translated.set_x((*x_map_)[x]);
int y = ClipToRange(IntCastRounded(translated.y()), 0, static_cast<int>(y_map_->size() - 1));
translated.set_y((*y_map_)[y]);
} else {
translated.set_x(translated.x() * x_scale_);
translated.set_y(translated.y() * y_scale_);
if (rotation_ != nullptr) {
translated.rotate(*rotation_);
}
}
transformed->set_x(translated.x() + final_xshift_);
transformed->set_y(translated.y() + final_yshift_);
}
// Transforms the given coords forward to normalized space using the
// full transformation sequence defined by the block rotation, the
// predecessors, deepest first, and finally this. If first_norm is not nullptr,
// then the first and deepest transformation used is first_norm, ending
// with this, and the block rotation will not be applied.
void DENORM::NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const {
FCOORD src_pt(pt.x, pt.y);
FCOORD float_result;
NormTransform(first_norm, src_pt, &float_result);
transformed->x = IntCastRounded(float_result.x());
transformed->y = IntCastRounded(float_result.y());
}
void DENORM::NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const {
FCOORD src_pt(pt);
if (first_norm != this) {
if (predecessor_ != nullptr) {
predecessor_->NormTransform(first_norm, pt, &src_pt);
} else if (block_ != nullptr) {
FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y());
src_pt.rotate(fwd_rotation);
}
}
LocalNormTransform(src_pt, transformed);
}
// Transforms the given coords one step back to source space, without
// using to any block rotation or predecessor.
void DENORM::LocalDenormTransform(const TPOINT &pt, TPOINT *original) const {
FCOORD src_pt(pt.x, pt.y);
FCOORD float_result;
LocalDenormTransform(src_pt, &float_result);
original->x = IntCastRounded(float_result.x());
original->y = IntCastRounded(float_result.y());
}
void DENORM::LocalDenormTransform(const FCOORD &pt, FCOORD *original) const {
FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_);
if (x_map_ != nullptr && y_map_ != nullptr) {
auto pos = std::upper_bound(x_map_->begin(), x_map_->end(), rotated.x());
if (pos > x_map_->begin()) {
--pos;
}
auto x = pos - x_map_->begin();
original->set_x(x + x_origin_);
pos = std::upper_bound(y_map_->begin(), y_map_->end(), rotated.y());
if (pos > y_map_->begin()) {
--pos;
}
auto y = pos - y_map_->begin();
original->set_y(y + y_origin_);
} else {
if (rotation_ != nullptr) {
FCOORD inverse_rotation(rotation_->x(), -rotation_->y());
rotated.rotate(inverse_rotation);
}
original->set_x(rotated.x() / x_scale_ + x_origin_);
float y_scale = y_scale_;
original->set_y(rotated.y() / y_scale + y_origin_);
}
}
// Transforms the given coords all the way back to source image space using
// the full transformation sequence defined by this and its predecessors
// recursively, shallowest first, and finally any block re_rotation.
// If last_denorm is not nullptr, then the last transformation used will
// be last_denorm, and the block re_rotation will never be executed.
void DENORM::DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const {
FCOORD src_pt(pt.x, pt.y);
FCOORD float_result;
DenormTransform(last_denorm, src_pt, &float_result);
original->x = IntCastRounded(float_result.x());
original->y = IntCastRounded(float_result.y());
}
void DENORM::DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const {
  // Always undo this DENORM's own transformation first.
  LocalDenormTransform(pt, original);
  if (last_denorm == this) {
    // This was the last stage requested: stop before any predecessor or
    // block re_rotation.
    return;
  }
  if (predecessor_ != nullptr) {
    // Continue down the chain, transforming the intermediate result in place.
    predecessor_->DenormTransform(last_denorm, *original, original);
    return;
  }
  if (block_ != nullptr) {
    // End of the chain: apply the block re_rotation.
    original->rotate(block_->re_rotation());
  }
}
// Normalize a blob using blob transformations. Less accurate, but
// more accurately copies the old way.
void DENORM::LocalNormBlob(TBLOB *blob) const {
  // Step 1: move the (rounded) origin to (0, 0).
  const ICOORD to_origin(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_));
  blob->Move(to_origin);
  // Step 2: scale. Only y_scale_ is used here; x_scale_ is not consulted.
  if (y_scale_ != 1.0f) {
    blob->Scale(y_scale_);
  }
  // Step 3: optional rotation.
  if (rotation_ != nullptr) {
    blob->Rotate(*rotation_);
  }
  // Step 4: apply the (rounded) final translation.
  const ICOORD final_shift(IntCastRounded(final_xshift_), IntCastRounded(final_yshift_));
  blob->Move(final_shift);
}
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used.
void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox,
float *min_xht, float *max_xht, float *yshift) const {
// Outputs (all three are always written):
//   *yshift  - estimated vertical displacement of the character, scaled by
//              the yscale factor computed below.
//   *min_xht - lower bound of the accepted x-height range.
//   *max_xht - upper bound of the accepted x-height range.
// Default return -- accept anything.
*yshift = 0.0f;
*min_xht = 0.0f;
*max_xht = FLT_MAX;
// Without usable top/bottom statistics there is nothing to constrain, so the
// accept-anything defaults above are returned.
if (!unicharset.top_bottom_useful()) {
return;
}
// Clip the top and bottom to the limit of normalized feature space.
int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
// A tolerance of yscale corresponds to 1 pixel in the image.
double tolerance = y_scale();
// If the script doesn't have upper and lower-case characters, widen the
// tolerance to allow sloppy baseline/x-height estimates.
if (!unicharset.script_has_upper_lower()) {
tolerance = y_scale() * kSloppyTolerance;
}
// Expected ranges of the bottom and top of this unichar.
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
// Calculate the scale factor we'll use to get to image y-pixels
// Two points on the vertical mid-line of the box, ydiff apart, are
// denormalized all the way back; the distance between the results divided by
// ydiff is the scale factor. ydiff is the unclipped box height plus 2.0.
double midx = (bbox.left() + bbox.right()) / 2.0;
double ydiff = (bbox.top() - bbox.bottom()) + 2.0;
FCOORD mid_bot(midx, bbox.bottom()), tmid_bot;
FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high;
DenormTransform(nullptr, mid_bot, &tmid_bot);
DenormTransform(nullptr, mid_high, &tmid_high);
// bln_y_measure * yscale = image_y_measure
double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff;
// Calculate y-shift
// bottom_shift/top_shift are how far the clipped bottom/top lie outside the
// expected range (beyond the tolerance); zero when inside it.
int bln_yshift = 0, bottom_shift = 0, top_shift = 0;
if (bottom < min_bottom - tolerance) {
bottom_shift = bottom - min_bottom;
} else if (bottom > max_bottom + tolerance) {
bottom_shift = bottom - max_bottom;
}
if (top < min_top - tolerance) {
top_shift = top - min_top;
} else if (top > max_top + tolerance) {
top_shift = top - max_top;
}
// A shift is only reported when both ends moved the same way. Note that the
// upward case also accepts top_shift == 0, while the downward case requires
// both to be strictly negative. The shift is the integer (truncated) mean.
if ((top_shift >= 0 && bottom_shift > 0) || (top_shift < 0 && bottom_shift < 0)) {
bln_yshift = (top_shift + bottom_shift) / 2;
}
*yshift = bln_yshift * yscale;
// To help very high cap/xheight ratio fonts accept the correct x-height,
// and to allow the large caps in small caps to accept the xheight of the
// small caps, add kBlnBaselineOffset to chars with a maximum max, and have
// a top already at a significantly high position.
if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2) {
max_top += kBlnBaselineOffset;
}
// Remove the detected shift before measuring the height above the baseline.
top -= bln_yshift;
int height = top - kBlnBaselineOffset;
double min_height = min_top - kBlnBaselineOffset - tolerance;
double max_height = max_top - kBlnBaselineOffset + tolerance;
// We shouldn't try calculations if the characters are very short (for example
// for punctuation).
// kBlnXHeight / 8 is an integer division. When this test fails the
// accept-anything range set at the top is left in place.
if (min_height > kBlnXHeight / 8 && height > 0) {
// Dividing by the smallest expected height gives the largest x-height, and
// dividing by the largest expected height gives the smallest x-height.
float result = height * kBlnXHeight * yscale / min_height;
*max_xht = result + kFinalPixelTolerance;
result = height * kBlnXHeight * yscale / max_height;
*min_xht = result - kFinalPixelTolerance;
}
}
// Prints the content of the DENORM for debug purposes.
void DENORM::Print() const {
if (pix_ != nullptr) {
tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), pixGetHeight(pix_),
pixGetDepth(pix_));
}
if (inverse_) {
tprintf("Inverse\n");
}
if (block_ && block_->re_rotation().x() != 1.0f) {
tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), block_->re_rotation().y());
}
tprintf("Input Origin = (%g, %g)\n", x_origin_, y_origin_);
if (x_map_ != nullptr && y_map_ != nullptr) {
tprintf("x map:\n");
for (auto x : *x_map_) {
tprintf("%g ", x);
}
tprintf("\ny map:\n");
for (auto y : *y_map_) {
tprintf("%g ", y);
}
tprintf("\n");
} else {
tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_);
if (rotation_ != nullptr) {
tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y());
}
}
tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_xshift_);
if (predecessor_ != nullptr) {
tprintf("Predecessor:\n");
predecessor_->Print();
}
}
// ============== Private Code ======================
// Free allocated memory and clear pointers.
void DENORM::Clear() {
  // Release the three objects this DENORM deletes itself: the two non-linear
  // maps and the rotation.
  delete x_map_;
  delete y_map_;
  delete rotation_;
  // Reset the pointers so a later Clear() or destructor call is harmless.
  x_map_ = nullptr;
  y_map_ = nullptr;
  rotation_ = nullptr;
}
// Setup default values.
void DENORM::Init() {
  // Nothing is freed here: the pointers are simply overwritten.
  pix_ = nullptr;
  block_ = nullptr;
  predecessor_ = nullptr;
  rotation_ = nullptr;
  x_map_ = nullptr;
  y_map_ = nullptr;
  inverse_ = false;
  // Zero origin and unit scale; the only non-trivial default is the final
  // y shift of kBlnBaselineOffset.
  x_origin_ = y_origin_ = 0.0f;
  x_scale_ = y_scale_ = 1.0f;
  final_xshift_ = 0.0f;
  final_yshift_ = static_cast<float>(kBlnBaselineOffset);
}
} // namespace tesseract

View File

@ -0,0 +1,310 @@
/**********************************************************************
* File: normalis.h (Formerly denorm.h)
* Description: Code for the DENORM class.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef NORMALIS_H
#define NORMALIS_H
#include "image.h"
#include <tesseract/export.h>
#include <vector>
struct Pix;
namespace tesseract {
// Dimensions of the baseline-normalized (BLN) coordinate space.
// Full-height for baseline normalization.
constexpr int kBlnCellHeight = 256;
// x-height for baseline normalization.
constexpr int kBlnXHeight = 128;
// Offset for baseline normalization.
constexpr int kBlnBaselineOffset = 64;
class BLOCK;
class FCOORD;
class TBOX;
class UNICHARSET;
struct TBLOB;
struct TPOINT;
// Possible normalization methods. The values are deliberately NEGATIVE
// because they also double up as markers for the last sub-classifier.
enum NormalizationMode {
  // The original BL normalization mode.
  NM_BASELINE = -3,
  // Character normalization but isotropic.
  NM_CHAR_ISOTROPIC = -2,
  // The original CN normalization mode.
  NM_CHAR_ANISOTROPIC = -1
};
// Holds the parameters of a single normalization step and can run it both
// forwards (Norm*) and backwards (Denorm*). DENORMs can be chained through
// their predecessor pointer to describe a sequence of normalizations.
class TESS_API DENORM {
public:
DENORM();
// Copying a DENORM is allowed.
DENORM(const DENORM &);
DENORM &operator=(const DENORM &);
~DENORM();
// Setup the normalization transformation parameters.
// The normalizations applied to a blob are as follows:
// 1. An optional block layout rotation that was applied during layout
// analysis to make the textlines horizontal.
// 2. A normalization transformation (LocalNormTransform):
// Subtract the "origin"
// Apply an x,y scaling.
// Apply an optional rotation.
// Add back a final translation.
// The origin is in the block-rotated space, and is usually something like
// the x-middle of the word at the baseline.
// 3. Zero or more further normalization transformations that are applied
// in sequence, with a similar pattern to the first normalization transform.
//
// A DENORM holds the parameters of a single normalization, and can execute
// both the LocalNormTransform (a forwards normalization), and the
// LocalDenormTransform which is an inverse transform or de-normalization.
// A DENORM may point to a predecessor DENORM, which is actually the earlier
// normalization, so the full normalization sequence involves executing all
// predecessors first and then the transform in "this".
// Let x be image coordinates and that we have normalization classes A, B, C
// where we first apply A then B then C to get normalized x':
// x' = CBAx
// Then the backwards (to original coordinates) would be:
// x = A^-1 B^-1 C^-1 x'
// and A = B->predecessor_ and B = C->predecessor_
// NormTransform executes all predecessors recursively, and then this.
// NormTransform would be used to transform an image-based feature to
// normalized space for use in a classifier
// DenormTransform inverts this and then all predecessors. It can be
// used to get back to the original image coordinates from normalized space.
// The LocalNormTransform member executes just the transformation
// in "this" without the layout rotation or any predecessors. It would be
// used to run each successive normalization, eg the word normalization,
// and later the character normalization.
// Arguments:
// block: if not nullptr, then this is the first transformation, and
// block->re_rotation() needs to be used after the Denorm
// transformation to get back to the image coords.
// rotation: if not nullptr, apply this rotation after translation to the
// origin and scaling. (Usually a classify rotation.)
// predecessor: if not nullptr, then predecessor has been applied to the
// input space and needs to be undone to complete the inverse.
// The above pointers are not owned by this DENORM and are assumed to live
// longer than this denorm, except rotation, which is deep copied on input.
//
// x_origin: The x origin which will be mapped to final_xshift in the result.
// y_origin: The y origin which will be mapped to final_yshift in the result.
// Added to result of row->baseline(x) if not nullptr.
// NOTE(review): SetupNormalization has no row parameter, so the sentence
// above about row->baseline(x) looks stale -- confirm and remove.
//
// x_scale: scale factor for the x-coordinate.
// y_scale: scale factor for the y-coordinate. Ignored if segs is given.
// NOTE(review): SetupNormalization has no segs parameter, so "Ignored if
// segs is given" looks stale -- confirm and remove.
// Note that these scale factors apply to the same x and y system as the
// x-origin and y-origin apply, ie after any block rotation, but before
// the rotation argument is applied.
//
// final_xshift: The x component of the final translation.
// final_yshift: The y component of the final translation.
//
// In theory, any of the commonly used normalizations can be setup here:
// * Traditional baseline normalization on a word:
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), baseline,
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * "Numeric mode" baseline normalization on a word, in which the blobs
// are positioned with the bottom as the baseline is achieved by making
// a separate DENORM for each blob.
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), box.bottom(),
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * Anisotropic character normalization used by IntFx.
// SetupNormalization(nullptr, nullptr, denorm,
// centroid_x, centroid_y,
// 51.2 / ry, 51.2 / rx, 128, 128);
// * Normalize blob height to x-height (current OSD):
// SetupNormalization(nullptr, &rotation, nullptr,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Secondary normalization for classification rotation (current):
// FCOORD rotation = block->classify_rotation();
// float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
// SetupNormalization(nullptr, &rotation, denorm,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// target_height / box.rotational_height(rotation),
// target_height / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Proposed new normalizations for CJK: Between them there is then
// no need for further normalization at all, and the character fills the cell.
// ** Replacement for baseline normalization on a word:
// Scales height and width independently so that modal height and pitch
// fill the cell respectively.
// float cap_height = x_height / CCStruct::kXHeightCapRatio;
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), cap_height / 2.0f,
// kBlnCellHeight / fixed_pitch,
// kBlnCellHeight / cap_height,
// 0, 0);
// ** Secondary normalization for classification (with rotation) (proposed):
// Requires a simple translation to the center of the appropriate character
// cell, no further scaling and a simple rotation (or nothing) about the
// cell center.
// FCOORD rotation = block->classify_rotation();
// SetupNormalization(nullptr, &rotation, denorm,
// fixed_pitch_cell_center,
// 0.0f,
// 1.0f,
// 1.0f,
// 0, 0);
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
float x_origin, float y_origin, float x_scale, float y_scale,
float final_xshift, float final_yshift);
// Sets up the DENORM to execute a non-linear transformation based on
// preserving an even distribution of stroke edges. The transformation
// operates only within the given box, scaling input coords within the box
// non-linearly to a box of target_width by target_height, with all other
// coords being clipped to the box edge. As with SetupNormalization above,
// final_xshift and final_yshift are applied after scaling, and the bottom-
// left of box is used as a pre-scaling origin.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
// The second-level vectors must all be sorted in ascending order.
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
float target_height, float final_xshift, float final_yshift,
const std::vector<std::vector<int>> &x_coords,
const std::vector<std::vector<int>> &y_coords);
// Transforms the given coords one step forward to normalized space, without
// using any block rotation or predecessor.
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
// Transforms the given coords forward to normalized space using the
// full transformation sequence defined by the block rotation, the
// predecessors, deepest first, and finally this. If first_norm is not
// nullptr, then the first and deepest transformation used is first_norm,
// ending with this, and the block rotation will not be applied.
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
// Transforms the given coords one step back to source space, without
// using any block rotation or predecessor.
void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
// Transforms the given coords all the way back to source image space using
// the full transformation sequence defined by this and its predecessors
// recursively, shallowest first, and finally any block re_rotation.
// If last_denorm is not nullptr, then the last transformation used will
// be last_denorm, and the block re_rotation will never be executed.
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;
// Normalize a blob using blob transformations. Less accurate, but
// more accurately copies the old way.
void LocalNormBlob(TBLOB *blob) const;
// Fills in the x-height range accepted by the given unichar_id in blob
// coordinates, given its bounding box in the usual baseline-normalized
// coordinates, with some initial crude x-height estimate (such as word
// size) and this denoting the transformation that was used.
// Also returns the amount the character must have shifted up or down.
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
float *max_xht, float *yshift) const;
// Prints the content of the DENORM for debug purposes.
void Print() const;
// Returns the image stored by set_pix (the "best available image").
Image pix() const {
return pix_;
}
// Stores the image to be returned by pix().
void set_pix(Image pix) {
pix_ = pix;
}
// Returns true if the source image was flagged as white-on-black.
bool inverse() const {
return inverse_;
}
// Sets the white-on-black flag returned by inverse().
void set_inverse(bool value) {
inverse_ = value;
}
// Follows the predecessor chain to its end and returns the DENORM that has
// no predecessor, ie the earliest normalization in the sequence. Returns
// this if there is no predecessor.
const DENORM *RootDenorm() const {
if (predecessor_ != nullptr) {
return predecessor_->RootDenorm();
}
return this;
}
// Returns the previous transformation in the chain, or nullptr if none.
const DENORM *predecessor() const {
return predecessor_;
}
// Accessors - perhaps should not be needed.
float x_scale() const {
return x_scale_;
}
float y_scale() const {
return y_scale_;
}
// Returns the block the word came from, or nullptr if none was set.
const BLOCK *block() const {
return block_;
}
// Sets the block whose re_rotation() is used at the end of the chain.
// The block is not owned by the DENORM.
void set_block(const BLOCK *block) {
block_ = block;
}
private:
// Free allocated memory and clear pointers.
void Clear();
// Setup default values.
void Init();
// Best available image.
Image pix_;
// True if the source image is white-on-black.
bool inverse_;
// Block the word came from. If not null, block->re_rotation() takes the
// "untransformed" coordinates even further back to the original image.
// Used only on the first DENORM in a chain. Not owned.
const BLOCK *block_;
// Rotation to apply after translation to the origin and scaling, and before
// the final shift. Deep copied on input and deleted by Clear().
const FCOORD *rotation_;
// Previous transformation in a chain. Not owned.
const DENORM *predecessor_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding x-coord. Owned by the DENORM.
std::vector<float> *x_map_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding y-coord. Owned by the DENORM.
std::vector<float> *y_map_;
// x-coordinate to be mapped to final_xshift_ in the result.
float x_origin_;
// y-coordinate to be mapped to final_yshift_ in the result.
float y_origin_;
// Scale factors for x and y coords. Applied to pre-rotation system.
float x_scale_;
float y_scale_;
// Destination coords of the x_origin_ and y_origin_.
float final_xshift_;
float final_yshift_;
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,514 @@
/**********************************************************************
* File: ocrblock.cpp (Formerly block.c)
* Description: BLOCK member functions and iterator functions.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "ocrblock.h"
#include "stepblob.h"
#include "tprintf.h"
#include <cstdlib>
#include <memory> // std::unique_ptr
namespace tesseract {
/**
* BLOCK::BLOCK
*
* Constructor for a simple rectangular block.
*/
BLOCK::BLOCK(const char *name, ///< filename
             bool prop,        ///< proportional
             int16_t kern,     ///< kerning
             int16_t space,    ///< spacing
             int16_t xmin,     ///< bottom left x
             int16_t ymin,     ///< bottom left y
             int16_t xmax,     ///< top right x
             int16_t ymax)     ///< top right y
    : pdblk(xmin, ymin, xmax, ymax)
    , filename(name)
    , re_rotation_(1.0f, 0.0f)
    , classify_rotation_(1.0f, 0.0f)
    , skew_(1.0f, 0.0f) {
  // Scalar attributes supplied by the caller.
  proportional = prop;
  kerning = kern;
  spacing = space;
  // Fixed defaults.
  font_class = -1; // not assigned
  cell_over_xheight_ = 2.0f;
  pdblk.hand_poly = nullptr;

  // Make the default box: each side list receives its bottom point followed
  // by its top point.
  ICOORDELT_IT left_side_it(&pdblk.leftside);
  ICOORDELT_IT right_side_it(&pdblk.rightside);
  left_side_it.set_to_list(&pdblk.leftside);
  right_side_it.set_to_list(&pdblk.rightside);
  left_side_it.add_to_end(new ICOORDELT(xmin, ymin));
  left_side_it.add_to_end(new ICOORDELT(xmin, ymax));
  right_side_it.add_to_end(new ICOORDELT(xmax, ymin));
  right_side_it.add_to_end(new ICOORDELT(xmax, ymax));
}
/**
* decreasing_top_order
*
* Sort Comparator: Return <0 if row1 top > row2 top (higher rows sort first)
*/
static int decreasing_top_order(const void *row1, const void *row2) {
return (*reinterpret_cast<ROW *const *>(row2))->bounding_box().top() -
(*reinterpret_cast<ROW *const *>(row1))->bounding_box().top();
}
/**
* BLOCK::rotate
*
* Rotate the polygon by the given rotation and recompute the bounding_box.
*/
void BLOCK::rotate(const FCOORD &rotation) {
pdblk.poly_block()->rotate(rotation);
pdblk.box = *pdblk.poly_block()->bounding_box();
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
  // Accumulates the restricted bounding box of every row in the block.
  TBOX union_box;
  // The iterator type needs a non-const list, but the rows are only read.
  ROW_IT row_it(const_cast<ROW_LIST *>(&rows));
  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    union_box += row_it.data()->restricted_bounding_box(upper_dots, lower_dots);
    row_it.forward();
  }
  return union_box;
}
/**
* BLOCK::reflect_polygon_in_y_axis
*
* Reflects the polygon in the y-axis and recompute the bounding_box.
* Does nothing to any contained rows/words/blobs etc.
*/
void BLOCK::reflect_polygon_in_y_axis() {
pdblk.poly_block()->reflect_in_y_axis();
pdblk.box = *pdblk.poly_block()->bounding_box();
}
/**
* BLOCK::sort_rows
*
* Order rows so that they are in order of decreasing Y coordinate
*/
void BLOCK::sort_rows() { // order on "top"
ROW_IT row_it(&rows);
row_it.sort(decreasing_top_order);
}
/**
* BLOCK::compress
*
* Delete space between the rows. (And maybe one day, compress the rows)
* Fill space of block from top down, left aligning rows.
*/
void BLOCK::compress() { // squash it up
#define ROW_SPACING 5
  // Vertical gap left between consecutive rows after compression.
  ICOORD row_spacing(0, ROW_SPACING);

  sort_rows();
  // The row iterator is created only AFTER sorting. It used to be created
  // before sort_rows(), which left it holding the links it captured from the
  // unsorted list, so the traversal below could start from stale neighbours
  // when the rows were not already in order.
  ROW_IT row_it(&rows);

  // Collapse the box to its top-left corner, then stack each row beneath
  // whatever has been placed so far, left aligned, growing the box as we go.
  pdblk.box = TBOX(pdblk.box.topleft(), pdblk.box.topleft());
  pdblk.box.move_bottom_edge(ROW_SPACING);
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    ROW *row = row_it.data();
    row->move(pdblk.box.botleft() - row_spacing - row->bounding_box().topleft());
    pdblk.box += row->bounding_box();
  }

  // Rebuild both side lists as the straight edges of the new box
  // (bottom point first, then top point).
  ICOORDELT_IT icoordelt_it;
  pdblk.leftside.clear();
  icoordelt_it.set_to_list(&pdblk.leftside);
  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.bottom()));
  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.left(), pdblk.box.top()));
  pdblk.rightside.clear();
  icoordelt_it.set_to_list(&pdblk.rightside);
  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.bottom()));
  icoordelt_it.add_to_end(new ICOORDELT(pdblk.box.right(), pdblk.box.top()));
}
/**
* BLOCK::check_pitch
*
* Check whether the block is fixed or prop, set the flag, and set
* the pitch if it is fixed.
*/
void BLOCK::check_pitch() { // check prop
// Stub: no fixed-pitch detection is performed here. The only effect is to
// set pitch to -1; the proportional flag is not touched.
// tprintf("Missing FFT fixed pitch stuff!\n");
pitch = -1;
}
/**
* BLOCK::compress
*
* Compress and move in a single operation.
*/
void BLOCK::compress( // squash it up
const ICOORD vec // and move
) {
// Shift the bounding box by vec first; the no-argument compress() then
// rebuilds the block from the top-left corner of the moved box.
pdblk.box.move(vec);
compress();
}
/**
* BLOCK::print
*
* Print the info on a block
*/
void BLOCK::print( // print list of sides
FILE *, ///< file to print on
bool dump ///< print full detail
) {
ICOORDELT_IT it = &pdblk.leftside; // iterator
pdblk.box.print();
tprintf("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
tprintf("Kerning= %d\n", kerning);
tprintf("Spacing= %d\n", spacing);
tprintf("Fixed_pitch=%d\n", pitch);
tprintf("Filename= %s\n", filename.c_str());
if (dump) {
tprintf("Left side coords are:\n");
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
}
tprintf("\n");
tprintf("Right side coords are:\n");
it.set_to_list(&pdblk.rightside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
}
tprintf("\n");
}
}
/**
* BLOCK::operator=
*
* Assignment - duplicate the block structure, but with an EMPTY row list.
*/
BLOCK &BLOCK::operator=( // assignment
    const BLOCK &source  // from this
) {
  this->ELIST_LINK::operator=(source);

  // Geometry and rotations.
  pdblk = source.pdblk;
  re_rotation_ = source.re_rotation_;
  classify_rotation_ = source.classify_rotation_;
  skew_ = source.skew_;

  // Text attributes.
  // NOTE(review): pitch, font_class and cell_over_xheight_ are written
  // elsewhere in this file but are not copied here -- confirm intentional.
  proportional = source.proportional;
  kerning = source.kerning;
  spacing = source.spacing;
  filename = source.filename; // STRINGs assign ok

  // The rows of the source are deliberately not duplicated: the destination
  // always ends up with an empty row list.
  if (!rows.empty()) {
    rows.clear();
  }
  return *this;
}
// This function is for finding the approximate (horizontal) distance from
// the x-coordinate of the left edge of a symbol to the left edge of the
// text block which contains it. We are passed:
// segments - output of PB_LINE_IT::get_line() which contains x-coordinate
// intervals for the scan line going through the symbol's y-coordinate.
// Each element of segments is of the form (x()=start_x, y()=length).
// x - the x coordinate of the symbol we're interested in.
// margin - return value, the distance from x,y to the left margin of the
// block containing it.
// If all segments were to the right of x, we return false and 0.
static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
  // *margin is always written: 0 unless a qualifying segment is found.
  *margin = 0;
  if (segments->empty()) {
    return false;
  }
  bool any_start_at_or_left = false;
  ICOORDELT_IT seg_it(segments);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    // Distance from the start of this segment to x; negative means the
    // segment starts to the right of x and is ignored.
    const int distance = x - seg_it.data()->x();
    if (distance < 0) {
      continue;
    }
    // Keep the smallest non-negative distance seen so far.
    if (!any_start_at_or_left || distance < *margin) {
      *margin = distance;
    }
    any_start_at_or_left = true;
  }
  return any_start_at_or_left;
}
// This function is for finding the approximate (horizontal) distance from
// the x-coordinate of the right edge of a symbol to the right edge of the
// text block which contains it. We are passed:
// segments - output of PB_LINE_IT::get_line() which contains x-coordinate
// intervals for the scan line going through the symbol's y-coordinate.
// Each element of segments is of the form (x()=start_x, y()=length).
// x - the x coordinate of the symbol we're interested in.
// margin - return value, the distance from x,y to the right margin of the
// block containing it.
// If all segments were to the left of x, we return false and 0.
static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
  // *margin is always written: 0 unless a qualifying segment is found.
  *margin = 0;
  if (segments->empty()) {
    return false;
  }
  bool any_end_at_or_right = false;
  ICOORDELT_IT seg_it(segments);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    // Each element is (x()=start_x, y()=length), so start + length is the
    // end of the segment. A negative distance means the segment ends to the
    // left of x and is ignored.
    const int distance = seg_it.data()->x() + seg_it.data()->y() - x;
    if (distance < 0) {
      continue;
    }
    // Keep the smallest non-negative distance seen so far.
    if (!any_end_at_or_right || distance < *margin) {
      *margin = distance;
    }
    any_end_at_or_right = true;
  }
  return any_end_at_or_right;
}
// Compute the distance from the left and right ends of each row to the
// left and right edges of the block's polyblock. Illustration:
// ____________________________ _______________________
// | Howdy neighbor! | |rectangular blocks look|
// | This text is written to| |more like stacked pizza|
// |illustrate how useful poly- |boxes. |
// |blobs are in ----------- ------ The polyblob|
// |dealing with| _________ |for a BLOCK rec-|
// |harder layout| /===========\ |ords the possibly|
// |issues. | | _ _ | |skewed pseudo-|
// | You see this| | |_| \|_| | |rectangular |
// |text is flowed| | } | |boundary that|
// |around a mid-| \ ____ | |forms the ideal-|
// |column portrait._____ \ / __|ized text margin|
// | Polyblobs exist| \ / |from which we should|
// |to account for insets| | | |measure paragraph|
// |which make otherwise| ----- |indentation. |
// ----------------------- ----------------------
//
// If we identify a drop-cap, we measure the left margin for the lines
// below the first line relative to one space past the drop cap. The
// first line's margin and those past the drop cap area are measured
// relative to the enclosing polyblock.
//
// TODO(rays): Before this will work well, we'll need to adjust the
// polyblob tighter around the text near images, as in:
// UNLV_AUTO:mag.3G0 page 2
// UNLV_AUTO:mag.3G4 page 16
void BLOCK::compute_row_margins() {
// Sets the left and right margin of every row in the block. Nothing is done
// (and no margin is written) when the block has fewer than two rows.
if (row_list()->empty() || row_list()->singleton()) {
return;
}
// If Layout analysis was not called, default to this.
// (A rectangular polygon built from the bounding box stands in for the
// missing poly_block.)
POLY_BLOCK rect_block(pdblk.bounding_box(), PT_FLOWING_TEXT);
POLY_BLOCK *pblock = &rect_block;
if (pdblk.poly_block() != nullptr) {
pblock = pdblk.poly_block();
}
// Step One: Determine if there is a drop-cap.
// TODO(eger): Fix up drop cap code for RTL languages.
ROW_IT r_it(row_list());
// The iterator starts on the first row; data_relative(1) peeks at the next
// one without moving the iterator.
ROW *first_row = r_it.data();
ROW *second_row = r_it.data_relative(1);
// initialize the bottom of a fictitious drop cap far above the first line.
int drop_cap_bottom = first_row->bounding_box().top() + first_row->bounding_box().height();
int drop_cap_right = first_row->bounding_box().left();
int mid_second_line = second_row->bounding_box().top() - second_row->bounding_box().height() / 2;
WERD_IT werd_it(r_it.data()->word_list()); // words of line one
if (!werd_it.empty()) {
// Only the blobs of the word the iterator is positioned on are examined;
// the word iterator is never advanced.
C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
TBOX bbox = cblob_it.data()->bounding_box();
// A blob that reaches down to the middle of the second line (or lower) is
// treated as part of a drop cap.
if (bbox.bottom() <= mid_second_line) {
// we found a real drop cap
first_row->set_has_drop_cap(true);
// Track the lowest bottom and the rightmost edge of the drop cap blobs.
if (drop_cap_bottom > bbox.bottom()) {
drop_cap_bottom = bbox.bottom();
}
if (drop_cap_right < bbox.right()) {
drop_cap_right = bbox.right();
}
}
}
}
// Step Two: Calculate the margin from the text of each row to the block
// (or drop-cap) boundaries.
PB_LINE_IT lines(pblock);
r_it.set_to_list(row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW *row = r_it.data();
TBOX row_box = row->bounding_box();
// Scan line used for the left margin: the baseline at the left edge of the
// row, raised by the row's x-height.
int left_y = row->base_line(row_box.left()) + row->x_height();
int left_margin;
// get_line() returns a list that is owned here and freed by the unique_ptr.
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y));
// The return value is ignored: LeftMargin sets left_margin to 0 when no
// suitable segment is found.
LeftMargin(segments_left.get(), row_box.left(), &left_margin);
// Rows whose top is at or above the bottom of the drop cap may be limited
// by the drop cap rather than by the block edge.
if (row_box.top() >= drop_cap_bottom) {
// Distance to one space past the drop cap's right edge, clamped at 0.
int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
if (drop_cap_distance < 0) {
drop_cap_distance = 0;
}
if (drop_cap_distance < left_margin) {
left_margin = drop_cap_distance;
}
}
// Same construction for the right margin, using the right edge of the row.
int right_y = row->base_line(row_box.right()) + row->x_height();
int right_margin;
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y));
RightMargin(segments_right.get(), row_box.right(), &right_margin);
row->set_lmargin(left_margin);
row->set_rmargin(right_margin);
}
}
/**********************************************************************
* PrintSegmentationStats
*
* Prints segmentation stats for the given block list.
**********************************************************************/
void PrintSegmentationStats(BLOCK_LIST *block_list) {
  // Running totals over the whole block list.
  int block_total = 0;
  int row_total = 0;
  int word_total = 0;
  int blob_total = 0;

  BLOCK_IT blk_it(block_list);
  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
    ++block_total;
    ROW_IT row_it(blk_it.data()->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ++row_total;
      // Iterate over all werds in the row.
      WERD_IT word_it(row_it.data()->word_list());
      for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
        ++word_total;
        // Only the word's cblob_list is counted here.
        blob_total += word_it.data()->cblob_list()->length();
      }
    }
  }
  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", block_total,
          row_total, word_total, blob_total);
}
/**********************************************************************
 * ExtractBlobsFromSegmentation
 *
 * Moves the blobs of every word in the given block list onto the end of
 * output_blob_list: first the word's regular blobs, then its rejected
 * blobs. The block list must have been created by performing a page
 * segmentation.
 **********************************************************************/
void ExtractBlobsFromSegmentation(BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list) {
  C_BLOB_IT out_it(output_blob_list);
  BLOCK_IT blk_it(blocks);
  blk_it.mark_cycle_pt();
  while (!blk_it.cycled_list()) {
    ROW_IT rows(blk_it.data()->row_list());
    rows.mark_cycle_pt();
    while (!rows.cycled_list()) {
      WERD_IT words(rows.data()->word_list());
      words.mark_cycle_pt();
      while (!words.cycled_list()) {
        WERD *word = words.data();
        // Always append at the tail so the output keeps segmentation order.
        out_it.move_to_last();
        out_it.add_list_after(word->cblob_list());
        out_it.move_to_last();
        out_it.add_list_after(word->rej_cblob_list());
        words.forward();
      }
      rows.forward();
    }
    blk_it.forward();
  }
}
/**********************************************************************
 * RefreshWordBlobsFromNewBlobs()
 *
 * Rebuilds the words of every text block in block_list from the blobs
 * in new_blobs. Block list must have word segmentation in it.
 * Blobs in new_blobs are consumed as they are matched; whatever is left
 * in new_blobs afterwards was not matched to any blob of the words in
 * block_list.
 * not_found_blobs receives the blobs of the original segmentation for
 * which no corresponding new blob was found.
 **********************************************************************/
void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs,
                                  C_BLOB_LIST *not_found_blobs) {
  BLOCK_IT blocks(block_list);
  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
    BLOCK *current_block = blocks.data();
    const bool is_non_text = current_block->pdblk.poly_block() != nullptr &&
                             !current_block->pdblk.poly_block()->IsText();
    if (is_non_text) {
      continue; // Non-text blocks are left untouched.
    }
    ROW_IT rows(current_block->row_list());
    for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
      ROW *current_row = rows.data();
      // Pull each word out of the row and collect its replacement (or the
      // word itself) in a temporary list.
      WERD_IT old_words(current_row->word_list());
      WERD_LIST rebuilt;
      WERD_IT rebuilt_it(&rebuilt);
      for (old_words.mark_cycle_pt(); !old_words.cycled_list(); old_words.forward()) {
        WERD *old_word = old_words.extract();
        WERD *replacement = old_word->ConstructWerdWithNewBlobs(new_blobs, not_found_blobs);
        if (replacement == nullptr) {
          // No replacement could be built: keep the original word. Dropping
          // it would corrupt the segmentation, e.g. the first word of the
          // row might otherwise end up with W_FUZZY_NON turned on.
          rebuilt_it.add_after_then_move(old_word);
          continue;
        }
        // The replacement takes the old word's place; the old word is freed.
        rebuilt_it.add_after_then_move(replacement);
        delete old_word;
      }
      // Swap the rebuilt list in for the row's old word list.
      current_row->word_list()->clear();
      old_words.move_to_first();
      old_words.add_list_after(&rebuilt);
    }
  }
}
} // namespace tesseract

View File

@ -0,0 +1,227 @@
/**********************************************************************
* File: ocrblock.h (Formerly block.h)
* Description: Page block class definition.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OCRBLOCK_H
#define OCRBLOCK_H
#include "ocrpara.h"
#include "ocrrow.h"
#include "pdblock.h"
namespace tesseract {
class BLOCK; // forward decl
ELISTIZEH(BLOCK)
// A page block. Stores spacing/pitch statistics, the x-height, the font
// class and name of the block, its rows, paragraphs and blob lists, three
// rotation/skew vectors, and the page description block (pdblk).
class TESS_API BLOCK : public ELIST_LINK
// page block
{
friend class BLOCK_RECT_IT; // block iterator
public:
// Default constructor: the three direction vectors start as (1, 0); all
// other members take their in-class default values.
BLOCK() : re_rotation_(1.0f, 0.0f), classify_rotation_(1.0f, 0.0f), skew_(1.0f, 0.0f) {}
BLOCK(const char *name, ///< filename
bool prop, ///< proportional
int16_t kern, ///< kerning
int16_t space, ///< spacing
int16_t xmin, ///< bottom left
int16_t ymin,
int16_t xmax, ///< top right
int16_t ymax);
~BLOCK() = default;
/**
* set space size etc.
* @param prop proportional
* @param kern inter char size
* @param space inter word size
* @param ch_pitch pitch if fixed
*/
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch) {
proportional = prop;
// NOTE: kern is narrowed to the int8_t storage of `kerning` here.
kerning = static_cast<int8_t>(kern);
spacing = space;
pitch = ch_pitch;
}
/// set char size
void set_xheight(int32_t height) {
xheight = height;
}
/// set font class
void set_font_class(int16_t font) {
font_class = font;
}
/// return proportional
bool prop() const {
return proportional;
}
/// return whether the major script is right to left
bool right_to_left() const {
return right_to_left_;
}
/// set whether the major script is right to left
void set_right_to_left(bool value) {
right_to_left_ = value;
}
/// return pitch
int32_t fixed_pitch() const {
return pitch;
}
/// return kerning
int16_t kern() const {
return kerning;
}
/// return font class
int16_t font() const {
return font_class;
}
/// return spacing
int16_t space() const {
return spacing;
}
/// return filename
const char *name() const {
return filename.c_str();
}
/// return xheight
int32_t x_height() const {
return xheight;
}
/// return ratio of cell height to xheight
float cell_over_xheight() const {
return cell_over_xheight_;
}
/// set ratio of cell height to xheight
void set_cell_over_xheight(float ratio) {
cell_over_xheight_ = ratio;
}
/// get rows
ROW_LIST *row_list() {
return &rows;
}
// Compute the margins between the edges of each row and this block's
// polyblock, and store the results in the rows.
void compute_row_margins();
// get paragraphs
PARA_LIST *para_list() {
return &paras_;
}
/// get blobs
C_BLOB_LIST *blob_list() {
return &c_blobs;
}
/// get rejected blobs
C_BLOB_LIST *reject_blobs() {
return &rej_blobs;
}
FCOORD re_rotation() const {
return re_rotation_; // How to transform coords back to image.
}
void set_re_rotation(const FCOORD &rotation) {
re_rotation_ = rotation;
}
FCOORD classify_rotation() const {
return classify_rotation_; // Apply this before classifying.
}
void set_classify_rotation(const FCOORD &rotation) {
classify_rotation_ = rotation;
}
FCOORD skew() const {
return skew_; // Direction of true horizontal.
}
void set_skew(const FCOORD &skew) {
skew_ = skew;
}
/// return median size of blobs
const ICOORD &median_size() const {
return median_size_;
}
/// set median size of blobs
void set_median_size(int x, int y) {
median_size_.set_x(x);
median_size_.set_y(y);
}
// Forwards to pdblk.render_mask(), passing this block's re_rotation_.
Image render_mask(TBOX *mask_box) {
return pdblk.render_mask(re_rotation_, mask_box);
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
// Reflects the polygon in the y-axis and recomputes the bounding_box.
// Does nothing to any contained rows/words/blobs etc.
void reflect_polygon_in_y_axis();
void rotate(const FCOORD &rotation);
/// decreasing y order
void sort_rows();
/// shrink white space
void compress();
/// check proportional
void check_pitch();
/// shrink white space and move by vector
void compress(const ICOORD vec);
/// dump whole table
void print(FILE *fp, bool dump);
BLOCK &operator=(const BLOCK &source);
PDBLK pdblk; ///< Page Description Block
private:
bool proportional = false; ///< proportional
bool right_to_left_ = false; ///< major script is right to left.
int8_t kerning = 0; ///< inter blob gap
int16_t spacing = 0; ///< inter word gap
int16_t pitch = 0; ///< pitch of non-props
int16_t font_class = 0; ///< correct font class
int32_t xheight = 0; ///< height of chars
float cell_over_xheight_ = 0.0f; ///< Ratio of cell height to xheight.
std::string filename; ///< name of block
ROW_LIST rows; ///< rows in block
PARA_LIST paras_; ///< paragraphs of block
C_BLOB_LIST c_blobs; ///< before textord
C_BLOB_LIST rej_blobs; ///< duff stuff
FCOORD re_rotation_; ///< How to transform coords back to image.
FCOORD classify_rotation_; ///< Apply this before classifying.
FCOORD skew_; ///< Direction of true horizontal.
ICOORD median_size_; ///< Median size of blobs.
};
// A function to print segmentation stats for the given block list.
void PrintSegmentationStats(BLOCK_LIST *block_list);
// Extracts blobs from the given block list and adds them to the output list.
// The block list must have been created by performing a page segmentation.
void ExtractBlobsFromSegmentation(BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list);
// Refreshes the words in the block_list by using blobs in the
// new_blobs list.
// Block list must have word segmentation in it.
// It consumes the blobs provided in the new_blobs list. The blobs leftover in
// the new_blobs list after the call weren't matched to any blobs of the words
// in block list.
// The output not_found_blobs is a list of blobs from the original segmentation
// in the block_list for which no corresponding new blobs were found.
void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs,
C_BLOB_LIST *not_found_blobs);
} // namespace tesseract
#endif

View File

@ -0,0 +1,93 @@
/////////////////////////////////////////////////////////////////////
// File: ocrpara.cpp
// Description: OCR Paragraph Output Type
// Author: David Eger
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ocrpara.h"
#include "host.h" // For NearlyEqual()
#include <cstdio>
namespace tesseract {
using tesseract::JUSTIFICATION_CENTER;
using tesseract::JUSTIFICATION_LEFT;
using tesseract::JUSTIFICATION_RIGHT;
using tesseract::JUSTIFICATION_UNKNOWN;
// Maps a justification value to its upper-case display name. Any value
// other than LEFT, RIGHT or CENTER is reported as "UNKNOWN".
static const char *ParagraphJustificationToString(tesseract::ParagraphJustification justification) {
  if (justification == JUSTIFICATION_LEFT) {
    return "LEFT";
  }
  if (justification == JUSTIFICATION_RIGHT) {
    return "RIGHT";
  }
  if (justification == JUSTIFICATION_CENTER) {
    return "CENTER";
  }
  return "UNKNOWN";
}
// Returns whether a line with the given margins and indents could be the
// first line of a paragraph under this model. For LEFT/RIGHT justification
// the whitespace on the relevant side must match margin_ + first_indent_
// within tolerance_; for CENTER the two indents must agree within twice
// tolerance_. Any other justification never matches.
bool ParagraphModel::ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const {
  if (justification_ == JUSTIFICATION_LEFT) {
    return NearlyEqual(lmargin + lindent, margin_ + first_indent_, tolerance_);
  }
  if (justification_ == JUSTIFICATION_RIGHT) {
    return NearlyEqual(rmargin + rindent, margin_ + first_indent_, tolerance_);
  }
  if (justification_ == JUSTIFICATION_CENTER) {
    return NearlyEqual(lindent, rindent, tolerance_ * 2);
  }
  // shouldn't happen
  return false;
}
// Returns whether a line with the given margins and indents could be a
// body (non-first) line of a paragraph under this model. For LEFT/RIGHT
// justification the whitespace on the relevant side must match
// margin_ + body_indent_ within tolerance_; for CENTER the two indents
// must agree within twice tolerance_. Any other justification never matches.
bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const {
  if (justification_ == JUSTIFICATION_LEFT) {
    return NearlyEqual(lmargin + lindent, margin_ + body_indent_, tolerance_);
  }
  if (justification_ == JUSTIFICATION_RIGHT) {
    return NearlyEqual(rmargin + rindent, margin_ + body_indent_, tolerance_);
  }
  if (justification_ == JUSTIFICATION_CENTER) {
    return NearlyEqual(lindent, rindent, tolerance_ * 2);
  }
  // shouldn't happen
  return false;
}
bool ParagraphModel::Comparable(const ParagraphModel &other) const {
if (justification_ != other.justification_) {
return false;
}
if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN) {
return true;
}
int tolerance = (tolerance_ + other.tolerance_) / 4;
return NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance) &&
NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, tolerance);
}
// Formats the margin, both indents and the justification name of this model
// as a single human-readable line.
std::string ParagraphModel::ToString() const {
  char text[200];
  snprintf(text, sizeof(text), "margin: %d, first_indent: %d, body_indent: %d, alignment: %s",
           margin_, first_indent_, body_indent_,
           ParagraphJustificationToString(justification_));
  return std::string(text);
}
} // namespace tesseract

View File

@ -0,0 +1,203 @@
/////////////////////////////////////////////////////////////////////
// File: ocrpara.h
// Description: OCR Paragraph Output Type
// Author: David Eger
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_OCRPARA_H_
#define TESSERACT_CCSTRUCT_OCRPARA_H_
#include "elst.h"
#include <tesseract/publictypes.h>
namespace tesseract {
class ParagraphModel;
// Per-paragraph output record, linkable into a PARA_LIST.
struct PARA : public ELIST_LINK {
public:
  // All fields start from the defaults given on their declarations.
  PARA() {}

  // Not owned, only referenced. May be nullptr when no good model exists
  // for this paragraph.
  const ParagraphModel *model = nullptr;

  bool is_list_item = false;

  // The first paragraph on a page often lacks a first line indent, but
  // should still be modeled by the same model as other body text paragraphs
  // on the page.
  bool is_very_first_or_continuation = false;

  // Does this paragraph begin with a drop cap?
  bool has_drop_cap = false;
};
ELISTIZEH(PARA)
// A geometric model of paragraph indentation and alignment.
//
// Measurements are in pixels. The meaning of the integer arguments changes
// depending upon the value of justification. Distances less than or equal
// to tolerance apart we take as "equivalent" for the purpose of model
// matching, and in the examples below, we assume tolerance is zero.
//
// justification = LEFT:
// margin the "ignored" margin to the left block edge.
// first_indent indent from the left margin to a typical first text line.
// body_indent indent from the left margin of a typical body text line.
//
// justification = RIGHT:
// margin the "ignored" margin to the right block edge.
// first_indent indent from the right margin to a typical first text line.
// body_indent indent from the right margin of a typical body text line.
//
// justification = CENTER:
// margin ignored
// first_indent ignored
// body_indent ignored
//
// ====== Extended example, assuming each letter is ten pixels wide: =======
//
// +--------------------------------+
// | Awesome | ParagraphModel(CENTER, 0, 0, 0)
// | Centered Title |
// | Paragraph Detection |
// | OCR TEAM |
// | 10 November 2010 |
// | |
// | Look here, I have a paragraph.| ParagraphModel(LEFT, 0, 20, 0)
// |This paragraph starts at the top|
// |of the page and takes 3 lines. |
// | Here I have a second paragraph| ParagraphModel(LEFT, 0, 20, 0)
// |which indicates that the first |
// |paragraph is not a continuation |
// |from a previous page, as it is |
// |indented just like this second |
// |paragraph. |
// | Here is a block quote. It | ParagraphModel(LEFT, 30, 0, 0)
// | looks like the prior text |
// | but it is indented more |
// | and is fully justified. |
// | So how does one deal with | ParagraphModel(LEFT, 0, 20, 0)
// |centered text, block quotes, |
// |normal paragraphs, and lists |
// |like what follows? |
// |1. Make a plan. | ParagraphModel(LEFT, 0, 0, 30)
// |2. Use a heuristic, for example,| ParagraphModel(LEFT, 0, 0, 30)
// | looking for lines where the |
// | first word of the next line |
// | would fit on the previous |
// | line. |
// |8. Try to implement the plan in | ParagraphModel(LEFT, 0, 0, 30)
// | Python and try it out. |
// |4. Determine how to fix the | ParagraphModel(LEFT, 0, 0, 30)
// | mistakes. |
// |5. Repeat. | ParagraphModel(LEFT, 0, 0, 30)
// | For extra painful penalty work| ParagraphModel(LEFT, 0, 20, 0)
// |you can try to identify source |
// |code. Ouch! |
// +--------------------------------+
// Stores the justification, margin, first-line indent, body-line indent and
// matching tolerance of one paragraph model.
class TESS_API ParagraphModel {
public:
// Builds a model from the given measurements. The smaller of first_indent
// and body_indent is moved into the margin, so the stored margin grows by
// that amount and both stored indents shrink by it.
ParagraphModel(tesseract::ParagraphJustification justification, int margin, int first_indent,
int body_indent, int tolerance)
: justification_(justification)
, margin_(margin)
, first_indent_(first_indent)
, body_indent_(body_indent)
, tolerance_(tolerance) {
// Make sure that one of {first_indent, body_indent} is 0.
int added_margin = first_indent;
if (body_indent < added_margin) {
added_margin = body_indent;
}
margin_ += added_margin;
first_indent_ -= added_margin;
body_indent_ -= added_margin;
}
// Default model: unknown justification with all measurements zero.
ParagraphModel()
: justification_(tesseract::JUSTIFICATION_UNKNOWN)
, margin_(0)
, first_indent_(0)
, body_indent_(0)
, tolerance_(0) {}
// ValidFirstLine() and ValidBodyLine() take arguments describing a text line
// in a block of text which we are trying to model:
// lmargin, lindent: these add up to the distance from the leftmost ink
// in the text line to the surrounding text block's left
// edge.
// rmargin, rindent: these add up to the distance from the rightmost ink
// in the text line to the surrounding text block's right
// edge.
// The caller determines the division between "margin" and "indent", which
// only actually affect whether we think the line may be centered.
//
// If the amount of whitespace matches the amount of whitespace expected on
// the relevant side of the line (within tolerance_) we say it matches.
// Return whether a given text line could be a first paragraph line according
// to this paragraph model.
bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const;
// Return whether a given text line could be a body (non-first) paragraph
// line according to this paragraph model.
bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const;
tesseract::ParagraphJustification justification() const {
return justification_;
}
int margin() const {
return margin_;
}
int first_indent() const {
return first_indent_;
}
int body_indent() const {
return body_indent_;
}
int tolerance() const {
return tolerance_;
}
// Returns true for a LEFT or RIGHT justified model whose first-line and
// body-line indents differ by no more than tolerance_.
// NOTE(review): relies on an unqualified abs() being declared by one of the
// headers included before this point; confirm.
bool is_flush() const {
return (justification_ == tesseract::JUSTIFICATION_LEFT ||
justification_ == tesseract::JUSTIFICATION_RIGHT) &&
abs(first_indent_ - body_indent_) <= tolerance_;
}
// Return whether this model is likely to agree with the other model on most
// paragraphs they are marked.
bool Comparable(const ParagraphModel &other) const;
// Returns a printable one-line description of this model.
std::string ToString() const;
private:
tesseract::ParagraphJustification justification_;
int margin_;
int first_indent_;
int body_indent_;
int tolerance_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_OCRPARA_H_

View File

@ -0,0 +1,245 @@
/**********************************************************************
* File: ocrrow.cpp (Formerly row.c)
* Description: Code for the ROW class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "blobbox.h"
#include "ocrrow.h"
namespace tesseract {
/**********************************************************************
 * ROW::ROW
 *
 * Builds a ROW from explicit baseline spline data and line metrics.
 * Only the statistics are supplied here; the words are added directly
 * to the word list afterwards. The body size starts at zero, there is
 * no drop cap, both margins are zero and no paragraph is attached.
 **********************************************************************/
ROW::ROW(                // constructor
    int32_t spline_size, // no of segments
    int32_t *xstarts,    // segment boundaries
    double *coeffs,      // coefficients
    float x_height,      // line height
    float ascenders,     // ascender size
    float descenders,    // descender drop
    int16_t kern,        // char gap
    int16_t space        // word gap
    )
    : kerning(kern),
      spacing(space),
      xheight(x_height),
      ascrise(ascenders),
      descdrop(descenders),
      bodysize(0.0f),
      baseline(spline_size, xstarts, coeffs),
      has_drop_cap_(false),
      lmargin_(0),
      rmargin_(0),
      para_(nullptr) {}
/**********************************************************************
 * ROW::ROW
 *
 * Builds a ROW from a textord TO_ROW: the x-height, body size,
 * ascender rise, descender drop and baseline are copied from to_row,
 * while kerning and spacing come from the arguments. The words are
 * added directly to the word list afterwards. There is no drop cap,
 * both margins are zero and no paragraph is attached.
 **********************************************************************/
ROW::ROW(           // constructor
    TO_ROW *to_row, // source row
    int16_t kern,   // char gap
    int16_t space   // word gap
    )
    : kerning(kern),
      spacing(space),
      xheight(to_row->xheight),
      ascrise(to_row->ascrise),
      descdrop(to_row->descdrop),
      bodysize(to_row->body_size),
      has_drop_cap_(false),
      lmargin_(0),
      rmargin_(0),
      para_(nullptr) {
  // The spline is copied by assignment, after default construction.
  baseline = to_row->baseline;
}
// Returns the union of the restricted bounding boxes of all words in the
// row, where upper_dots / lower_dots select whether upper and lower
// noise/diacritic elements are included.
TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
  TBOX result;
  // The list is only read; the cast is needed to build an iterator over it.
  WERD_IT word_it(const_cast<WERD_LIST *>(&words));
  word_it.mark_cycle_pt();
  while (!word_it.cycled_list()) {
    result += word_it.data()->restricted_bounding_box(upper_dots, lower_dots);
    word_it.forward();
  }
  return result;
}
/**********************************************************************
 * ROW::recalc_bounding_box
 *
 * Puts the words into left-to-right order if they are not already,
 * refreshes the beginning/end-of-line flags of every word, and extends
 * the row's bounding box by the bounding box of each word.
 **********************************************************************/
void ROW::recalc_bounding_box() { // recalculate BB
  WERD_IT it(&words);             // words of ROW

  // Scan for a word that starts left of its predecessor; the list is only
  // sorted when such a word is found.
  if (!it.empty()) {
    int16_t prev_left = it.data()->bounding_box().left();
    for (it.forward(); !it.at_first(); it.forward()) {
      const int16_t left = it.data()->bounding_box().left();
      if (left < prev_left) {
        it.move_to_first();
        // words in BB order
        it.sort(word_comparator);
        break;
      }
      prev_left = left;
    }
  }

  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    WERD *word = it.data();
    // Only the first word starts the line and only the last one ends it.
    word->set_flag(W_BOL, it.at_first());
    word->set_flag(W_EOL, it.at_last());
    // extend BB as reqd
    bound_box += word->bounding_box();
  }
}
/**********************************************************************
* ROW::move
*
* Reposition row by vector
**********************************************************************/
void ROW::move( // reposition row
const ICOORD vec // by vector
) {
WERD_IT it(&words); // word iterator
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->move(vec);
}
bound_box.move(vec);
baseline.move(vec);
}
/**********************************************************************
 * ROW::print
 *
 * Reports the row's kerning, spacing, bounding box, line metrics,
 * drop-cap flag and margins. All output goes through tprintf and
 * TBOX::print; the fp argument is not used.
 **********************************************************************/
void ROW::print( // print
    FILE *fp     // file to print on
) const {
  // Gap statistics, then the box.
  tprintf("Kerning= %d\n", kerning);
  tprintf("Spacing= %d\n", spacing);
  bound_box.print();

  // Vertical line metrics.
  tprintf("Xheight= %f\n", xheight);
  tprintf("Ascrise= %f\n", ascrise);
  tprintf("Descdrop= %f\n", descdrop);

  // Values set once blocks have been determined.
  tprintf("has_drop_cap= %d\n", has_drop_cap_);
  tprintf("lmargin= %d, rmargin= %d\n", lmargin_, rmargin_);
}
/**********************************************************************
* ROW::plot
*
* Draw the ROW in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void ROW::plot(              // draw it
    ScrollView *window,      // window to draw in
    ScrollView::Color colour // colour to draw in
) {
  // Every word of the row is drawn in the same colour.
  WERD_IT word_it(&words);
  word_it.mark_cycle_pt();
  while (!word_it.cycled_list()) {
    word_it.data()->plot(window, colour);
    word_it.forward();
  }
}
/**********************************************************************
 * ROW::plot
 *
 * Draws every word of the ROW using the word's own rainbow-colour plot.
 **********************************************************************/
void ROW::plot(        // draw it
    ScrollView *window // window to draw in
) {
  WERD_IT word_it(&words);
  word_it.mark_cycle_pt();
  while (!word_it.cycled_list()) {
    word_it.data()->plot(window); // in rainbow colours
    word_it.forward();
  }
}
#endif // !GRAPHICS_DISABLED
/**********************************************************************
 * ROW::operator=
 *
 * Assign rows by duplicating the row structure but NOT the WERDLIST:
 * the statistics, baseline, bounding box, margins, drop-cap flag and
 * paragraph pointer are copied from source, while this row's own word
 * list is emptied rather than copied.
 *
 * Self-assignment is a no-op; without the guard, `row = row` would
 * discard the row's own words.
 **********************************************************************/
ROW &ROW::operator=(const ROW &source) {
  if (this == &source) {
    return *this; // nothing to copy, and the words must survive
  }
  this->ELIST_LINK::operator=(source);
  kerning = source.kerning;
  spacing = source.spacing;
  xheight = source.xheight;
  bodysize = source.bodysize;
  ascrise = source.ascrise;
  descdrop = source.descdrop;
  // The word list is deliberately not duplicated; drop any existing words.
  if (!words.empty()) {
    words.clear();
  }
  baseline = source.baseline; // QSPLINES must do =
  bound_box = source.bound_box;
  has_drop_cap_ = source.has_drop_cap_;
  lmargin_ = source.lmargin_;
  rmargin_ = source.rmargin_;
  para_ = source.para_; // not owned: both rows refer to the same PARA
  return *this;
}
} // namespace tesseract

View File

@ -0,0 +1,176 @@
/**********************************************************************
* File: ocrrow.h (Formerly row.h)
* Description: Code for the ROW class.
* Author: Ray Smith
* Created: Tue Oct 08 15:58:04 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OCRROW_H
#define OCRROW_H
#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
#include "quspline.h" // for QSPLINE
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include "werd.h" // for WERD_LIST
#include <cstdint> // for int16_t, int32_t
#include <cstdio> // for FILE
namespace tesseract {
class ICOORD;
class TO_ROW;
struct PARA;
// A text row: the words of one line together with its baseline spline,
// bounding box, line metrics (x-height, ascender/descender sizes, body
// size), kerning/spacing statistics, margins and owning paragraph.
//
// Every scalar and pointer member carries a default member initializer so
// that a default-constructed ROW has well-defined values.
class ROW : public ELIST_LINK {
  friend void tweak_row_baseline(ROW *, double, double);

public:
  ROW() = default;
  ROW(                     // constructor
      int32_t spline_size, // no of segments
      int32_t *xstarts,    // segment boundaries
      double *coeffs,      // coefficients
      float x_height,      // line height
      float ascenders,     // ascender size
      float descenders,    // descender size
      int16_t kern,        // char gap
      int16_t space);      // word gap
  ROW(                     // constructor
      TO_ROW *row,         // textord row
      int16_t kern,        // char gap
      int16_t space);      // word gap

  WERD_LIST *word_list() { // get words
    return &words;
  }

  float base_line(         // compute baseline
      float xpos) const {  // at the position
    // get spline value
    return static_cast<float>(baseline.y(xpos));
  }
  float x_height() const { // return x height
    return xheight;
  }
  void set_x_height(float new_xheight) { // set x height
    xheight = new_xheight;
  }
  int32_t kern() const { // return kerning
    return kerning;
  }
  float body_size() const { // return body size
    return bodysize;
  }
  void set_body_size(float new_size) { // set body size
    bodysize = new_size;
  }
  int32_t space() const { // return spacing
    return spacing;
  }
  float ascenders() const { // return size
    return ascrise;
  }
  float descenders() const { // return size
    return descdrop;
  }
  TBOX bounding_box() const { // return bounding box
    return bound_box;
  }
  // Returns the bounding box including the desired combination of upper and
  // lower noise/diacritic elements.
  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;

  void set_lmargin(int16_t lmargin) {
    lmargin_ = lmargin;
  }
  void set_rmargin(int16_t rmargin) {
    rmargin_ = rmargin;
  }
  int16_t lmargin() const {
    return lmargin_;
  }
  int16_t rmargin() const {
    return rmargin_;
  }

  void set_has_drop_cap(bool has) {
    has_drop_cap_ = has;
  }
  bool has_drop_cap() const {
    return has_drop_cap_;
  }

  void set_para(PARA *p) {
    para_ = p;
  }
  PARA *para() const {
    return para_;
  }

  void recalc_bounding_box(); // recalculate BB

  void move(             // reposition row
      const ICOORD vec); // by vector

  void print(          // print
      FILE *fp) const; // file to print on

#ifndef GRAPHICS_DISABLED
  void plot(                     // draw one
      ScrollView *window,        // window to draw in
      ScrollView::Color colour); // uniform colour
  void plot(                     // draw one
      ScrollView *window);       // in rainbow colours

  void plot_baseline(              // draw the baseline
      ScrollView *window,          // window to draw in
      ScrollView::Color colour) {  // colour to draw
    // draw it
    baseline.plot(window, colour);
  }
#endif // !GRAPHICS_DISABLED
  // Copies the row structure but not the word list.
  ROW &operator=(const ROW &source);

private:
  // Copy construction is disabled.
  ROW(const ROW &source) = delete;

  int32_t kerning = 0;   // inter char gap
  int32_t spacing = 0;   // inter word gap
  TBOX bound_box;        // bounding box
  float xheight = 0.0f;  // height of line
  float ascrise = 0.0f;  // size of ascenders
  float descdrop = 0.0f; //-size of descenders
  float bodysize = 0.0f; // CJK character size. (equals to
                         // xheight+ascrise by default)
  WERD_LIST words;       // words
  QSPLINE baseline;      // baseline spline

  // These get set after blocks have been determined.
  bool has_drop_cap_ = false;
  int16_t lmargin_ = 0; // Distance to left polyblock margin.
  int16_t rmargin_ = 0; // Distance to right polyblock margin.

  // This gets set during paragraph analysis.
  PARA *para_ = nullptr; // Paragraph of which this row is part.
};
ELISTIZEH(ROW)
} // namespace tesseract
#endif

View File

@ -0,0 +1,212 @@
/**********************************************************************
* File: otsuthr.cpp
* Description: Simple Otsu thresholding for binarizing images.
* Author: Ray Smith
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "otsuthr.h"
#include <allheaders.h>
#include <cstring>
#include "helpers.h"
#if defined(USE_OPENCL)
# include "openclwrapper.h" // for OpenclDevice
#endif
namespace tesseract {
// Computes the Otsu threshold(s) for the given image rectangle, making one
// for each channel. Each channel is always one byte per pixel.
//
// src_pix: source image; its depth / 8 gives the number of channels.
// left, top, width, height: the rectangle of src_pix to analyse.
// thresholds: resized to the number of channels. thresholds[ch] receives the
//   Otsu threshold of channel ch, or -1 if that channel is empty (every pixel
//   falls on the same side of the best split).
// hi_values: resized to the number of channels. A pixel value
//   >thresholds[ch] is considered foreground if hi_values[ch] is 0 or
//   background if it is 1. A hi_value of -1 indicates that there is no
//   apparent foreground. As long as the image has at least one channel, at
//   least one hi_value will not be -1.
// The return value is the number of channels in the input image, being
// the size of the output thresholds and hi_values arrays.
int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
                  std::vector<int> &hi_values) {
  int num_channels = pixGetDepth(src_pix) / 8;
  // Of all channels with no good hi_value, keep the best so we can always
  // produce at least one answer.
  int best_hi_value = 1;
  int best_hi_index = 0;
  bool any_good_hivalue = false;
  double best_hi_dist = 0.0;
  thresholds.resize(num_channels);
  hi_values.resize(num_channels);
  // only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
  // all of channel 0 then all of channel 1...
  std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
  // Calculate Histogram on GPU
  OpenclDevice od;
  if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
      left == 0) {
    // data() instead of &v[0]: stays well-defined even for an empty vector.
    od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
                        height, kHistogramSize, histogramAllChannels.data());
    // Calculate Threshold from Histogram on cpu
    for (int ch = 0; ch < num_channels; ++ch) {
      thresholds[ch] = -1;
      hi_values[ch] = -1;
      int *histogram = &histogramAllChannels[kHistogramSize * ch];
      int H;
      int best_omega_0;
      int best_t = OtsuStats(histogram, &H, &best_omega_0);
      if (best_omega_0 == 0 || best_omega_0 == H) {
        // This channel is empty.
        continue;
      }
      // To be a convincing foreground we must have a small fraction of H
      // or to be a convincing background we must have a large fraction of H.
      // In between we assume this channel contains no thresholding information.
      int hi_value = best_omega_0 < H * 0.5;
      // thresholds is a reference to a vector, not a pointer, so it is
      // indexed directly (the former pointer dereference did not compile).
      thresholds[ch] = best_t;
      if (best_omega_0 > H * 0.75) {
        any_good_hivalue = true;
        hi_values[ch] = 0;
      } else if (best_omega_0 < H * 0.25) {
        any_good_hivalue = true;
        hi_values[ch] = 1;
      } else {
        // In case all channels are like this, keep the best of the bad lot.
        double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
        if (hi_dist > best_hi_dist) {
          best_hi_dist = hi_dist;
          best_hi_value = hi_value;
          best_hi_index = ch;
        }
      }
    }
  } else {
#endif
    for (int ch = 0; ch < num_channels; ++ch) {
      thresholds[ch] = -1;
      hi_values[ch] = -1;
      // Compute the histogram of the image rectangle.
      int histogram[kHistogramSize];
      HistogramRect(src_pix, ch, left, top, width, height, histogram);
      int H;
      int best_omega_0;
      int best_t = OtsuStats(histogram, &H, &best_omega_0);
      if (best_omega_0 == 0 || best_omega_0 == H) {
        // This channel is empty.
        continue;
      }
      // To be a convincing foreground we must have a small fraction of H
      // or to be a convincing background we must have a large fraction of H.
      // In between we assume this channel contains no thresholding information.
      int hi_value = best_omega_0 < H * 0.5;
      thresholds[ch] = best_t;
      if (best_omega_0 > H * 0.75) {
        any_good_hivalue = true;
        hi_values[ch] = 0;
      } else if (best_omega_0 < H * 0.25) {
        any_good_hivalue = true;
        hi_values[ch] = 1;
      } else {
        // In case all channels are like this, keep the best of the bad lot.
        double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
        if (hi_dist > best_hi_dist) {
          best_hi_dist = hi_dist;
          best_hi_value = hi_value;
          best_hi_index = ch;
        }
      }
    }
#ifdef USE_OPENCL
  }
#endif // USE_OPENCL
  // Use the best of the ones that were not good enough. The channel-count
  // guard avoids writing into an empty vector when the image depth is below
  // 8 bits (num_channels == 0).
  if (!any_good_hivalue && num_channels > 0) {
    hi_values[best_hi_index] = best_hi_value;
  }
  return num_channels;
}
// Builds the histogram of a single channel over the given image rectangle.
// Each channel is one byte per pixel, so the histogram always has
// kHistogramSize(256) bins, one per possible pixel value. The requested
// channel is clipped into the range of channels the image actually has, and
// the histogram is zeroed before counting starts.
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
                   int *histogram) {
  const int bytes_per_pixel = pixGetDepth(src_pix) / 8;
  const int byte_in_pixel = ClipToRange(channel, 0, bytes_per_pixel - 1);
  const int row_limit = top + height;
  // Start from an all-zero set of bins.
  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
  const int words_per_line = pixGetWpl(src_pix);
  l_uint32 *image_words = pixGetData(src_pix);
  for (int row = top; row < row_limit; ++row) {
    const l_uint32 *scanline = image_words + row * words_per_line;
    for (int col = 0; col < width; ++col) {
      // Byte offset of the wanted channel of pixel (col + left) in this line.
      const int value = GET_DATA_BYTE(scanline, (col + left) * bytes_per_pixel + byte_in_pixel);
      ++histogram[value];
    }
  }
}
// Computes the Otsu threshold for the given kHistogramSize-bin histogram.
// The threshold t is chosen to maximize (mu_1 - mu_0)^2 * omega_0 * omega_1,
// where omega_0/mu_0 are the count and mean of the bins 0..t and
// omega_1/mu_1 those of the remaining bins.
// If H_out is not nullptr it receives H = total count in the histogram.
// If omega0_out is not nullptr it receives the count of histogram entries at
// or below the returned threshold (0 if no threshold was found).
// Returns the best threshold, or -1 if no split leaves both sides non-empty.
int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
  // Total count and (unnormalized) first moment of the whole histogram.
  int total = 0;
  double weighted_total = 0.0;
  for (int level = 0; level < kHistogramSize; ++level) {
    total += histogram[level];
    weighted_total += static_cast<double>(level) * histogram[level];
  }
  // Now maximize the between-class spread over t.
  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
  int chosen_t = -1;
  int chosen_low_count = 0;
  double chosen_spread = 0.0;
  // Running count and first moment of the bins 0..t.
  int low_count = 0;
  double weighted_low = 0.0;
  for (int t = 0; t < kHistogramSize - 1; ++t) {
    low_count += histogram[t];
    weighted_low += t * static_cast<double>(histogram[t]);
    if (low_count == 0) {
      // Nothing at or below t yet: not a usable split.
      continue;
    }
    const int high_count = total - low_count;
    if (high_count == 0) {
      // Everything is already at or below t: no later split can be usable.
      break;
    }
    const double mean_low = weighted_low / low_count;
    const double mean_high = (weighted_total - weighted_low) / high_count;
    const double mean_gap = mean_high - mean_low;
    const double spread = mean_gap * (mean_gap * low_count * high_count);
    if (chosen_t < 0 || spread > chosen_spread) {
      chosen_spread = spread;
      chosen_t = t;
      chosen_low_count = low_count;
    }
  }
  if (H_out != nullptr) {
    *H_out = total;
  }
  if (omega0_out != nullptr) {
    *omega0_out = chosen_low_count;
  }
  return chosen_t;
}
} // namespace tesseract.

View File

@ -0,0 +1,58 @@
///////////////////////////////////////////////////////////////////////
// File: otsuthr.h
// Description: Simple Otsu thresholding for binarizing images.
// Author: Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
#define TESSERACT_CCMAIN_OTSUTHR_H_
#include "image.h"
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
// Number of bins in a pixel-value histogram: one per value of a one-byte
// channel.
const int kHistogramSize = 256; // The size of a histogram of pixel values.
// Computes the Otsu threshold(s) for the given image rectangle, making one
// for each channel. Each channel is always one byte per pixel.
// thresholds and hi_values are resized to the number of channels and filled
// such that a pixel value >thresholds[channel] is considered foreground if
// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
// that there is no apparent foreground. At least one hi_value will not be -1.
// thresholds[channel] is -1 for a channel in which no usable threshold was
// found.
// The return value is the number of channels in the input image, being
// the size of the output thresholds and hi_values arrays.
int OtsuThreshold(Image src_pix, int left, int top, int width, int height,
std::vector<int> &thresholds,
std::vector<int> &hi_values);
// Computes the histogram for the given image rectangle, and the given
// single channel. Each channel is always one byte per pixel.
// channel is clipped to the range of channels present in src_pix.
// histogram must point to kHistogramSize(256) ints; it is zeroed and then
// filled with the number of occurrences of each pixel value.
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
int *histogram);
// Computes the Otsu threshold for the given kHistogramSize-element histogram
// and returns it, or -1 if no threshold separates the histogram into two
// non-empty parts.
// If H_out is not nullptr it receives H = total count in histogram.
// If omega0_out is not nullptr it receives omega0 = count of histogram
// entries at or below the returned threshold.
int OtsuStats(const int *histogram, int *H_out, int *omega0_out);
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_OTSUTHR_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,798 @@
/**********************************************************************
* File: pageres.h (Formerly page_res.h)
* Description: Results classes used by control.c
* Author: Phil Cheatle
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef PAGERES_H
#define PAGERES_H
#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS
#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH
#include "genericvector.h" // for PointerVector
#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH
#include "matrix.h" // for MATRIX
#include "normalis.h" // for DENORM
#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only)
#include "rect.h" // for TBOX
#include "rejctmap.h" // for REJMAP
#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI...
#include "werd.h" // for WERD, W_BOL, W_EOL
#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
#include <cstdint> // for int32_t, int16_t
#include <functional> // for std::function
#include <set> // for std::pair
#include <vector> // for std::vector
#include <sys/types.h> // for int8_t
struct Pix;
namespace tesseract {
class BLOCK;
class BLOCK_LIST;
class BLOCK_RES;
class ROW;
class ROW_RES;
class SEAM;
class WERD_RES;
struct TWERD;
class BoxWord;
class Tesseract;
struct FontInfo;
/* Forward declarations */
class BLOCK_RES;
ELISTIZEH(BLOCK_RES)
CLISTIZEH(BLOCK_RES)
class ROW_RES;
ELISTIZEH(ROW_RES)
class WERD_RES;
ELISTIZEH(WERD_RES)
/*************************************************************************
 * PAGE_RES - Page results
 *************************************************************************/
class PAGE_RES { // results for one page
public:
  int32_t char_count;
  int32_t rej_count;
  BLOCK_RES_LIST block_res_list;
  bool rejected;
  // Not owned by PAGE_RES. Refreshed each time a PAGE_RES_IT that iterates
  // over this PAGE_RES advances to the next word.
  WERD_CHOICE **prev_word_best_choice;
  // Per-reason totals of the blame reasons computed by the blamer.
  std::vector<int> blame_reasons;
  // Debug log of every misadaption on this page. It lives at page level
  // because words can be deleted/split/merged; each BlamerBundle holds an
  // index into this vector so that the words that caused a misadaption can
  // be marked.
  std::vector<std::string> misadaption_log;

  // Resets the counters and flags to their empty state, clears the borrowed
  // prev_word_best_choice pointer and sizes blame_reasons to hold one entry
  // per IncorrectResultReason.
  void Init() {
    rejected = false;
    rej_count = 0;
    char_count = 0;
    prev_word_best_choice = nullptr;
    blame_reasons.resize(IRR_NUM_REASONS);
  }

  // Constructs an empty page result.
  PAGE_RES() {
    Init();
  }
  PAGE_RES(bool merge_similar_words,
           BLOCK_LIST *block_list, // real blocks
           WERD_CHOICE **prev_word_best_choice_ptr);
  ~PAGE_RES() = default;
};
/*************************************************************************
 * BLOCK_RES - Block results
 *************************************************************************/
class BLOCK_RES : public ELIST_LINK {
public:
  // Every scalar member carries a default member initializer so that a
  // default-constructed BLOCK_RES never exposes indeterminate values.
  // NOTE(review): zero/null defaults were chosen here; confirm whether the
  // out-of-line constructor uses different "unassigned" sentinels for
  // font_class / x_height and align if so.
  BLOCK *block = nullptr;     // real block
  int32_t char_count = 0;     // chars in block
  int32_t rej_count = 0;      // rejected chars
  int16_t font_class = 0;     //
  int16_t row_count = 0;
  float x_height = 0.0f;
  bool font_assigned = false; // block already
                              // processed
  ROW_RES_LIST row_res_list;

  BLOCK_RES() = default;
  BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block
  ~BLOCK_RES() = default;
};
/*************************************************************************
 * ROW_RES - Row results
 *************************************************************************/
class ROW_RES : public ELIST_LINK {
public:
  // Every scalar member carries a default member initializer so that a
  // default-constructed ROW_RES never exposes indeterminate values.
  ROW *row = nullptr;               // real row
  int32_t char_count = 0;           // chars in row
  int32_t rej_count = 0;            // rejected chars
  int32_t whole_word_rej_count = 0; // rejs in total rej wds
  WERD_RES_LIST word_res_list;

  ROW_RES() = default;
  ROW_RES(bool merge_similar_words, ROW *the_row); // real row
  ~ROW_RES() = default;
};
/*************************************************************************
* WERD_RES - Word results
*************************************************************************/
// Crunch mode of a word result. WERD_RES::unlv_crunch_mode holds one of these
// values and defaults to CR_NONE.
enum CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE };
// WERD_RES is a collection of publicly accessible members that gathers
// information about a word result.
class TESS_API WERD_RES : public ELIST_LINK {
public:
// Which word is which?
// There are 3 coordinate spaces in use here: a possibly rotated pixel space,
// the original image coordinate space, and the BLN space in which the
// baseline of a word is at kBlnBaselineOffset, the xheight is kBlnXHeight,
// and the x-middle of the word is at 0.
// In the rotated pixel space, coordinates correspond to the input image,
// but may be rotated about the origin by a multiple of 90 degrees,
// and may therefore be negative.
// In any case a rotation by denorm.block()->re_rotation() will take them
// back to the original image.
// The other differences between words all represent different stages of
// processing during recognition.
// ---------------------------INPUT-------------------------------------
// The word is the input C_BLOBs in the rotated pixel space.
// word is NOT owned by the WERD_RES unless combination is true.
// All the other word pointers ARE owned by the WERD_RES.
WERD *word = nullptr; // Input C_BLOB word.
// -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------
// The bln_boxes contains the bounding boxes (only) of the input word, in the
// BLN space. The lengths of word and bln_boxes
// match as they are both before any chopping.
// TODO(rays) determine if docqual does anything useful and delete bln_boxes
// if it doesn't.
tesseract::BoxWord *bln_boxes = nullptr; // BLN input bounding boxes.
// The ROW that this word sits in. NOT owned by the WERD_RES.
ROW *blob_row = nullptr;
// The denorm provides the transformation to get back to the rotated image
// coords from the chopped_word/rebuild_word BLN coords, but each blob also
// has its own denorm.
DENORM denorm; // For use on chopped_word.
// Unicharset used by the classifier output in best_choice and raw_choice.
const UNICHARSET *uch_set = nullptr; // For converting back to utf8.
// ----Initialized by SetupFor*Recognition---BUT OUTPUT FROM RECOGNITION----
// ----Setup to a (different!) state expected by the various classifiers----
// TODO(rays) Tidy and make more consistent.
// The chopped_word is also in BLN space, and represents the fully chopped
// character fragments that make up the word.
// The length of chopped_word matches length of seam_array + 1 (if set).
TWERD *chopped_word = nullptr; // BLN chopped fragments output.
// Vector of SEAM* holding chopping points matching chopped_word.
std::vector<SEAM *> seam_array;
// Widths of blobs in chopped_word.
std::vector<int> blob_widths;
// Gaps between blobs in chopped_word. blob_gaps[i] is the gap between
// blob i and blob i+1.
std::vector<int> blob_gaps;
// Stores the lstm choices of every timestep
std::vector<std::vector<std::pair<const char *, float>>> timesteps;
// Stores the lstm choices of every timestep segmented by character
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
// Symbolchoices acquired during CTC
std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
// Stores if the timestep vector starts with a space
bool leading_space = false;
// Stores value when the word ends
int end = 0;
// Ratings matrix contains classifier choices for each classified combination
// of blobs. The dimension is the same as the number of blobs in chopped_word
// and the leading diagonal corresponds to classifier results of the blobs
// in chopped_word. The state_ members of best_choice, raw_choice and
// best_choices all correspond to this ratings matrix and allow extraction
// of the blob choices for any given WERD_CHOICE.
MATRIX *ratings = nullptr; // Owned pointer.
// Pointer to the first WERD_CHOICE in best_choices. This is the result that
// will be output from Tesseract. Note that this is now a borrowed pointer
// and should NOT be deleted.
WERD_CHOICE *best_choice = nullptr; // Borrowed pointer.
// The best raw_choice found during segmentation search. Differs from the
// best_choice by being the best result according to just the character
// classifier, not taking any language model information into account.
// Unlike best_choice, the pointer IS owned by this WERD_RES.
WERD_CHOICE *raw_choice = nullptr; // Owned pointer.
// Alternative results found during chopping/segmentation search stages.
// Note that being an ELIST, best_choices owns the WERD_CHOICEs.
WERD_CHOICE_LIST best_choices;
// Truth bounding boxes, text and incorrect choice reason.
BlamerBundle *blamer_bundle = nullptr;
// --------------OUTPUT FROM RECOGNITION-------------------------------
// --------------Not all fields are necessarily set.-------------------
// ---best_choice, raw_choice *must* end up set, with a box_word-------
// ---In complete output, the number of blobs in rebuild_word matches---
// ---the number of boxes in box_word, the number of unichar_ids in---
// ---best_choice, the number of ints in best_state, and the number---
// ---of strings in correct_text--------------------------------------
// ---SetupFake Sets everything to appropriate values if the word is---
// ---known to be bad before recognition.------------------------------
// The rebuild_word is also in BLN space, but represents the final best
// segmentation of the word. Its length is therefore the same as box_word.
TWERD *rebuild_word = nullptr; // BLN best segmented word.
// The box_word is in the original image coordinate space. It is the
// bounding boxes of the rebuild_word, after denormalization.
// The length of box_word matches rebuild_word, best_state (if set) and
// correct_text (if set), as well as best_choice and represents the
// number of classified units in the output.
tesseract::BoxWord *box_word = nullptr; // Denormalized output boxes.
// The Tesseract that was used to recognize this word. Just a borrowed
// pointer. Note: Tesseract's class definition is in a higher-level library.
// We avoid introducing a cyclic dependency by not using the Tesseract
// within WERD_RES. We are just storing it to provide access to it
// for the top-level multi-language controller, and maybe for output of
// the recognized language.
// tesseract points to data owned elsewhere.
tesseract::Tesseract *tesseract = nullptr;
// The best_state stores the relationship between chopped_word and
// rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i]
// adjacent blobs in chopped_word. The seams in seam_array are hidden
// within a rebuild_word blob and revealed between them.
std::vector<int> best_state; // Number of blobs in each best blob.
// The correct_text is used during training and adaption to carry the
// text to the training system without the need for a unicharset. There
// is one entry in the vector for each blob in rebuild_word and box_word.
std::vector<std::string> correct_text;
// Less-well documented members.
// TODO(rays) Add more documentation here.
WERD_CHOICE *ep_choice = nullptr; // ep text TODO(rays) delete this.
REJMAP reject_map; // best_choice rejects
bool tess_failed = false;
/*
If tess_failed is true, one of the following tests failed when Tess
returned:
- The outword blob list was not the same length as the best_choice string;
- The best_choice string contained ALL blanks;
- The best_choice string was zero length
*/
bool tess_accepted = false; // Tess thinks its ok?
bool tess_would_adapt = false; // Tess would adapt?
bool done = false; // ready for output?
bool small_caps = false; // word appears to be small caps
bool odd_size = false; // word is bigger than line or leader dots.
// The fontinfos are pointers to data owned by the classifier.
const FontInfo *fontinfo = nullptr;
const FontInfo *fontinfo2 = nullptr;
int8_t fontinfo_id_count = 0; // number of votes
int8_t fontinfo_id2_count = 0; // number of votes
bool guessed_x_ht = true;
bool guessed_caps_ht = true;
CRUNCH_MODE unlv_crunch_mode = CR_NONE;
float x_height = 0.0f; // post match estimate
float caps_height = 0.0f; // post match estimate
float baseline_shift = 0.0f; // post match estimate.
// Certainty score for the spaces either side of this word (LSTM mode).
// MIN this value with the actual word certainty.
float space_certainty = 0.0f;
/*
To deal with fuzzy spaces we need to be able to combine "words" to form
combinations when we suspect that the gap is a non-space. The (new) text
ord code generates separate words for EVERY fuzzy gap - flags in the word
indicate whether the gap is below the threshold (fuzzy kern) and is thus
NOT a real word break by default, or above the threshold (fuzzy space) and
this is a real word break by default.
The WERD_RES list contains all these words PLUS "combination" words built
out of (copies of) the words split by fuzzy kerns. The separate parts have
their "part_of_combo" flag set true and should be IGNORED on a default
reading of the list.
Combination words are FOLLOWED by the sequence of part_of_combo words
which they combine.
*/
bool combination = false; // of two fuzzy gap wds
bool part_of_combo = false; // part of a combo
bool reject_spaces = false; // Reject spacing?
WERD_RES() = default;
// Wraps the given input word; every other member keeps its default member
// initializer. The word is borrowed, not owned (combination stays false).
WERD_RES(WERD *the_word) : word(the_word) {}
// Copy constructor.
// Deep copies everything except the ratings MATRIX.
// To get that use deep_copy below.
// The actual copying is delegated to operator=; only the ELIST_LINK base is
// copy-constructed directly.
WERD_RES(const WERD_RES &source) : ELIST_LINK(source) {
// combination is used in function Clear which is called from operator=.
// It must therefore hold a defined value (false) before the assignment runs.
combination = false;
*this = source; // see operator=
}
~WERD_RES();
// Returns the UTF-8 string for the given blob index in the best_choice word,
// given that we know whether we are in a right-to-left reading context.
// This matters for mirrorable characters such as parentheses. We recognize
// characters purely based on their shape on the page, and by default produce
// the corresponding unicode for a left-to-right context.
// Returns nullptr if there is no best_choice or no unicharset, if blob_index
// is out of range, or if the unichar id at that index is not valid for the
// unicharset.
const char *BestUTF8(int blob_index, bool in_rtl_context) const {
  // Without a unicharset the ids cannot be converted; the sibling
  // Any*CharsInWord methods guard against this in the same way.
  if (uch_set == nullptr || best_choice == nullptr) {
    return nullptr;
  }
  if (blob_index < 0 || blob_index >= best_choice->length()) {
    return nullptr;
  }
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size()) {
    return nullptr;
  }
  // In a right-to-left context use the mirrored form when one is provided.
  UNICHAR_ID mirrored = uch_set->get_mirror(id);
  if (in_rtl_context && mirrored > 0) {
    id = mirrored;
  }
  return uch_set->id_to_unichar_ext(id);
}
// Returns the UTF-8 string for the given blob index in the raw_choice word.
// Returns nullptr if there is no raw_choice or no unicharset, if blob_index
// is out of range, or if the unichar id at that index is not valid for the
// unicharset.
const char *RawUTF8(int blob_index) const {
  // raw_choice and uch_set both default to nullptr, so they must be checked
  // before use, just as BestUTF8 checks best_choice.
  if (raw_choice == nullptr || uch_set == nullptr) {
    return nullptr;
  }
  if (blob_index < 0 || blob_index >= raw_choice->length()) {
    return nullptr;
  }
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size()) {
    return nullptr;
  }
  return uch_set->id_to_unichar(id);
}
// Returns the direction of the unichar at the given blob index in the
// best_choice word, or UNICHARSET::U_OTHER_NEUTRAL if there is no
// best_choice or no unicharset, or if blob_index is out of range.
UNICHARSET::Direction SymbolDirection(int blob_index) const {
  // Without a unicharset no direction can be looked up; the sibling
  // Any*CharsInWord methods guard against this in the same way.
  if (uch_set == nullptr || best_choice == nullptr) {
    return UNICHARSET::U_OTHER_NEUTRAL;
  }
  if (blob_index >= best_choice->length() || blob_index < 0) {
    return UNICHARSET::U_OTHER_NEUTRAL;
  }
  return uch_set->get_direction(best_choice->unichar_id(blob_index));
}
// Returns true if any unichar in best_choice has a right-to-left direction
// (U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC) according to uch_set.
// Returns false if there is no unicharset, no best_choice, or an empty word.
bool AnyRtlCharsInWord() const {
  const bool have_word =
      uch_set != nullptr && best_choice != nullptr && best_choice->length() >= 1;
  if (have_word) {
    for (int pos = 0; pos < best_choice->length(); ++pos) {
      const int uid = best_choice->unichar_id(pos);
      const bool legal = uid >= 0 && uid < uch_set->size();
      if (!legal) {
        continue; // Ignore illegal chars.
      }
      switch (uch_set->get_direction(uid)) {
        case UNICHARSET::U_RIGHT_TO_LEFT:
        case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
          return true;
        default:
          break;
      }
    }
  }
  return false;
}
bool AnyLtrCharsInWord() const {
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
return false;
}
for (int id = 0; id < best_choice->length(); id++) {
int unichar_id = best_choice->unichar_id(id);
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
continue; // Ignore illegal chars.
}
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) {
return true;
}
}
return false;
}
// Return whether the blobs in this WERD_RES 0, 1,... come from an engine
// that gave us the unichars in reading order (as opposed to strict left
// to right).
// The answer is taken from best_choice->unichars_in_script_order().
// best_choice is dereferenced without a null check, so the caller must make
// sure it has been set.
bool UnicharsInReadingOrder() const {
return best_choice->unichars_in_script_order();
}
void Clear();
void ClearResults();
void ClearWordChoices();
void ClearRatings();
// Deep copies everything except the ratings MATRIX.
// To get that use deep_copy below.
WERD_RES &operator=(const WERD_RES &source); // from this
void CopySimpleFields(const WERD_RES &source);
// Initializes a blank (default constructed) WERD_RES from one that has
// already been recognized.
// Use SetupFor*Recognition afterwards to complete the setup and make
// it ready for a retry recognition.
void InitForRetryRecognition(const WERD_RES &source);
// Sets up the members used in recognition: bln_boxes, chopped_word,
// seam_array, denorm. Returns false if
// the word is empty and sets up fake results. If use_body_size is
// true and row->body_size is set, then body_size will be used for
// blob normalization instead of xheight + ascrise. This flag is for
// those languages that are using CJK pitch model and thus it has to
// be true if and only if tesseract->textord_use_cjk_fp_model is
// true.
// If allow_detailed_fx is true, the feature extractor will receive fine
// precision outline information, allowing smoother features and better
// features on low resolution images.
// The norm_mode sets the default mode for normalization in absence
// of any of the above flags. It should really be a tesseract::OcrEngineMode
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
// Returns false if the word is empty and sets up fake results.
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx, ROW *row,
const BLOCK *block);
// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
// accumulators from a made chopped word. We presume the fields are already
// empty.
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void SetupFake(const UNICHARSET &uch);
// Set the word as having the script of the input unicharset.
void SetupWordScript(const UNICHARSET &unicharset_in);
// Sets up the blamer_bundle if it is not null, using the initialized denorm.
void SetupBlamerBundle();
// Computes the blob_widths and blob_gaps from the chopped_word.
void SetupBlobWidthsAndGaps();
// Updates internal data to account for a new SEAM (chop) at the given
// blob_number. Fixes the ratings matrix and states in the choices, as well
// as the blob widths and gaps.
void InsertSeam(int blob_number, SEAM *seam);
// Returns true if all the word choices except the first have adjust_factors
// worse than the given threshold.
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const;
// Returns true if the current word is ambiguous (by number of answers or
// by dangerous ambigs.)
bool IsAmbiguous();
// Returns true if the ratings matrix size matches the sum of each of the
// segmentation states.
bool StatesAllValid();
// Prints a list of words found if debug is true or the word result matches
// the word_to_debug.
void DebugWordChoices(bool debug, const char *word_to_debug);
// Prints the top choice along with the accepted/done flags.
void DebugTopChoice(const char *msg) const;
// Removes from best_choices all choices which are not within a reasonable
// range of the best choice.
void FilterWordChoices(int debug_level);
// Computes a set of distance thresholds used to control adaption.
// Compares the best choice for the current word to the best raw choice
// to determine which characters were classified incorrectly by the
// classifier. Then places a separate threshold into thresholds for each
// character in the word. If the classifier was correct, max_rating is placed
// into thresholds. If the classifier was incorrect, the mean match rating
// (error percentage) of the classifier's incorrect choice minus some margin
// is placed into thresholds. This can then be used by the caller to try to
// create a new template for the desired class that will classify the
// character with a rating better than the threshold value. The match rating
// placed into thresholds is never allowed to be below min_rating in order to
// prevent trying to make overly tight templates.
// min_rating limits how tight to make a template.
// max_rating limits how loose to make a template.
// rating_margin denotes the amount of margin to put in template.
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
float rating_margin, float *thresholds);
// Saves a copy of the word_choice if it has the best unadjusted rating.
// Returns true if the word_choice was the new best.
bool LogNewRawChoice(WERD_CHOICE *word_choice);
// Consumes word_choice by adding it to best_choices, (taking ownership) if
// the certainty for word_choice is some distance of the best choice in
// best_choices, or by deleting the word_choice and returning false.
// The best_choices list is kept in sorted order by rating. Duplicates are
// removed, and the list is kept no longer than max_num_choices in length.
// Returns true if the word_choice is still a valid pointer.
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice);
// Prints a brief list of all the best choices.
void PrintBestChoices() const;
// Returns the sum of the widths of the blob between start_blob and last_blob
// inclusive.
int GetBlobsWidth(int start_blob, int last_blob) const;
// Returns the width of a gap between the specified blob and the next one.
int GetBlobsGap(int blob_index) const;
// Returns the BLOB_CHOICE corresponding to the given index in the
// best choice word taken from the appropriate cell in the ratings MATRIX.
// Borrowed pointer, so do not delete. May return nullptr if there is no
// BLOB_CHOICE matching the unichar_id at the given index.
BLOB_CHOICE *GetBlobChoice(int index) const;
// Returns the BLOB_CHOICE_LIST corresponding to the given index in the
// best choice word taken from the appropriate cell in the ratings MATRIX.
// Borrowed pointer, so do not delete.
BLOB_CHOICE_LIST *GetBlobChoices(int index) const;
// Moves the results fields from word to this. This takes ownership of all
// the data, so word can be destructed.
// word1.ConsumeWordResults(word);
// delete word;
// is simpler and faster than:
// word1 = *word;
// delete word;
// as it doesn't need to copy and reallocate anything.
void ConsumeWordResults(WERD_RES *word);
// Replace the best choice and rebuild box word.
// choice must be from the current best_choices list.
void ReplaceBestChoice(WERD_CHOICE *choice);
// Builds the rebuild_word and sets the best_state from the chopped_word and
// the best_choice->state.
void RebuildBestState();
// Copies the chopped_word to the rebuild_word, faking a best_state as well.
// Also sets up the output box_word.
void CloneChoppedToRebuild();
// Sets/replaces the box_word with one made from the rebuild_word.
void SetupBoxWord();
// Sets up the script positions in the best_choice using the best_choice
// to get the unichars, and the unicharset to get the target positions.
void SetScriptPositions();
// Sets all the blobs in all the words (best choice and alternates) to be
// the given position. (When a sub/superscript is recognized as a separate
// word, it falls victim to the rule that a whole word cannot be sub or
// superscript, so this function overrides that problem.)
void SetAllScriptPositions(tesseract::ScriptPos position);
// Classifies the word with some already-calculated BLOB_CHOICEs.
// The choices are an array of blob_count pointers to BLOB_CHOICE,
// providing a single classifier result for each blob.
// The BLOB_CHOICEs are consumed and the word takes ownership.
// The number of blobs in the box_word must match blob_count.
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices);
// Creates a WERD_CHOICE for the word using the top choices from the leading
// diagonal of the ratings matrix.
void FakeWordFromRatings(PermuterType permuter);
// Copies the best_choice strings to the correct_text for adaption/training.
void BestChoiceToCorrectText();
// Merges 2 adjacent blobs in the result if the permanent callback
// class_cb returns other than INVALID_UNICHAR_ID, AND the permanent
// callback box_cb is nullptr or returns true, setting the merged blob
// result to the class returned from class_cb.
// Returns true if anything was merged.
bool ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
std::function<bool(const TBOX &, const TBOX &)> box_cb);
// Merges 2 adjacent blobs in the result (index and index+1) and corrects
// all the data to account for the change.
void MergeAdjacentBlobs(int index);
// Callback helper for fix_quotes returns a double quote if both
// arguments are quote, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2);
void fix_quotes();
// Callback helper for fix_hyphens returns UNICHAR_ID of - if both
// arguments are hyphen, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2);
// Callback helper for fix_hyphens returns true if box1 and box2 overlap
// (assuming both on the same textline, are in order and a chopped em dash.)
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2);
void fix_hyphens();
// Callback helper for merge_tess_fails returns a space if both
// arguments are space, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2);
void merge_tess_fails();
// Builds a fully independent duplicate of *src. Copy-constructing a WERD_RES
// does not duplicate the ratings MATRIX, so when the source owns one it is
// cloned separately via DeepCopy(). The caller owns the returned object.
static WERD_RES *deep_copy(const WERD_RES *src) {
  WERD_RES *duplicate = new WERD_RES(*src);
  if (src->ratings == nullptr) {
    // Nothing further to clone.
    return duplicate;
  }
  duplicate->ratings = src->ratings->DeepCopy();
  return duplicate;
}
// Appends the blobs of word_res onto this word (eliminating spaces between).
// Because the merge can be requested in either direction, the beginning-of-line
// and end-of-line flags of the two words are OR-ed together rather than
// taken from just one side.
void copy_on(WERD_RES *word_res) {
  const bool starts_line = word->flag(W_BOL) || word_res->word->flag(W_BOL);
  word->set_flag(W_BOL, starts_line);
  const bool ends_line = word->flag(W_EOL) || word_res->word->flag(W_EOL);
  word->set_flag(W_EOL, ends_line);
  // Delegate the actual blob transfer to the underlying WERD.
  word->copy_on(word_res->word);
}
// Returns true if the collection of count pieces, starting at start, are all
// natural connected components, ie there are no real chops involved.
bool PiecesAllNatural(int start, int count) const;
};
/*************************************************************************
* PAGE_RES_IT - Page results iterator
*************************************************************************/
// Iterator over the words of a PAGE_RES. It tracks the previous, current and
// next word together with the row and block each of them belongs to.
//
// All raw pointer members carry nullptr default member initializers so that a
// default-constructed iterator has a well-defined state: comparing it with
// operator== or querying word()/row()/block() no longer reads indeterminate
// pointers.
class TESS_API PAGE_RES_IT {
public:
  PAGE_RES *page_res = nullptr; // page being iterated

  // Creates an iterator that is not attached to any page; every pointer
  // member is nullptr.
  PAGE_RES_IT() = default;

  // Attaches to the_page_res and positions the iterator via restart_page().
  // Intentionally left implicit to stay source-compatible with existing
  // callers.
  PAGE_RES_IT(PAGE_RES *the_page_res) { // page result
    page_res = the_page_res;
    restart_page(); // ready to scan
  }

  // Do two PAGE_RES_ITs point at the same word?
  // This is much cheaper than cmp().
  bool operator==(const PAGE_RES_IT &other) const {
    return word_res == other.word_res && row_res == other.row_res && block_res == other.block_res;
  }

  bool operator!=(const PAGE_RES_IT &other) const {
    return !(*this == other);
  }

  // Given another PAGE_RES_IT to the same page,
  //  this before other:     -1
  //  this equal to other:    0
  //  this later than other:  1
  int cmp(const PAGE_RES_IT &other) const;

  WERD_RES *restart_page() {
    return start_page(false); // Skip empty blocks.
  }
  WERD_RES *restart_page_with_empties() {
    return start_page(true); // Allow empty blocks.
  }
  WERD_RES *start_page(bool empty_ok);

  WERD_RES *restart_row();

  // ============ Methods that mutate the underlying structures ===========
  // Note that these methods will potentially invalidate other PAGE_RES_ITs
  // and are intended to be used only while a single PAGE_RES_IT is active.
  // This problem needs to be taken into account if these mutation operators
  // are ever provided to PageIterator or its subclasses.

  // Inserts the new_word and a corresponding WERD_RES before the current
  // position. The simple fields of the WERD_RES are copied from clone_res and
  // the resulting WERD_RES is returned for further setup with best_choice etc.
  WERD_RES *InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word);

  // Replaces the current WERD/WERD_RES with the given words. The given words
  // contain fake blobs that indicate the position of the characters. These are
  // replaced with real blobs from the current word as much as possible.
  void ReplaceCurrentWord(PointerVector<WERD_RES> *words);

  // Deletes the current WERD_RES and its underlying WERD.
  void DeleteCurrentWord();

  // Makes the current word a fuzzy space if not already fuzzy. Updates
  // corresponding part of combo if required.
  void MakeCurrentWordFuzzy();

  WERD_RES *forward() { // Get next word.
    return internal_forward(false, false);
  }
  // Move forward, but allow empty blocks to show as single nullptr words.
  WERD_RES *forward_with_empties() {
    return internal_forward(false, true);
  }

  WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph
  WERD_RES *forward_block();     // get first word in next non-empty block

  WERD_RES *prev_word() const { // previous word
    return prev_word_res;
  }
  ROW_RES *prev_row() const { // row of prev word
    return prev_row_res;
  }
  BLOCK_RES *prev_block() const { // block of prev word
    return prev_block_res;
  }
  WERD_RES *word() const { // current word
    return word_res;
  }
  ROW_RES *row() const { // row of current word
    return row_res;
  }
  BLOCK_RES *block() const { // block of cur. word
    return block_res;
  }
  WERD_RES *next_word() const { // next word
    return next_word_res;
  }
  ROW_RES *next_row() const { // row of next word
    return next_row_res;
  }
  BLOCK_RES *next_block() const { // block of next word
    return next_block_res;
  }
  void rej_stat_word(); // for page/block/row
  void ResetWordIterator();

private:
  WERD_RES *internal_forward(bool new_block, bool empty_ok);

  WERD_RES *prev_word_res = nullptr;   // previous word
  ROW_RES *prev_row_res = nullptr;     // row of prev word
  BLOCK_RES *prev_block_res = nullptr; // block of prev word

  WERD_RES *word_res = nullptr;   // current word
  ROW_RES *row_res = nullptr;     // row of current word
  BLOCK_RES *block_res = nullptr; // block of cur. word

  WERD_RES *next_word_res = nullptr;   // next word
  ROW_RES *next_row_res = nullptr;     // row of next word
  BLOCK_RES *next_block_res = nullptr; // block of next word

  BLOCK_RES_IT block_res_it; // iterators
  ROW_RES_IT row_res_it;
  WERD_RES_IT word_res_it;
  // Iterators used to get the state of word_res_it for the current word.
  // Since word_res_it is 2 words further on, this is otherwise hard to do.
  WERD_RES_IT wr_it_of_current_word;
  WERD_RES_IT wr_it_of_next_word;
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,43 @@
///////////////////////////////////////////////////////////////////////
// File: params_training_featdef.cpp
// Description: Utility functions for params training features.
// Author: David Eger
// Created: Mon Jun 11 11:26:42 PDT 2012
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include <cstring>
#include "params_training_featdef.h"
namespace tesseract {
// Maps a feature name to its position in kParamsTrainingFeatureTypeName.
// Returns -1 when name is nullptr or matches no table entry. Null entries in
// the table are skipped.
int ParamsTrainingFeatureByName(const char *name) {
  if (name != nullptr) {
    const int num_names =
        sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]);
    for (int index = 0; index < num_names; ++index) {
      const char *candidate = kParamsTrainingFeatureTypeName[index];
      if (candidate != nullptr && strcmp(name, candidate) == 0) {
        return index;
      }
    }
  }
  return -1;
}
} // namespace tesseract

View File

@ -0,0 +1,154 @@
///////////////////////////////////////////////////////////////////////
// File: params_training_featdef.h
// Description: Feature definitions for params training.
// Author: Rika Antonova
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
#define TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_
#include <cstring> // for memset
#include <string>
#include <vector>
namespace tesseract {
// Maximum number of unichars in the small and medium sized words.
// NOTE(review): presumably these thresholds select between the *_SHORT,
// *_MED and *_LONG feature variants declared below - confirm against the
// code that fills in the features.
static const int kMaxSmallWordUnichars = 3;
static const int kMaxMediumWordUnichars = 6;
// Raw features extracted from a single OCR hypothesis.
// The features are normalized (by outline length or number of unichars as
// appropriate) real-valued quantities with unbounded range and
// unknown distribution.
// Normalization / binarization of these features is done at a later stage.
// Note: when adding new fields to this enum make sure to modify
// kParamsTrainingFeatureTypeName
// Index of each params-training feature. The numeric values are used as
// indices into kParamsTrainingFeatureTypeName, so the two tables must list
// the same entries in the same order.
enum kParamsTrainingFeatureType {
  // Digits
  PTRAIN_DIGITS_SHORT, // 0
  PTRAIN_DIGITS_MED,   // 1
  PTRAIN_DIGITS_LONG,  // 2
  // Number or pattern (NUMBER_PERM, USER_PATTERN_PERM)
  PTRAIN_NUM_SHORT, // 3
  PTRAIN_NUM_MED,   // 4
  PTRAIN_NUM_LONG,  // 5
  // Document word (DOC_DAWG_PERM)
  PTRAIN_DOC_SHORT, // 6
  PTRAIN_DOC_MED,   // 7
  PTRAIN_DOC_LONG,  // 8
  // Word (SYSTEM_DAWG_PERM, USER_DAWG_PERM, COMPOUND_PERM)
  PTRAIN_DICT_SHORT, // 9
  PTRAIN_DICT_MED,   // 10
  PTRAIN_DICT_LONG,  // 11
  // Frequent word (FREQ_DAWG_PERM)
  PTRAIN_FREQ_SHORT,          // 12
  PTRAIN_FREQ_MED,            // 13
  PTRAIN_FREQ_LONG,           // 14
  PTRAIN_SHAPE_COST_PER_CHAR, // 15
  PTRAIN_NGRAM_COST_PER_CHAR, // 16
  PTRAIN_NUM_BAD_PUNC,        // 17
  PTRAIN_NUM_BAD_CASE,        // 18
  PTRAIN_XHEIGHT_CONSISTENCY, // 19
  PTRAIN_NUM_BAD_CHAR_TYPE,   // 20
  PTRAIN_NUM_BAD_SPACING,     // 21
  PTRAIN_NUM_BAD_FONT,        // 22
  PTRAIN_RATING_PER_CHAR,     // 23
  PTRAIN_NUM_FEATURE_TYPES    // count of real features; keep last
};

// Printable name of each kParamsTrainingFeatureType value, indexed by the
// enum value.
static const char *const kParamsTrainingFeatureTypeName[] = {
    "PTRAIN_DIGITS_SHORT",        // 0
    "PTRAIN_DIGITS_MED",          // 1
    "PTRAIN_DIGITS_LONG",         // 2
    "PTRAIN_NUM_SHORT",           // 3
    "PTRAIN_NUM_MED",             // 4
    "PTRAIN_NUM_LONG",            // 5
    "PTRAIN_DOC_SHORT",           // 6
    "PTRAIN_DOC_MED",             // 7
    "PTRAIN_DOC_LONG",            // 8
    "PTRAIN_DICT_SHORT",          // 9
    "PTRAIN_DICT_MED",            // 10
    "PTRAIN_DICT_LONG",           // 11
    "PTRAIN_FREQ_SHORT",          // 12
    "PTRAIN_FREQ_MED",            // 13
    "PTRAIN_FREQ_LONG",           // 14
    "PTRAIN_SHAPE_COST_PER_CHAR", // 15
    "PTRAIN_NGRAM_COST_PER_CHAR", // 16
    "PTRAIN_NUM_BAD_PUNC",        // 17
    "PTRAIN_NUM_BAD_CASE",        // 18
    "PTRAIN_XHEIGHT_CONSISTENCY", // 19
    "PTRAIN_NUM_BAD_CHAR_TYPE",   // 20
    "PTRAIN_NUM_BAD_SPACING",     // 21
    "PTRAIN_NUM_BAD_FONT",        // 22
    "PTRAIN_RATING_PER_CHAR",     // 23
};

// Fail the build if a feature is added to the enum without a matching name
// (or vice versa) instead of silently mis-indexing the table at run time.
static_assert(sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]) ==
                  PTRAIN_NUM_FEATURE_TYPES,
              "kParamsTrainingFeatureTypeName is out of sync with kParamsTrainingFeatureType");
// Returns the index of the given feature (by name),
// or -1 meaning the feature is unknown.
// The name is compared exactly (case-sensitive) against the entries of
// kParamsTrainingFeatureTypeName; a nullptr name also yields -1.
int ParamsTrainingFeatureByName(const char *name);
// Entry with features extracted from a single OCR hypothesis for a word.
struct ParamsTrainingHypothesis {
ParamsTrainingHypothesis() : cost(0.0) {
memset(features, 0, sizeof(features));
}
ParamsTrainingHypothesis(const ParamsTrainingHypothesis &other) {
memcpy(features, other.features, sizeof(features));
str = other.str;
cost = other.cost;
}
ParamsTrainingHypothesis &operator=(const ParamsTrainingHypothesis &other) {
memcpy(features, other.features, sizeof(features));
str = other.str;
cost = other.cost;
return *this;
}
std::string str; // string corresponding to word hypothesis (for debugging)
float features[PTRAIN_NUM_FEATURE_TYPES];
float cost; // path cost computed by segsearch
};
// A list of hypotheses explored during one run of segmentation search.
using ParamsTrainingHypothesisList = std::vector<ParamsTrainingHypothesis>;
// A bundle that accumulates all of the hypothesis lists explored during all
// of the runs of segmentation search on a word (e.g. a list of hypotheses
// explored on PASS1, PASS2, fix xheight pass, etc).
// Accumulates one ParamsTrainingHypothesisList per run of the segmentation
// search. New hypotheses are always appended to the most recently started
// list.
class ParamsTrainingBundle {
public:
  ParamsTrainingBundle() = default;

  // Starts a new hypothesis list.
  // Should be called at the beginning of a new run of the segmentation search.
  void StartHypothesisList() {
    hyp_list_vec.emplace_back();
  }

  // Adds a copy of other to the current hypothesis list and returns a
  // reference to the newly added entry. If no list has been started yet, one
  // is started first. The returned reference is only valid until the list is
  // next modified.
  ParamsTrainingHypothesis &AddHypothesis(const ParamsTrainingHypothesis &other) {
    if (hyp_list_vec.empty()) {
      StartHypothesisList();
    }
    ParamsTrainingHypothesisList &current = hyp_list_vec.back();
    // push_back copies the argument itself; no explicit temporary is needed.
    current.push_back(other);
    return current.back();
  }

  // One entry per segmentation search run, in the order the runs started.
  std::vector<ParamsTrainingHypothesisList> hyp_list_vec;
};
} // namespace tesseract
#endif // TESSERACT_WORDREC_PARAMS_TRAINING_FEATDEF_H_

View File

@ -0,0 +1,388 @@
/**********************************************************************
* File: pdblock.cpp
* Description: PDBLK member functions and iterator functions.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "pdblock.h"
#include <allheaders.h>
#include <cinttypes> // for PRId32
#include <cstdlib>
#include <memory> // std::unique_ptr
namespace tesseract {
// Text height passed to ScrollView::TextAttributes when PDBLK::plot labels a
// block with its serial number.
#define BLOCK_LABEL_HEIGHT 150 // char height of block id
// Raised by BLOCK_LINE_IT::get_line when the requested y lies outside the
// block's bounding box.
constexpr ERRCODE BADBLOCKLINE("Y coordinate in block out of bounds");
// Raised by BLOCK_LINE_IT::get_line when no rectangle of the block covers the
// requested y.
constexpr ERRCODE LOSTBLOCKLINE("Can't find rectangle for line");
/**********************************************************************
* PDBLK::PDBLK
*
* Constructor for a simple rectangular block.
**********************************************************************/
// Builds a rectangular block: the bounding box spans (xmin, ymin) to
// (xmax, ymax), and each side list receives the two vertices of the
// corresponding vertical edge, bottom first. No polygon is attached and the
// block index starts at 0.
PDBLK::PDBLK(int16_t xmin, int16_t ymin, int16_t xmax, int16_t ymax)
    : box(ICOORD(xmin, ymin), ICOORD(xmax, ymax)) {
  hand_poly = nullptr;

  // Left edge, bottom to top.
  ICOORDELT_IT left_edge(&leftside);
  left_edge.add_to_end(new ICOORDELT(xmin, ymin));
  left_edge.add_to_end(new ICOORDELT(xmin, ymax));

  // Right edge, bottom to top.
  ICOORDELT_IT right_edge(&rightside);
  right_edge.add_to_end(new ICOORDELT(xmax, ymin));
  right_edge.add_to_end(new ICOORDELT(xmax, ymax));

  index_ = 0;
}
/**********************************************************************
* PDBLK::set_sides
*
* Sets left and right vertex lists
**********************************************************************/
// Replaces both boundary vertex lists: the existing left/right vertices are
// cleared and the contents of the supplied lists are inserted in their place.
void PDBLK::set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right) {
  ICOORDELT_IT left_edge(&leftside);
  ICOORDELT_IT right_edge(&rightside);

  // Left boundary: drop the old vertices, rewind, insert the new list.
  leftside.clear();
  left_edge.move_to_first();
  left_edge.add_list_before(left);

  // Right boundary: same procedure.
  rightside.clear();
  right_edge.move_to_first();
  right_edge.add_list_before(right);
}
/**********************************************************************
* PDBLK::contains
*
* Return true if the given point is within the block.
**********************************************************************/
// Reports whether pt lies inside any of the rectangles that make up the
// block. All four rectangle edges count as inside.
bool PDBLK::contains(ICOORD pt) {
  BLOCK_RECT_IT rect_it(this);
  ICOORD lower_left;
  ICOORD upper_right;
  for (rect_it.start_block(); !rect_it.cycled_rects(); rect_it.forward()) {
    rect_it.bounding_box(lower_left, upper_right);
    const bool within_x = pt.x() >= lower_left.x() && pt.x() <= upper_right.x();
    const bool within_y = pt.y() >= lower_left.y() && pt.y() <= upper_right.y();
    if (within_x && within_y) {
      return true;
    }
  }
  return false;
}
/**********************************************************************
* PDBLK::move
*
* Reposition block
**********************************************************************/
void PDBLK::move( // reposition block
const ICOORD vec // by vector
) {
ICOORDELT_IT it(&leftside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
*(it.data()) += vec;
}
it.set_to_list(&rightside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
*(it.data()) += vec;
}
box.move(vec);
}
// Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering.
// If mask_box is not nullptr it receives the rotated bounding box, i.e. the
// position of the returned mask.
//
// When the block has a polygon outline, the polygon is copied, rotated and
// rasterized line by line; otherwise the whole rotated bounding box is set.
Image PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
  TBOX rotated_box(box);
  rotated_box.rotate(rerotation);
  Image pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
  if (hand_poly != nullptr) {
    // We are going to rotate, so get a deep copy of the points and
    // make a new POLY_BLOCK with it.
    ICOORDELT_LIST polygon;
    polygon.deep_copy(hand_poly->points(), ICOORDELT::deep_copy);
    POLY_BLOCK image_block(&polygon, hand_poly->isA());
    image_block.rotate(rerotation);
    // Block outline is a polygon, so use a PB_LINE_IT to get the
    // rasterized interior. (Runs of interior pixels on a line.)
    // The iterator lives on the stack; it needs no heap allocation and is
    // released automatically on every exit path.
    PB_LINE_IT lines(&image_block);
    // NOTE(review): the scan range comes from the unrotated box while the
    // polygon and the output row use rotated coordinates - confirm this is
    // intended for non-trivial rotations.
    for (int y = box.bottom(); y < box.top(); ++y) {
      // get_line hands back a list that we own for the duration of this row.
      const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines.get_line(y));
      if (!segments->empty()) {
        ICOORDELT_IT s_it(segments.get());
        // Each element of segments is a start x and x size of the
        // run of interior pixels.
        for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
          int start = s_it.data()->x();
          int xext = s_it.data()->y();
          // Set the run of pixels to 1. Image rows grow downwards, so the
          // y coordinate is flipped relative to the box.
          pixRasterop(pix, start - rotated_box.left(),
                      rotated_box.height() - 1 - (y - rotated_box.bottom()), xext, 1, PIX_SET,
                      nullptr, 0, 0);
        }
      }
    }
  } else {
    // Just fill the whole block as there is only a bounding box.
    pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), PIX_SET, nullptr, 0, 0);
  }
  if (mask_box != nullptr) {
    *mask_box = rotated_box;
  }
  return pix;
}
/**********************************************************************
* PDBLK::plot
*
* Plot the outline of a block in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Draws the block into window using colour. If the block has a polygon
// outline, drawing is delegated to it. Otherwise, when the left side list is
// not empty, the serial number is written at the first left vertex and the
// outline is traced with axis-aligned steps: up the left side, then along the
// right side, and finally closed by joining the two end points.
void PDBLK::plot( // draw outline
ScrollView *window, // window to draw in
int32_t serial, // serial number
ScrollView::Color colour // colour to draw in
) {
ICOORD startpt; // start of outline
ICOORD endpt; // end of outline
ICOORD prevpt; // previous point
ICOORDELT_IT it = &leftside; // iterator
// set the colour
window->Pen(colour);
window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false);
if (hand_poly != nullptr) {
hand_poly->plot(window, serial);
} else if (!leftside.empty()) {
startpt = *(it.data()); // bottom left corner
// tprintf("Block %d bottom left is (%d,%d)\n",
// serial,startpt.x(),startpt.y());
// Buffer for the decimal text of the serial number used as the label.
char temp_buff[34];
// NOTE(review): the two branches format differently for negative serials:
// snprintf prints a signed value, _ultoa an unsigned one.
# if !defined(_WIN32) || defined(__MINGW32__)
snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, serial);
# else
_ultoa(serial, temp_buff, 10);
# endif
window->Text(startpt.x(), startpt.y(), temp_buff);
window->SetCursor(startpt.x(), startpt.y());
// Walk the left side. Each step is drawn as a vertical move to the next
// vertex's y followed by a horizontal move to its x.
do {
prevpt = *(it.data()); // previous point
it.forward(); // move to next point
// draw round corner
window->DrawTo(prevpt.x(), it.data()->y());
window->DrawTo(it.data()->x(), it.data()->y());
} while (!it.at_last()); // until end of list
endpt = *(it.data()); // end point
// other side of boundary
window->SetCursor(startpt.x(), startpt.y());
it.set_to_list(&rightside);
prevpt = startpt;
// Walk the right side the same way, starting again from the first left
// vertex.
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// draw round corner
window->DrawTo(prevpt.x(), it.data()->y());
window->DrawTo(it.data()->x(), it.data()->y());
prevpt = *(it.data()); // previous point
}
// close boundary
window->DrawTo(endpt.x(), endpt.y());
}
}
#endif
/**********************************************************************
* PDBLK::operator=
*
* Assignment - duplicate the block structure, but with an EMPTY row list.
**********************************************************************/
// Copy-assigns the geometry of source: both side vertex lists are replaced by
// deep copies and the bounding box is copied. hand_poly and index_ are left
// untouched.
//
// Self-assignment is a no-op. Without the guard the lists would be cleared
// first and then "copied" from the now-empty source, wiping the outline.
PDBLK &PDBLK::operator=(const PDBLK &source) {
  if (this == &source) {
    return *this;
  }
  // this->ELIST_LINK::operator=(source);
  leftside.clear();
  rightside.clear();
  leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy);
  rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy);
  box = source.box;
  return *this;
}
/**********************************************************************
* BLOCK_RECT_IT::BLOCK_RECT_IT
*
* Construct a block rectangle iterator.
**********************************************************************/
// Binds the iterator to blkptr and its two side lists. If the block has any
// left-side vertices, the iterator is positioned on the first rectangle.
BLOCK_RECT_IT::BLOCK_RECT_IT(PDBLK *blkptr)
    : block(blkptr), left_it(&blkptr->leftside), right_it(&blkptr->rightside) {
  const bool has_vertices = !blkptr->leftside.empty();
  if (has_vertices) {
    start_block();
  }
}
/**********************************************************************
* BLOCK_RECT_IT::set_to_block
*
* Start a new block.
**********************************************************************/
// Re-targets the iterator at blkptr: both side iterators are pointed at the
// new block's lists and, unless the left list is empty, iteration is
// restarted at the first rectangle.
void BLOCK_RECT_IT::set_to_block(PDBLK *blkptr) {
  block = blkptr;
  left_it.set_to_list(&blkptr->leftside);
  right_it.set_to_list(&blkptr->rightside);
  if (blkptr->leftside.empty()) {
    return; // nothing to position on
  }
  start_block();
}
/**********************************************************************
* BLOCK_RECT_IT::start_block
*
* Restart a block.
**********************************************************************/
// Rewinds both side iterators to their first vertex, marks the cycle points
// and sets [ymin, ymax] to the first rectangle: from the first left vertex's
// y up to the lower of the next left and next right vertex.
void BLOCK_RECT_IT::start_block() {
  left_it.move_to_first();
  right_it.move_to_first();
  left_it.mark_cycle_pt();
  right_it.mark_cycle_pt();

  ymin = left_it.data()->y();

  // The rectangle ends at whichever side changes first.
  const int16_t next_left_y = left_it.data_relative(1)->y();
  const int16_t next_right_y = right_it.data_relative(1)->y();
  ymax = next_right_y < next_left_y ? next_right_y : next_left_y;
}
/**********************************************************************
* BLOCK_RECT_IT::forward
*
* Move to the next rectangle in the block.
**********************************************************************/
// Advances to the next rectangle of the block. Does nothing for an empty
// left list. When either side reaches its last vertex, both sides wrap back
// to the first rectangle.
void BLOCK_RECT_IT::forward() {
  if (left_it.empty()) {
    return;
  }

  // Step each side whose next vertex sits at the top of the current rect.
  if (left_it.data_relative(1)->y() == ymax) {
    left_it.forward();
  }
  if (right_it.data_relative(1)->y() == ymax) {
    right_it.forward();
  }

  const bool reached_end = left_it.at_last() || right_it.at_last();
  if (reached_end) {
    // Wrap around to the bottom of the block.
    left_it.move_to_first();
    right_it.move_to_first();
    ymin = left_it.data()->y();
  } else {
    // The old top becomes the new bottom.
    ymin = ymax;
  }

  // New top: the nearer of the two upcoming vertices.
  ymax = left_it.data_relative(1)->y();
  const int16_t next_right_y = right_it.data_relative(1)->y();
  if (next_right_y < ymax) {
    ymax = next_right_y;
  }
}
/**********************************************************************
* BLOCK_LINE_IT::get_line
*
* Get the start and width of a line in the block.
**********************************************************************/
// Finds the horizontal extent of the block at height y.
// Returns the x of the left edge and stores the width in xext.
// Raises BADBLOCKLINE if y is outside the block's bounding box (bottom
// inclusive, top exclusive) and LOSTBLOCKLINE if no rectangle covers y.
// The rectangle the iterator is currently on is tried first; only if that
// fails is the block rescanned from the start.
int16_t BLOCK_LINE_IT::get_line(int16_t y, int16_t &xext) {
  ICOORD bleft;
  ICOORD tright;

  block->bounding_box(bleft, tright);
  const bool outside_block = y < bleft.y() || y >= tright.y();
  if (outside_block) {
    // block->print(stderr,false);
    BADBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
  }

  // Fast path: the rectangle we are already positioned on.
  rect_it.bounding_box(bleft, tright);
  bool found = y >= bleft.y() && y < tright.y();

  if (!found) {
    // Slow path: scan every rectangle of the block.
    for (rect_it.start_block(); !rect_it.cycled_rects(); rect_it.forward()) {
      rect_it.bounding_box(bleft, tright);
      if (y >= bleft.y() && y < tright.y()) {
        found = true;
        break;
      }
    }
  }

  if (found) {
    xext = tright.x() - bleft.x(); // width of line
    return bleft.x();              // start of line
  }

  LOSTBLOCKLINE.error("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
  return 0; // dummy to stop warning
}
} // namespace tesseract

View File

@ -0,0 +1,184 @@
/**********************************************************************
* File: pdblock.h (Formerly pdblk.h)
* Description: Page block class definition.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef PDBLOCK_H
#define PDBLOCK_H
#include "clst.h"
#include "polyblk.h"
struct Pix;
namespace tesseract {
class PDBLK; // forward decl
CLISTIZEH(PDBLK)
/// page block
class PDBLK {
friend class BLOCK_RECT_IT; ///< block iterator
friend class BLOCK; ///< Page Block
public:
/// empty constructor
PDBLK() {
hand_poly = nullptr;
index_ = 0;
}
/// simple constructor
PDBLK(int16_t xmin, ///< bottom left
int16_t ymin,
int16_t xmax, ///< top right
int16_t ymax);
/// set vertex lists
///@param left list of left vertices
///@param right list of right vertices
void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);
/// destructor
~PDBLK() {
delete hand_poly;
}
POLY_BLOCK *poly_block() const {
return hand_poly;
}
/// set the poly block
void set_poly_block(POLY_BLOCK *blk) {
hand_poly = blk;
}
/// get box
void bounding_box(ICOORD &bottom_left, // bottom left
ICOORD &top_right) const { // topright
bottom_left = box.botleft();
top_right = box.topright();
}
/// get real box
const TBOX &bounding_box() const {
return box;
}
int index() const {
return index_;
}
void set_index(int value) {
index_ = value;
}
/// is pt inside block
bool contains(ICOORD pt);
/// reposition block
void move(const ICOORD vec); // by vector
// Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering.
// If not nullptr, mask_box is filled with the position box of the returned
// mask image.
Image render_mask(const FCOORD &rerotation, TBOX *mask_box);
#ifndef GRAPHICS_DISABLED
/// draw histogram
///@param window window to draw in
///@param serial serial number
///@param colour colour to draw in
void plot(ScrollView *window, int32_t serial, ScrollView::Color colour);
#endif // !GRAPHICS_DISABLED
/// assignment
///@param source from this
PDBLK &operator=(const PDBLK &source);
protected:
POLY_BLOCK *hand_poly; ///< weird as well
ICOORDELT_LIST leftside; ///< left side vertices
ICOORDELT_LIST rightside; ///< right side vertices
TBOX box; ///< bounding box
int index_; ///< Serial number of this block.
};
/// Iterates over the rectangles that make up a PDBLK. Each rectangle spans
/// [ymin, ymax] vertically and runs horizontally from the current left
/// vertex to the current right vertex.
class BLOCK_RECT_IT {
public:
  /// Creates an iterator over blkptr.
  ///@param blkptr block to iterate
  BLOCK_RECT_IT(PDBLK *blkptr);

  /// Switches the iterator to another block.
  ///@param blkptr block to iterate
  void set_to_block(PDBLK *blkptr);

  /// Restarts iteration at the first rectangle.
  void start_block();

  /// Moves to the next rectangle.
  void forward();

  /// Returns true once both side iterators have cycled their lists.
  bool cycled_rects() const {
    const bool left_done = left_it.cycled_list();
    return left_done && right_it.cycled_list();
  }

  /// Reports the current rectangle.
  ///@param bleft receives the bottom-left corner
  ///@param tright receives the top-right corner
  void bounding_box(ICOORD &bleft, ICOORD &tright) {
    bleft = ICOORD(left_it.data()->x(), ymin);
    tright = ICOORD(right_it.data()->x(), ymax);
  }

private:
  int16_t ymin = 0;       ///< bottom of rectangle
  int16_t ymax = 0;       ///< top of rectangle
  PDBLK *block = nullptr; ///< block to iterate
  ICOORDELT_IT left_it;   ///< iterator over the left boundary
  ICOORDELT_IT right_it;  ///< iterator over the right boundary
};
/// Line iterator: looks up the horizontal extent of a block at a given y,
/// using an internal BLOCK_RECT_IT to locate the covering rectangle.
class BLOCK_LINE_IT {
public:
  /// Creates a line iterator over blkptr.
  ///@param blkptr from block
  BLOCK_LINE_IT(PDBLK *blkptr) : block(blkptr), rect_it(blkptr) {}

  /// Switches to another block.
  ///@param blkptr block to start
  void set_to_block(PDBLK *blkptr) {
    block = blkptr;
    rect_it.set_to_block(blkptr);
  }

  /// Gets the start and width of the block at a given height.
  ///@param y line to get
  ///@param xext output extent
  int16_t get_line(int16_t y, int16_t &xext);

private:
  PDBLK *block;          ///< block to iterate
  BLOCK_RECT_IT rect_it; ///< rectangle iterator
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,158 @@
/**********************************************************************
* File: points.cpp (Formerly coords.c)
* Description: Member functions for coordinate classes.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#define _USE_MATH_DEFINES // for M_PI
#include "points.h"
#include "helpers.h"
#include "serialis.h"
#include <algorithm>
#include <cmath> // for M_PI
#include <cstdlib>
namespace tesseract {
// Scales the vector to unit length in place. Vectors shorter than 1e-10 are
// left unchanged and false is returned; otherwise returns true.
bool FCOORD::normalise() {
  const float magnitude = length();
  const bool too_short = magnitude < 0.0000000001;
  if (!too_short) {
    xcoord /= magnitude;
    ycoord /= magnitude;
  }
  return !too_short;
}
// Reads x then y from f. Stops at the first failure and returns false.
bool ICOORD::DeSerialize(TFile *f) {
  if (!f->DeSerialize(&xcoord)) {
    return false;
  }
  return f->DeSerialize(&ycoord);
}
// Writes x then y to f. Stops at the first failure and returns false.
bool ICOORD::Serialize(TFile *f) const {
  if (!f->Serialize(&xcoord)) {
    return false;
  }
  return f->Serialize(&ycoord);
}
// Stores (x, y), dividing both by a common integer factor when the larger
// absolute component would not fit in the 16-bit coordinates.
void ICOORD::set_with_shrink(int x, int y) {
  const int max_extent = std::max(abs(x), abs(y));
  // A factor of 1 leaves values that already fit untouched.
  const int factor = max_extent > INT16_MAX ? max_extent / INT16_MAX + 1 : 1;
  xcoord = x / factor;
  ycoord = y / factor;
}
// Signum in the fortran/basic sense: -1 for negative x, 0 for zero and
// 1 for positive x.
static int sign(int x) {
  if (x > 0) {
    return 1;
  }
  return x < 0 ? -1 : 0;
}
// Writes x then y to fp. Returns false as soon as either write fails.
bool ICOORD::Serialize(FILE *fp) const {
  if (!tesseract::Serialize(fp, &xcoord)) {
    return false;
  }
  return tesseract::Serialize(fp, &ycoord);
}
// Reads xcoord then ycoord from the given file.
// Returns false in case of error (ycoord is not read if xcoord fails).
// If swap is true, the bytes of both values are reversed after a successful
// read, to handle a big/little-endian mismatch.
bool ICOORD::DeSerialize(bool swap, FILE *fp) {
  const bool loaded =
      tesseract::DeSerialize(fp, &xcoord) && tesseract::DeSerialize(fp, &ycoord);
  if (loaded && swap) {
    ReverseN(&xcoord, sizeof(xcoord));
    ReverseN(&ycoord, sizeof(ycoord));
  }
  return loaded;
}
// Prepares the parameters for walking the pixels along this vector with the
// Bresenham line algorithm.
// The axis with the larger absolute extent is the "major" axis (x wins ties):
//   *major_step  unit step (-1, 0 or 1 on one axis) along the major axis.
//   *minor_step  unit step along the other axis.
//   *major       absolute extent along the major axis.
//   *minor       absolute extent along the minor axis.
// The caller starts with major/2 in an accumulator; on each step it moves by
// major_step and adds minor to the accumulator, and when the accumulator
// reaches major it subtracts major and also moves by minor_step.
void ICOORD::setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const {
  const int extent_x = abs(xcoord);
  const int extent_y = abs(ycoord);
  if (extent_y > extent_x) {
    // Y-direction is major.
    major_step->xcoord = 0;
    major_step->ycoord = sign(ycoord);
    minor_step->xcoord = sign(xcoord);
    minor_step->ycoord = 0;
    *major = extent_y;
    *minor = extent_x;
    return;
  }
  // X-direction is major (including the case of equal extents).
  major_step->xcoord = sign(xcoord);
  major_step->ycoord = 0;
  minor_step->xcoord = 0;
  minor_step->ycoord = sign(ycoord);
  *major = extent_x;
  *minor = extent_y;
}
// Returns the standard feature direction of this vector, ie its angle()
// converted by binary_angle_plus_pi (see there for the encoding).
uint8_t FCOORD::to_direction() const {
  const double radians = angle();
  return binary_angle_plus_pi(radians);
}
// Sets this to the unit vector (cos, sin) of the angle that corresponds to
// the given standard feature direction (see angle_from_direction).
void FCOORD::from_direction(uint8_t direction) {
  const double theta = angle_from_direction(direction);
  // Computed in double precision, then narrowed to the float members.
  xcoord = cos(theta);
  ycoord = sin(theta);
}
// Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a
// standard feature direction: an unsigned angle in 256ths of a circle,
// measured anticlockwise from (-1, 0).
uint8_t FCOORD::binary_angle_plus_pi(double radians) {
  // Shift by pi so that the direction (-1, 0) maps to 0, then rescale so that
  // a full turn of 2*pi becomes 256 units.
  const double in_256ths = (radians + M_PI) * 128.0 / M_PI;
  // Round to the nearest unit and wrap into [0, 256).
  return Modulo(IntCastRounded(in_256ths), 256);
}
// Inverse of binary_angle_plus_pi: returns the angle in radians, in the range
// [-pi, pi), for the given standard feature direction.
double FCOORD::angle_from_direction(uint8_t direction) {
  // 128 direction units correspond to pi radians; direction 0 is -pi.
  const double radians = direction * M_PI / 128.0 - M_PI;
  return radians;
}
// Returns the point on the given line nearest to this, ie the point such
// that the vector point->this is perpendicular to the line.
// The line is defined by line_point (any point on it) and dir_vector (its
// direction, which need not be a unit vector).
// NOTE(review): there is no guard against a zero-length dir_vector, which
// makes the division below 0/0.
FCOORD FCOORD::nearest_pt_on_line(const FCOORD &line_point, const FCOORD &dir_vector) const {
  const FCOORD offset = *this - line_point;
  // The dot product (%) is |dir_vector||offset|cos theta, so dividing it by
  // the squared length of dir_vector gives the fraction of dir_vector that
  // must be added to line_point to reach the nearest point:
  //   result = line_point + lambda * dir_vector.
  const double lambda = offset % dir_vector / dir_vector.sqlength();
  return line_point + (dir_vector * lambda);
}
} // namespace tesseract

View File

@ -0,0 +1,726 @@
/**********************************************************************
* File: points.h (Formerly coords.h)
* Description: Coordinate class definitions.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POINTS_H
#define POINTS_H
#include "elst.h"
#include "errcode.h" // for ASSERT_HOST
#include <tesseract/export.h> // for DLLSYM
#include <cmath> // for sqrt, atan2
#include <cstdio>
namespace tesseract {
class FCOORD;
/// Integer coordinate / 2-D vector with 16 bit components.
class ICOORD {
  friend class FCOORD;

public:
  /// Default constructor: the zero vector.
  ICOORD() : xcoord(0), ycoord(0) {}
  /// Constructor from components.
  ///@param xin x value
  ///@param yin y value
  ICOORD(int16_t xin, int16_t yin) : xcoord(xin), ycoord(yin) {}
  /// destructor
  ~ICOORD() = default;

  /// Reads both components from a TFile. Returns false on failure.
  bool DeSerialize(TFile *f);
  /// Writes both components to a TFile. Returns false on failure.
  bool Serialize(TFile *f) const;

  /// Returns the x component.
  int16_t x() const {
    return xcoord;
  }
  /// Returns the y component.
  int16_t y() const {
    return ycoord;
  }

  /// Overwrites the x component.
  void set_x(int16_t xin) {
    xcoord = xin;
  }
  /// Overwrites the y component.
  void set_y(int16_t yin) {
    ycoord = yin;
  }

  /// Set from the given x,y, shrinking the vector to fit if needed.
  void set_with_shrink(int x, int y);

  /// Returns the squared length, computed in int and converted to float.
  float sqlength() const {
    return static_cast<float>(xcoord * xcoord + ycoord * ycoord);
  }

  /// Returns the length.
  float length() const {
    return std::sqrt(sqlength());
  }

  /// Returns the squared distance between this and pt.
  /// The component differences are stored as 16 bit values before squaring.
  float pt_to_pt_sqdist(const ICOORD &pt) const {
    const auto dx = static_cast<int16_t>(xcoord - pt.xcoord);
    const auto dy = static_cast<int16_t>(ycoord - pt.ycoord);
    return static_cast<float>(dx * dx + dy * dy);
  }

  /// Returns the distance between this and pt.
  float pt_to_pt_dist(const ICOORD &pt) const {
    return std::sqrt(pt_to_pt_sqdist(pt));
  }

  /// Returns the angle of the vector in radians, as given by atan2(y, x).
  float angle() const {
    return static_cast<float>(std::atan2(ycoord, xcoord));
  }

  /// Tests equality of both components.
  bool operator==(const ICOORD &other) const {
    return xcoord == other.xcoord && ycoord == other.ycoord;
  }
  /// Tests inequality: true if either component differs.
  bool operator!=(const ICOORD &other) const {
    return !(*this == other);
  }

  /// rotate 90 deg anti
  friend ICOORD operator!(const ICOORD &);
  /// unary minus
  friend ICOORD operator-(const ICOORD &);
  /// add
  friend ICOORD operator+(const ICOORD &, const ICOORD &);
  /// add
  friend ICOORD &operator+=(ICOORD &, const ICOORD &);
  /// subtract
  friend ICOORD operator-(const ICOORD &, const ICOORD &);
  /// subtract
  friend ICOORD &operator-=(ICOORD &, const ICOORD &);
  /// scalar product
  friend int32_t operator%(const ICOORD &, const ICOORD &);
  /// cross product
  friend int32_t operator*(const ICOORD &, const ICOORD &);
  /// multiply
  friend ICOORD operator*(const ICOORD &, int16_t);
  /// multiply
  friend ICOORD operator*(int16_t, const ICOORD &);
  /// multiply
  friend ICOORD &operator*=(ICOORD &, int16_t);
  /// divide
  friend ICOORD operator/(const ICOORD &, int16_t);
  /// divide
  friend ICOORD &operator/=(ICOORD &, int16_t);

  /// Rotates this by the given vector.
  ///@param vec by vector
  void rotate(const FCOORD &vec);

  /// Setup for iterating over the pixels in a vector by the well-known
  /// Bresenham rendering algorithm.
  /// Starting with major/2 in the accumulator, on each step move by
  /// major_step, and then add minor to the accumulator. When
  /// accumulator >= major subtract major and also move by minor_step.
  void setup_render(ICOORD *major_step, ICOORD *minor_step, int *major, int *minor) const;

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE *fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE *fp);

protected:
  int16_t xcoord; ///< x value
  int16_t ycoord; ///< y value
};
class ICOORDELT : public ELIST_LINK,
public ICOORD
// embedded coord list
{
public:
/// empty constructor
ICOORDELT() = default;
/// constructor from ICOORD
ICOORDELT(ICOORD icoord) : ICOORD(icoord) {}
/// constructor
///@param xin x value
///@param yin y value
ICOORDELT(int16_t xin, int16_t yin) {
xcoord = xin;
ycoord = yin;
}
static ICOORDELT *deep_copy(const ICOORDELT *src) {
auto *elt = new ICOORDELT;
*elt = *src;
return elt;
}
};
ELISTIZEH(ICOORDELT)
class TESS_API FCOORD {
public:
/// empty constructor
FCOORD() = default;
/// constructor
///@param xvalue x value
///@param yvalue y value
FCOORD(float xvalue, float yvalue) {
xcoord = xvalue; // set coords
ycoord = yvalue;
}
FCOORD( // make from ICOORD
ICOORD icoord) { // coords to set
xcoord = icoord.xcoord;
ycoord = icoord.ycoord;
}
float x() const { // get coords
return xcoord;
}
float y() const {
return ycoord;
}
/// rewrite function
void set_x(float xin) {
xcoord = xin; // write new value
}
/// rewrite function
void set_y(float yin) { // value to set
ycoord = yin;
}
/// find sq length
float sqlength() const {
return xcoord * xcoord + ycoord * ycoord;
}
/// find length
float length() const {
return std::sqrt(sqlength());
}
/// sq dist between pts
float pt_to_pt_sqdist(const FCOORD &pt) const {
FCOORD gap;
gap.xcoord = xcoord - pt.xcoord;
gap.ycoord = ycoord - pt.ycoord;
return gap.sqlength();
}
/// Distance between pts
float pt_to_pt_dist(const FCOORD &pt) const {
return std::sqrt(pt_to_pt_sqdist(pt));
}
/// find angle
float angle() const {
return std::atan2(ycoord, xcoord);
}
// Returns the standard feature direction corresponding to this.
// See binary_angle_plus_pi below for a description of the direction.
uint8_t to_direction() const;
// Sets this with a unit vector in the given standard feature direction.
void from_direction(uint8_t direction);
// Converts an angle in radians (from ICOORD::angle or FCOORD::angle) to a
// standard feature direction as an unsigned angle in 256ths of a circle
// measured anticlockwise from (-1, 0).
static uint8_t binary_angle_plus_pi(double angle);
// Inverse of binary_angle_plus_pi returns an angle in radians for the
// given standard feature direction.
static double angle_from_direction(uint8_t direction);
// Returns the point on the given line nearest to this, ie the point such
// that the vector point->this is perpendicular to the line.
// The line is defined as a line_point and a dir_vector for its direction.
// dir_vector need not be a unit vector.
FCOORD nearest_pt_on_line(const FCOORD &line_point, const FCOORD &dir_vector) const;
/// Convert to unit vec
bool normalise();
/// test equality
bool operator==(const FCOORD &other) {
return xcoord == other.xcoord && ycoord == other.ycoord;
}
/// test inequality
bool operator!=(const FCOORD &other) {
return xcoord != other.xcoord || ycoord != other.ycoord;
}
/// rotate 90 deg anti
friend FCOORD operator!(const FCOORD &);
/// unary minus
friend FCOORD operator-(const FCOORD &);
/// add
friend FCOORD operator+(const FCOORD &, const FCOORD &);
/// add
friend FCOORD &operator+=(FCOORD &, const FCOORD &);
/// subtract
friend FCOORD operator-(const FCOORD &, const FCOORD &);
/// subtract
friend FCOORD &operator-=(FCOORD &, const FCOORD &);
/// scalar product
friend float operator%(const FCOORD &, const FCOORD &);
/// cross product
friend float operator*(const FCOORD &, const FCOORD &);
/// multiply
friend FCOORD operator*(const FCOORD &, float);
/// multiply
friend FCOORD operator*(float, const FCOORD &);
/// multiply
friend FCOORD &operator*=(FCOORD &, float);
/// divide
friend FCOORD operator/(const FCOORD &, float);
/// rotate
///@param vec by vector
void rotate(const FCOORD vec);
// unrotate - undo a rotate(vec)
// @param vec by vector
void unrotate(const FCOORD &vec);
/// divide
friend FCOORD &operator/=(FCOORD &, float);
private:
float xcoord; // 2 floating coords
float ycoord;
};
/**********************************************************************
 * operator!
 *
 * Returns src rotated 90 degrees anticlockwise, ie (x, y) -> (-y, x).
 **********************************************************************/
inline ICOORD operator!(const ICOORD &src) {
  return ICOORD(static_cast<int16_t>(-src.ycoord), src.xcoord);
}
/**********************************************************************
 * operator-
 *
 * Unary minus of an ICOORD: returns (-x, -y).
 **********************************************************************/
inline ICOORD operator-(const ICOORD &src) {
  return ICOORD(static_cast<int16_t>(-src.xcoord), static_cast<int16_t>(-src.ycoord));
}
/**********************************************************************
 * operator+
 *
 * Returns the component-wise sum of 2 ICOORDs.
 * Each component is narrowed back to 16 bits.
 **********************************************************************/
inline ICOORD operator+(const ICOORD &op1, const ICOORD &op2) {
  return ICOORD(static_cast<int16_t>(op1.xcoord + op2.xcoord),
                static_cast<int16_t>(op1.ycoord + op2.ycoord));
}
/**********************************************************************
 * operator+=
 *
 * Adds op2 to op1 in place, component-wise, and returns op1.
 **********************************************************************/
inline ICOORD &operator+=(ICOORD &op1, const ICOORD &op2) {
  op1.xcoord = static_cast<int16_t>(op1.xcoord + op2.xcoord);
  op1.ycoord = static_cast<int16_t>(op1.ycoord + op2.ycoord);
  return op1;
}
/**********************************************************************
 * operator-
 *
 * Returns the component-wise difference op1 - op2 of 2 ICOORDs.
 * Each component is narrowed back to 16 bits.
 **********************************************************************/
inline ICOORD operator-(const ICOORD &op1, const ICOORD &op2) {
  return ICOORD(static_cast<int16_t>(op1.xcoord - op2.xcoord),
                static_cast<int16_t>(op1.ycoord - op2.ycoord));
}
/**********************************************************************
 * operator-=
 *
 * Subtracts op2 from op1 in place, component-wise, and returns op1.
 **********************************************************************/
inline ICOORD &operator-=(ICOORD &op1, const ICOORD &op2) {
  op1.xcoord = static_cast<int16_t>(op1.xcoord - op2.xcoord);
  op1.ycoord = static_cast<int16_t>(op1.ycoord - op2.ycoord);
  return op1;
}
/**********************************************************************
 * operator%
 *
 * Scalar (dot) product of 2 ICOORDs: x1*x2 + y1*y2.
 **********************************************************************/
inline int32_t operator%(const ICOORD &op1, const ICOORD &op2) {
  const int x_term = op1.xcoord * op2.xcoord;
  const int y_term = op1.ycoord * op2.ycoord;
  return x_term + y_term;
}
/**********************************************************************
 * operator*
 *
 * Cross product of 2 ICOORDs: x1*y2 - y1*x2.
 **********************************************************************/
inline int32_t operator*(const ICOORD &op1, const ICOORD &op2) {
  const int forward = op1.xcoord * op2.ycoord;
  const int backward = op1.ycoord * op2.xcoord;
  return forward - backward;
}
/**********************************************************************
 * operator*
 *
 * Scalar multiply of an ICOORD: returns (x * scale, y * scale), with each
 * component narrowed back to 16 bits.
 **********************************************************************/
inline ICOORD operator*(const ICOORD &op1, int16_t scale) {
  return ICOORD(static_cast<int16_t>(op1.xcoord * scale),
                static_cast<int16_t>(op1.ycoord * scale));
}
/// Scalar multiply of an ICOORD with the scalar on the left-hand side.
/// Gives the same result as op1 * scale.
inline ICOORD operator*(int16_t scale, const ICOORD &op1) {
  return ICOORD(static_cast<int16_t>(op1.xcoord * scale),
                static_cast<int16_t>(op1.ycoord * scale));
}
/**********************************************************************
 * operator*=
 *
 * Multiplies both components of op1 by scale in place and returns op1.
 **********************************************************************/
inline ICOORD &operator*=(ICOORD &op1, int16_t scale) {
  op1.xcoord = static_cast<int16_t>(op1.xcoord * scale);
  op1.ycoord = static_cast<int16_t>(op1.ycoord * scale);
  return op1;
}
/**********************************************************************
 * operator/
 *
 * Scalar divide of an ICOORD: returns (x / scale, y / scale) using integer
 * division.
 * scale must not be 0: integer division by zero is undefined behaviour, so
 * it is rejected with ASSERT_HOST, as the FCOORD version of this operator
 * does.
 **********************************************************************/
inline ICOORD operator/( // scalar divide
    const ICOORD &op1,   // operands
    int16_t scale) {
  ICOORD result; // output

  ASSERT_HOST(scale != 0);
  result.xcoord = op1.xcoord / scale;
  result.ycoord = op1.ycoord / scale;
  return result;
}
/**********************************************************************
 * operator/=
 *
 * Divides both components of op1 by scale in place (integer division) and
 * returns op1.
 * scale must not be 0: integer division by zero is undefined behaviour, so
 * it is rejected with ASSERT_HOST, as the FCOORD version of this operator
 * does.
 **********************************************************************/
inline ICOORD &operator/=( // scalar divide
    ICOORD &op1,           // operands
    int16_t scale) {
  ASSERT_HOST(scale != 0);
  op1.xcoord /= scale;
  op1.ycoord /= scale;
  return op1;
}
/**********************************************************************
 * ICOORD::rotate
 *
 * Rotates this ICOORD in place by the given (normalized) (cos,sin) vector.
 * Each resulting component is rounded to the nearest integer via
 * floor(value + 0.5).
 **********************************************************************/
inline void ICOORD::rotate(const FCOORD &vec) {
  // Both results are computed from the unrotated components before either
  // member is overwritten.
  const auto rotated_x =
      static_cast<int16_t>(std::floor(xcoord * vec.x() - ycoord * vec.y() + 0.5f));
  const auto rotated_y =
      static_cast<int16_t>(std::floor(ycoord * vec.x() + xcoord * vec.y() + 0.5f));
  xcoord = rotated_x;
  ycoord = rotated_y;
}
/**********************************************************************
 * operator!
 *
 * Returns src rotated 90 degrees anticlockwise, ie (x, y) -> (-y, x).
 **********************************************************************/
inline FCOORD operator!(const FCOORD &src) {
  return FCOORD(-src.ycoord, src.xcoord);
}
/**********************************************************************
 * operator-
 *
 * Unary minus of an FCOORD: returns (-x, -y).
 **********************************************************************/
inline FCOORD operator-(const FCOORD &src) {
  return FCOORD(-src.xcoord, -src.ycoord);
}
/**********************************************************************
 * operator+
 *
 * Returns the component-wise sum of 2 FCOORDs.
 **********************************************************************/
inline FCOORD operator+(const FCOORD &op1, const FCOORD &op2) {
  return FCOORD(op1.xcoord + op2.xcoord, op1.ycoord + op2.ycoord);
}
/**********************************************************************
 * operator+=
 *
 * Adds op2 to op1 in place, component-wise, and returns op1.
 **********************************************************************/
inline FCOORD &operator+=(FCOORD &op1, const FCOORD &op2) {
  op1.xcoord = op1.xcoord + op2.xcoord;
  op1.ycoord = op1.ycoord + op2.ycoord;
  return op1;
}
/**********************************************************************
 * operator-
 *
 * Returns the component-wise difference op1 - op2 of 2 FCOORDs.
 **********************************************************************/
inline FCOORD operator-(const FCOORD &op1, const FCOORD &op2) {
  return FCOORD(op1.xcoord - op2.xcoord, op1.ycoord - op2.ycoord);
}
/**********************************************************************
 * operator-=
 *
 * Subtracts op2 from op1 in place, component-wise, and returns op1.
 **********************************************************************/
inline FCOORD &operator-=(FCOORD &op1, const FCOORD &op2) {
  op1.xcoord = op1.xcoord - op2.xcoord;
  op1.ycoord = op1.ycoord - op2.ycoord;
  return op1;
}
/**********************************************************************
 * operator%
 *
 * Scalar (dot) product of 2 FCOORDs: x1*x2 + y1*y2.
 **********************************************************************/
inline float operator%(const FCOORD &op1, const FCOORD &op2) {
  return op1.x() * op2.x() + op1.y() * op2.y();
}
/**********************************************************************
 * operator*
 *
 * Cross product of 2 FCOORDs: x1*y2 - y1*x2.
 **********************************************************************/
inline float operator*(const FCOORD &op1, const FCOORD &op2) {
  return op1.x() * op2.y() - op1.y() * op2.x();
}
/**********************************************************************
 * operator*
 *
 * Scalar multiply of an FCOORD: returns (x * scale, y * scale).
 **********************************************************************/
inline FCOORD operator*(const FCOORD &op1, float scale) {
  return FCOORD(op1.xcoord * scale, op1.ycoord * scale);
}
/// Scalar multiply of an FCOORD with the scalar on the left-hand side.
/// Gives the same result as op1 * scale.
inline FCOORD operator*(float scale, const FCOORD &op1) {
  return FCOORD(op1.xcoord * scale, op1.ycoord * scale);
}
/**********************************************************************
 * operator*=
 *
 * Multiplies both components of op1 by scale in place and returns op1.
 **********************************************************************/
inline FCOORD &operator*=(FCOORD &op1, float scale) {
  op1.xcoord = op1.xcoord * scale;
  op1.ycoord = op1.ycoord * scale;
  return op1;
}
/**********************************************************************
 * operator/
 *
 * Scalar divide of an FCOORD: returns (x / scale, y / scale).
 * A zero scale is rejected with ASSERT_HOST.
 **********************************************************************/
inline FCOORD operator/(const FCOORD &op1, float scale) {
  ASSERT_HOST(scale != 0.0f);
  return FCOORD(op1.xcoord / scale, op1.ycoord / scale);
}
/**********************************************************************
 * operator/=
 *
 * Divides both components of op1 by scale in place and returns op1.
 * A zero scale is rejected with ASSERT_HOST.
 **********************************************************************/
inline FCOORD &operator/=(FCOORD &op1, float scale) {
  ASSERT_HOST(scale != 0.0f);
  op1.xcoord = op1.xcoord / scale;
  op1.ycoord = op1.ycoord / scale;
  return op1;
}
/**********************************************************************
 * rotate
 *
 * Rotates this FCOORD in place by the given (normalized) (cos,sin) vector.
 **********************************************************************/
inline void FCOORD::rotate(const FCOORD vec) {
  // Both results are computed from the unrotated components before either
  // member is overwritten.
  const float rotated_x = xcoord * vec.x() - ycoord * vec.y();
  const float rotated_y = ycoord * vec.x() + xcoord * vec.y();
  xcoord = rotated_x;
  ycoord = rotated_y;
}
inline void FCOORD::unrotate(const FCOORD &vec) {
rotate(FCOORD(vec.x(), -vec.y()));
}
} // namespace tesseract
#endif

View File

@ -0,0 +1,571 @@
/**********************************************************************
* File: polyaprx.cpp (Formerly polygon.c)
* Description: Code for polygonal approximation from old edgeprog.
* Author: Ray Smith
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "polyaprx.h"
#include "blobs.h" // for EDGEPT, TPOINT, VECTOR, TESSLINE
#include "coutln.h" // for C_OUTLINE
#include "errcode.h" // for ASSERT_HOST
#include "mod128.h" // for DIR128
#include "params.h" // for BoolParam, BOOL_VAR
#include "points.h" // for ICOORD
#include "rect.h" // for TBOX
#include "tprintf.h" // for tprintf
#include <cstdint> // for INT16_MAX, int8_t
namespace tesseract {
// Number of EDGEPTs that ApproximateOutline keeps in a stack buffer; outlines
// with a longer path use a heap allocation instead.
#define FASTEDGELENGTH 256
// Enables the debug tprintf in poly2.
static BOOL_VAR(poly_debug, false, "Debug old poly");
// When false, ApproximateOutline uses the larger of box width and height
// (instead of always the height) to compute its area value.
static BOOL_VAR(poly_wide_objects_better, true, "More accurate approx on wide things");
// Distance constant used by fix2 to compute the minimum gap between fixed
// points.
#define fixed_dist 20 // really an int_variable
// Distance constant from which par1 and par2 below are derived.
#define approx_dist 15 // really an int_variable
// Integer constants derived from approx_dist (4500 / 225 and 6750 / 225).
// NOTE(review): their consumer is not in this part of the file - presumably
// cutline; confirm.
const int par1 = 4500 / (approx_dist * approx_dist);
const int par2 = 6750 / (approx_dist * approx_dist);
/**********************************************************************
 * ApproximateOutline
 *
 * Approximates a chain-coded outline by a polygon using the old tess
 * algorithm (edgesteps_to_edgepts, then fix2, then poly2) and returns the
 * result as a TESSLINE built from a newly allocated circular list of EDGEPTs.
 * If allow_detailed_fx is true, the returned EDGEPTs also carry the source
 * outline pointer and step range (src_outline, start_step, step_count) that
 * enable higher-resolution feature extraction that does not use the
 * polygonal approximation.
 **********************************************************************/
TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
  // Working array for the intermediate EDGEPTs: on the stack for short
  // outlines, on the heap if the stack buffer is not big enough.
  EDGEPT local_buffer[FASTEDGELENGTH];
  const bool needs_heap = c_outline->pathlength() > FASTEDGELENGTH;
  EDGEPT *scratch = needs_heap ? new EDGEPT[c_outline->pathlength()] : local_buffer;

  // The "area" passed to the approximation is the square of the box height,
  // or of the larger box side when poly_wide_objects_better is off.
  const auto &box = c_outline->bounding_box();
  int32_t side = box.height();
  if (!poly_wide_objects_better && box.width() > side) {
    side = box.width();
  }
  const int32_t area = side * side;

  edgesteps_to_edgepts(c_outline, scratch);
  fix2(scratch, area);
  EDGEPT *const first_src = poly2(scratch, area); // 2nd approximation.

  // Copy the approximated loop out of the working array into individually
  // allocated EDGEPTs, linking them as we go.
  EDGEPT *head = nullptr;
  EDGEPT *tail = nullptr;
  EDGEPT *src = first_src;
  do {
    auto *copy = new EDGEPT;
    copy->pos = src->pos;
    copy->prev = tail;
    if (tail != nullptr) {
      tail->next = copy;
    } else {
      head = copy;
    }
    if (allow_detailed_fx) {
      copy->src_outline = src->src_outline;
      copy->start_step = src->start_step;
      copy->step_count = src->step_count;
    }
    tail = copy;
    src = src->next;
  } while (src != first_src);
  // Close the ring.
  tail->next = head;
  head->prev = tail;

  if (needs_heap) {
    delete[] scratch;
  }
  return TESSLINE::BuildFromOutlineList(head);
}
/**********************************************************************
 * edgesteps_to_edgepts
 *
 * Convert a C_OUTLINE to EDGEPTs.
 *
 * Walks the steps of c_outline in order and collapses each run of
 * consecutive steps with the same direction into one EDGEPT, which records
 * the start position of the run (pos), the vector covered by the whole run
 * (vec), the number of steps merged (runlength), a 3-bit direction code
 * (dir) and the source step range (src_outline, start_step, step_count).
 * The EDGEPTs are written to consecutive entries of edgepts, starting at
 * index 0, and linked through prev/next into a closed ring.
 *
 * edgepts must be large enough to hold one entry per run; the only caller in
 * view, ApproximateOutline, sizes it from the path length.
 * Returns a pointer to edgepts[0].
 **********************************************************************/
EDGEPT *edgesteps_to_edgepts( // convert outline
C_OUTLINE *c_outline, // input
EDGEPT edgepts[] // output is array
) {
int32_t length; // steps in path
ICOORD pos; // current coords
int32_t stepindex; // current step
int32_t stepinc; // increment
int32_t epindex; // current EDGEPT
ICOORD vec; // for this 8 step
ICOORD prev_vec;
int8_t epdir; // of this step
DIR128 prevdir; // previous dir
DIR128 dir; // of this step
pos = c_outline->start_pos(); // start of loop
length = c_outline->pathlength();
stepindex = 0;
epindex = 0;
prevdir = -1;
// repeated steps
uint32_t count = 0;
int prev_stepindex = 0;
do {
dir = c_outline->step_dir(stepindex);
vec = c_outline->step(stepindex);
// If the next step's direction differs from this one by -32, the two steps
// are merged into a single combined step: the direction is adjusted, the
// vectors are summed and the walk advances by 2.
// NOTE(review): presumably DIR128 counts in 128ths of a circle, making this
// a quarter-turn pair merged into a diagonal - confirm against mod128.h.
if (stepindex < length - 1 && c_outline->step_dir(stepindex + 1) - dir == -32) {
dir += 128 - 16;
vec += c_outline->step(stepindex + 1);
stepinc = 2;
} else {
stepinc = 1;
}
// Very first step: start the first run with this direction.
if (count == 0) {
prevdir = dir;
prev_vec = vec;
}
if (prevdir.get_dir() != dir.get_dir()) {
// Direction changed: emit an EDGEPT for the run that just ended.
edgepts[epindex].pos.x = pos.x();
edgepts[epindex].pos.y = pos.y();
// Scale the per-step vector by the run length to get the run's full vector.
prev_vec *= count;
edgepts[epindex].vec.x = prev_vec.x();
edgepts[epindex].vec.y = prev_vec.y();
pos += prev_vec;
edgepts[epindex].runlength = count;
// For epindex == 0 this prev link is a placeholder; edgepts[0].prev is set
// to the last EDGEPT after the loop.
edgepts[epindex].prev = &edgepts[epindex - 1];
// TODO: reset is_hidden, too?
edgepts[epindex].fixed = false;
edgepts[epindex].next = &edgepts[epindex + 1];
// Derive the 3-bit direction code (0..7) stored in the EDGEPT from the
// direction of the run.
prevdir += 64;
epdir = DIR128(0) - prevdir;
epdir >>= 4;
epdir &= 7;
edgepts[epindex].dir = epdir;
edgepts[epindex].src_outline = c_outline;
edgepts[epindex].start_step = prev_stepindex;
edgepts[epindex].step_count = stepindex - prev_stepindex;
epindex++;
// Start a new run with the current step.
prevdir = dir;
prev_vec = vec;
count = 1;
prev_stepindex = stepindex;
} else {
count++;
}
stepindex += stepinc;
} while (stepindex < length);
// Emit the EDGEPT for the final run, which the loop above leaves pending.
edgepts[epindex].pos.x = pos.x();
edgepts[epindex].pos.y = pos.y();
prev_vec *= count;
edgepts[epindex].vec.x = prev_vec.x();
edgepts[epindex].vec.y = prev_vec.y();
pos += prev_vec;
edgepts[epindex].runlength = count;
// TODO: reset is_hidden, too?
edgepts[epindex].fixed = false;
edgepts[epindex].src_outline = c_outline;
edgepts[epindex].start_step = prev_stepindex;
edgepts[epindex].step_count = stepindex - prev_stepindex;
edgepts[epindex].prev = &edgepts[epindex - 1];
// Close the ring: the last EDGEPT links forward to the first...
edgepts[epindex].next = &edgepts[0];
prevdir += 64;
epdir = DIR128(0) - prevdir;
epdir >>= 4;
epdir &= 7;
edgepts[epindex].dir = epdir;
// ...and the first links back to the last.
edgepts[0].prev = &edgepts[epindex];
// The accumulated run vectors must bring us back to the start of the outline.
ASSERT_HOST(pos.x() == c_outline->start_pos().x() && pos.y() == c_outline->start_pos().y());
return &edgepts[0];
}
/**********************************************************************
 * fix2(start,area) fixes points on the outline according to a trial method.
 *
 * start is any point of a closed ring of EDGEPTs (as produced by
 * edgesteps_to_edgepts); area is the size measure computed by the caller.
 * The function only sets or clears the "fixed" flag of points on the ring:
 *  1. A first pass walks the ring and fixes the points found at the ends of
 *     line segments and at sharp bends.
 *  2. Points that start a run of at least 8 steps, and their successors, are
 *     fixed.
 *  3. Isolated pairs of fixed points inside a regular staircase are unfixed.
 *  4. While more than 3 fixed points remain, one of any two neighbouring
 *     fixed points that are closer together than a minimum gap derived from
 *     area is unfixed.
 **********************************************************************/
void fix2(         // polygonal approx
    EDGEPT *start, /*loop to approximate */
    int area) {
  EDGEPT *edgept; /*current point */
  EDGEPT *edgept1;
  EDGEPT *loopstart; /*modified start of loop */
  EDGEPT *linestart; /*start of line segment */
  int stopped;       /*completed flag */
  int fixed_count;   // no of fixed points
  int8_t dir;
  int d01, d12, d23, gapmin;
  TPOINT d01vec, d12vec, d23vec;
  EDGEPT *edgefix, *startfix;
  EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;

  edgept = start; /*start of loop */
  while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
         (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
    edgept = edgept->next; /*find suitable start */
  }
  loopstart = edgept; /*remember start */

  stopped = 0;          /*not finished yet */
  edgept->fixed = true; // fix it
  do {
    linestart = edgept;      /*possible start of line */
    auto dir1 = edgept->dir; // first direction
    // length of dir1
    auto sum1 = edgept->runlength;
    edgept = edgept->next;
    auto dir2 = edgept->dir; // 2nd direction
    // length in dir2
    auto sum2 = edgept->runlength;
    if (((dir1 - dir2 + 1) & 7) < 3) {
      // The two directions are neighbours: follow the staircase for as long
      // as it keeps alternating between them, summing the lengths per
      // direction.
      while (edgept->prev->dir == edgept->next->dir) {
        edgept = edgept->next; /*look at next */
        if (edgept->dir == dir1) {
          /*sum lengths */
          sum1 += edgept->runlength;
        } else {
          sum2 += edgept->runlength;
        }
      }

      if (edgept == loopstart) {
        stopped = 1; /*finished */
      }
      if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
          (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
        /*start is back one */
        linestart = linestart->prev;
        linestart->fixed = true;
      }

      if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
          (edgept->dir == dir1 && sum1 >= sum2) ||
          ((edgept->prev->runlength < edgept->runlength ||
            (edgept->dir == dir2 && sum2 >= sum1)) &&
           linestart->next != edgept)) {
        edgept = edgept->next;
      }
    }
    /*sharp bend */
    edgept->fixed = true;
  }
  /*do whole loop */
  while (edgept != loopstart && !stopped);

  // Fix both ends of every long run. Both alternatives of the condition use
  // the same threshold, so this currently amounts to runlength >= 8 whatever
  // the direction.
  edgept = start;
  do {
    if (((edgept->runlength >= 8) && (edgept->dir != 2) &&
         (edgept->dir != 6)) ||
        ((edgept->runlength >= 8) &&
         ((edgept->dir == 2) || (edgept->dir == 6)))) {
      edgept->fixed = true;
      edgept1 = edgept->next;
      edgept1->fixed = true;
    }
    edgept = edgept->next;
  } while (edgept != start);

  edgept = start;
  do {
    /*single fixed step */
    if (edgept->fixed &&
        edgept->runlength == 1
        /*and neighbours free */
        && edgept->next->fixed &&
        !edgept->prev->fixed
        /*same pair of dirs */
        && !edgept->next->next->fixed &&
        edgept->prev->dir == edgept->next->dir &&
        edgept->prev->prev->dir == edgept->next->next->dir &&
        ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
      // unfix it
      edgept->fixed = false;
      edgept->next->fixed = false;
    }
    edgept = edgept->next;   /*do all points */
  } while (edgept != start); /*until finished */

  stopped = 0;
  if (area < 450) {
    area = 450;
  }

  // The product is evaluated in 64 bits: area can be the square of a box side
  // of several thousand pixels, and area * fixed_dist * fixed_dist then no
  // longer fits in a 32 bit int. The quotient always fits in an int again.
  gapmin = static_cast<int>(static_cast<int64_t>(area) * fixed_dist * fixed_dist / 44000);

  edgept = start;
  fixed_count = 0;
  do {
    if (edgept->fixed) {
      fixed_count++;
    }
    edgept = edgept->next;
  } while (edgept != start);
  // Find 4 consecutive fixed points (they may repeat if there are fewer).
  while (!edgept->fixed) {
    edgept = edgept->next;
  }
  edgefix0 = edgept;

  edgept = edgept->next;
  while (!edgept->fixed) {
    edgept = edgept->next;
  }
  edgefix1 = edgept;

  edgept = edgept->next;
  while (!edgept->fixed) {
    edgept = edgept->next;
  }
  edgefix2 = edgept;

  edgept = edgept->next;
  while (!edgept->fixed) {
    edgept = edgept->next;
  }
  edgefix3 = edgept;

  startfix = edgefix2;

  do {
    if (fixed_count <= 3) {
      break; // already too few
    }
    d12vec.diff(edgefix1->pos, edgefix2->pos);
    d12 = d12vec.length();
    // TODO(rays) investigate this change:
    // Only unfix a point if it is part of a low-curvature section
    // of outline and the total angle change of the outlines is
    // less than 90 degrees, ie the scalar product is positive.
    // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
    if (d12 <= gapmin) {
      // The middle pair is too close together: unfix whichever of the two
      // has the shorter gap on its other side.
      d01vec.diff(edgefix0->pos, edgefix1->pos);
      d01 = d01vec.length();
      d23vec.diff(edgefix2->pos, edgefix3->pos);
      d23 = d23vec.length();
      if (d01 > d23) {
        edgefix2->fixed = false;
        fixed_count--;
      } else {
        edgefix1->fixed = false;
        fixed_count--;
        edgefix1 = edgefix2;
      }
    } else {
      edgefix0 = edgefix1;
      edgefix1 = edgefix2;
    }
    edgefix2 = edgefix3;
    // Advance to the next fixed point, noting if we pass the starting one.
    edgept = edgept->next;
    while (!edgept->fixed) {
      if (edgept == startfix) {
        stopped = 1;
      }
      edgept = edgept->next;
    }
    edgefix3 = edgept;
    edgefix = edgefix2;
  } while ((edgefix != startfix) && (!stopped));
}
/**********************************************************************
 * poly2(startpt,area) applies a second approximation to the outline
 * using the points which have been fixed by the first approximation.
 *
 * startpt is any point of the closed ring of EDGEPTs; area is the size
 * measure computed by the caller (raised to at least 1200 here).
 * Each stretch of outline between fixed points is handed to cutline, and
 * this is repeated with a halved area until at least 3 points are fixed.
 * The ring is then relinked in place so that next/prev skip all non-fixed
 * points and each vec spans from one fixed point to the next.
 * Returns a point on the relinked ring, or startpt itself if no starting
 * point for the re-approximation could be found.
 * NOTE(review): cutline is defined outside this view; presumably it fixes
 * additional points between its two arguments - confirm.
 **********************************************************************/
EDGEPT *poly2( // second poly
EDGEPT *startpt, /*start of loop */
int area /*area of blob box */
) {
EDGEPT *edgept; /*current outline point */
EDGEPT *loopstart; /*starting point */
EDGEPT *linestart; /*start of line */
int edgesum; /*correction count */
if (area < 1200) {
area = 1200; /*minimum value */
}
loopstart = nullptr; /*not found it yet */
edgept = startpt; /*start of loop */
// Look for a fixed point that is followed by a non-fixed one.
do {
// current point fixed and next not
if (edgept->fixed && !edgept->next->fixed) {
loopstart = edgept; /*start of repoly */
break;
}
edgept = edgept->next; /*next point */
} while (edgept != startpt); /*until found or finished */
// None found and the start is not fixed either: fix the start and use it.
if (loopstart == nullptr && !startpt->fixed) {
/*fixed start of loop */
startpt->fixed = true;
loopstart = startpt; /*or start of loop */
}
if (loopstart) {
do {
edgept = loopstart; /*first to do */
do {
linestart = edgept;
edgesum = 0; /*sum of lengths */
// Advance to the next fixed point, but stop early once the summed run
// lengths reach 126 or the walk is back at loopstart.
do {
/*sum lengths */
edgesum += edgept->runlength;
edgept = edgept->next; /*move on */
} while (!edgept->fixed && edgept != loopstart && edgesum < 126);
if (poly_debug) {
tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", linestart->pos.x,
linestart->pos.y, linestart->dir, linestart->vec.x, linestart->vec.y,
edgesum, edgept->pos.x, edgept->pos.y);
}
/*reapproximate */
cutline(linestart, edgept, area);
while (edgept->next->fixed && edgept != loopstart) {
edgept = edgept->next; /*look for next non-fixed */
}
}
/*do all the loop */
while (edgept != loopstart);
// edgesum is reused here as the number of fixed points on the ring.
edgesum = 0;
do {
if (edgept->fixed) {
edgesum++;
}
edgept = edgept->next;
}
// count fixed pts
while (edgept != loopstart);
if (edgesum < 3) {
area /= 2; // must have 3 pts
}
} while (edgesum < 3);
// Relink the ring so that it contains only the fixed points, recomputing
// each vec as the difference between consecutive fixed positions.
do {
linestart = edgept;
do {
edgept = edgept->next;
} while (!edgept->fixed);
linestart->next = edgept;
edgept->prev = linestart;
linestart->vec.x = edgept->pos.x - linestart->pos.x;
linestart->vec.y = edgept->pos.y - linestart->pos.y;
} while (edgept != loopstart);
} else {
// No re-approximation was done: the ring is returned unchanged.
edgept = startpt; /*start of loop */
}
loopstart = edgept; /*new start */
return loopstart; /*correct exit */
}
/**********************************************************************
 * cutline(first,last,area) straightens out a line by partitioning
 * and joining the ends by a straight line.
 *
 * first, last: end points of the run of outline points to examine.
 * area:        tolerance scale; the deviation measures below are
 *              compared against 10 * area.
 *
 * Finds the point between first and last with the largest deviation
 * from the chord first->last. If the deviation tests fail, or the chord
 * is at least 126 long on its larger axis, that point is marked fixed
 * and both halves are refined recursively.
 **********************************************************************/
void cutline( // recursive refine
EDGEPT *first, /*ends of line */
EDGEPT *last, int area /*area of object */
) {
EDGEPT *edge; /*current edge */
TPOINT vecsum; /*vector sum */
int vlen; /*approx length of vecsum */
TPOINT vec; /*accumulated vector */
EDGEPT *maxpoint; /*worst point */
int maxperp; /*max deviation */
int perp; /*perp distance */
int ptcount; /*no of points */
int squaresum; /*sum of perps */
edge = first; /*start of line */
if (edge->next == last) {
return; /*simple line */
}
/*vector sum */
vecsum.x = last->pos.x - edge->pos.x;
vecsum.y = last->pos.y - edge->pos.y;
// first and last coincide: use the reversed vec of the point before first
// as the reference direction instead of a zero vector.
if (vecsum.x == 0 && vecsum.y == 0) {
/*special case */
vecsum.x = -edge->prev->vec.x;
vecsum.y = -edge->prev->vec.y;
}
// vlen = max(|vecsum.x|, |vecsum.y|)
/*absolute value */
vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
if (vecsum.y > vlen) {
vlen = vecsum.y; /*maximum */
} else if (-vecsum.y > vlen) {
vlen = -vecsum.y; /*absolute value */
}
vec.x = edge->vec.x; /*accumulated vector */
vec.y = edge->vec.y;
maxperp = 0; /*none yet */
squaresum = ptcount = 0;
edge = edge->next; /*move to actual point */
maxpoint = edge; /*in case there isn't one */
// For every interior point: vec is the offset from first to that point, and
// the squared cross product with vecsum measures its deviation from the chord
// (not yet divided by the chord length; that happens after the loop).
do {
perp = vec.cross(vecsum); // get perp distance
if (perp != 0) {
perp *= perp; /*squared deviation */
}
squaresum += perp; /*sum squares */
ptcount++; /*count points */
if (poly_debug) {
tprintf("Cutline:Final perp=%d\n", perp);
}
if (perp > maxperp) {
maxperp = perp;
maxpoint = edge; /*find greatest deviation */
}
vec.x += edge->vec.x; /*accumulate vectors */
vec.y += edge->vec.y;
edge = edge->next;
} while (edge != last); /*test all line */
// From here on perp is reused: first as the length of vecsum, then as the
// mean squared deviation.
perp = vecsum.length();
ASSERT_HOST(perp != 0);
// Scale by 256 (<< 8) and divide by the chord length. The order of the two
// operations is chosen by magnitude so the shift does not overflow.
if (maxperp < 256 * INT16_MAX) {
maxperp <<= 8;
maxperp /= perp; /*true max perp */
} else {
maxperp /= perp;
maxperp <<= 8; /*avoid overflow */
}
if (squaresum < 256 * INT16_MAX) {
/*mean squared perp */
perp = (squaresum << 8) / (perp * ptcount);
} else {
/*avoid overflow */
perp = (squaresum / perp << 8) / ptcount;
}
if (poly_debug) {
tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area, maxperp / 256.0,
maxperp * 200.0 / area, perp / 256.0, perp * 300.0 / area);
}
// par1 and par2 are defined outside this block; presumably tuning weights
// for the max and mean-square tests respectively - confirm at their definition.
if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
maxpoint->fixed = true;
/*partitions */
cutline(first, maxpoint, area);
cutline(maxpoint, last, area);
}
}
} // namespace tesseract

View File

@ -0,0 +1,49 @@
/**********************************************************************
* File: polyaprx.h (Formerly polygon.h)
* Description: Code for polygonal approximation from old edgeprog.
* Author: Ray Smith
* Created: Thu Nov 25 11:42:04 GMT 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYAPRX_H
#define POLYAPRX_H
namespace tesseract {
// Forward declarations; only pointers to these types are used below.
class C_OUTLINE;
struct EDGEPT;
struct TESSLINE;
// convert a chain-coded input to the old OUTLINE approximation
TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline);
// Converts the edge steps of c_outline into EDGEPTs stored in the
// caller-supplied edgepts array.
EDGEPT *edgesteps_to_edgepts( // convert outline
C_OUTLINE *c_outline, // input
EDGEPT edgepts[] // output is array
);
// First approximation pass over the EDGEPT loop beginning at start; sets the
// fixed flag of the points, using area to scale its thresholds.
void fix2( // polygonal approx
EDGEPT *start, /*loop to approximate */
int area);
// Second approximation pass, based on the points fixed by the first pass.
// Returns a point of the resulting loop.
EDGEPT *poly2( // second poly
EDGEPT *startpt, /*start of loop */
int area /*area of blob box */
);
// Recursively fixes the point of greatest deviation between first and last
// until the run is straight enough for the given area.
void cutline( // recursive refine
EDGEPT *first, /*ends of line */
EDGEPT *last, int area /*area of object */
);
} // namespace tesseract
#endif

View File

@ -0,0 +1,422 @@
/**********************************************************************
* File: polyblk.cpp (Formerly poly_block.c)
* Description: Polygonal blocks
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "polyblk.h"
#include "elst.h"
#include <cctype>
#include <cinttypes> // PRId32
#include <cmath>
#include <cstdio>
#include <memory> // std::unique_ptr
namespace tesseract {
#define INTERSECTING INT16_MAX
int lessthan(const void *first, const void *second);
// Initialize from a list of vertices. The elements of points are spliced
// into this block's own vertex list, and the bounding box is computed.
POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) {
  type = t;
  ICOORDELT_IT vertex_it(&vertices);
  vertices.clear();
  vertex_it.move_to_first();
  vertex_it.add_list_before(points);
  compute_bb();
}
// Initialize as a rectangle from the corners of a bounding box.
POLY_BLOCK::POLY_BLOCK(const TBOX &tbox, PolyBlockType t) {
  type = t;
  vertices.clear();
  ICOORDELT_IT corner_it(&vertices);
  corner_it.move_to_first();

  const auto left = tbox.left();
  const auto right = tbox.right();
  const auto top = tbox.top();
  const auto bottom = tbox.bottom();
  // Corner order: top-left, bottom-left, bottom-right, top-right.
  corner_it.add_to_end(new ICOORDELT(left, top));
  corner_it.add_to_end(new ICOORDELT(left, bottom));
  corner_it.add_to_end(new ICOORDELT(right, bottom));
  corner_it.add_to_end(new ICOORDELT(right, top));

  compute_bb();
}
/**
* @name POLY_BLOCK::compute_bb
*
* Compute the bounding box from the outline points.
*/
void POLY_BLOCK::compute_bb() { // constructor
ICOORD ibl, itr; // integer bb
ICOORD botleft; // bounding box
ICOORD topright;
ICOORD pos; // current pos;
ICOORDELT_IT pts = &vertices; // iterator
botleft = *pts.data();
topright = botleft;
do {
pos = *pts.data();
if (pos.x() < botleft.x()) {
// get bounding box
botleft = ICOORD(pos.x(), botleft.y());
}
if (pos.y() < botleft.y()) {
botleft = ICOORD(botleft.x(), pos.y());
}
if (pos.x() > topright.x()) {
topright = ICOORD(pos.x(), topright.y());
}
if (pos.y() > topright.y()) {
topright = ICOORD(topright.x(), pos.y());
}
pts.forward();
} while (!pts.at_first());
ibl = ICOORD(botleft.x(), botleft.y());
itr = ICOORD(topright.x(), topright.y());
box = TBOX(ibl, itr);
}
/**
 * @name POLY_BLOCK::winding_number
 *
 * Return the winding number of the outline around the given point.
 * @param point point to wind around
 * @return the signed count of edge crossings of the horizontal line
 * through point, or INTERSECTING (INT16_MAX) if the point lies on a
 * vertex or on an edge that crosses that line.
 */
int16_t POLY_BLOCK::winding_number(const ICOORD &point) {
int16_t count; // winding count
ICOORD pt; // current point
ICOORD vec; // point to current point
ICOORD vvec; // current point to next point
int32_t cross; // cross product
ICOORDELT_IT it = &vertices; // iterator
count = 0;
do {
pt = *it.data();
vec = pt - point;
vvec = *it.data_relative(1) - pt;
// Edge goes upwards across the line: start at or below the point's y,
// end strictly above it.
// crossing the line
if (vec.y() <= 0 && vec.y() + vvec.y() > 0) {
cross = vec * vvec; // cross product
if (cross > 0) {
count++; // crossing right half
} else if (cross == 0) {
return INTERSECTING; // going through point
}
// Edge goes downwards across the line: start strictly above, end at or below.
} else if (vec.y() > 0 && vec.y() + vvec.y() <= 0) {
cross = vec * vvec;
if (cross < 0) {
count--; // crossing back
} else if (cross == 0) {
return INTERSECTING; // illegal
}
// No crossing, but the current vertex coincides with the point.
} else if (vec.y() == 0 && vec.x() == 0) {
return INTERSECTING;
}
it.forward();
} while (!it.at_first());
return count; // winding number
}
/// @return true if other is inside this.
/// Vertices whose winding number is INTERSECTING (on the outline) are
/// ignored by both tests.
bool POLY_BLOCK::contains(POLY_BLOCK *other) {
  // Disjoint bounding boxes rule out containment immediately.
  if (!box.overlap(*(other->bounding_box()))) {
    return false;
  }

  // No vertex of this block may lie inside other.
  ICOORDELT_IT own_it = &vertices;
  do {
    const ICOORD own_vertex = *own_it.data();
    const int16_t winding = other->winding_number(own_vertex);
    if (winding != INTERSECTING && winding != 0) {
      return false;
    }
    own_it.forward();
  } while (!own_it.at_first());

  // Every vertex of other must lie inside this block.
  ICOORDELT_IT other_it = other->points();
  do {
    const ICOORD other_vertex = *other_it.data();
    const int16_t winding = winding_number(other_vertex);
    if (winding != INTERSECTING && winding == 0) {
      return false;
    }
    other_it.forward();
  } while (!other_it.at_first());

  return true;
}
/**
 * @name POLY_BLOCK::rotate
 *
 * Rotate every vertex of the POLY_BLOCK, rounding the result to the
 * nearest integer coordinate, then refresh the bounding box.
 * @param rotation cos, sin of angle
 */
void POLY_BLOCK::rotate(FCOORD rotation) {
  ICOORDELT_IT vertex_it = &vertices;
  do {
    ICOORDELT *vertex = vertex_it.data();

    // Rotate in floating point.
    FCOORD rotated;
    rotated.set_x(vertex->x());
    rotated.set_y(vertex->y());
    rotated.rotate(rotation);

    // floor(v + 0.5) rounds to the nearest integer.
    vertex->set_x(static_cast<int16_t>(floor(rotated.x() + 0.5)));
    vertex->set_y(static_cast<int16_t>(floor(rotated.y() + 0.5)));

    vertex_it.forward();
  } while (!vertex_it.at_first());
  compute_bb();
}
/**
* @name POLY_BLOCK::reflect_in_y_axis
*
* Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)
*/
void POLY_BLOCK::reflect_in_y_axis() {
ICOORDELT *pt; // current point
ICOORDELT_IT pts = &vertices; // Iterator.
do {
pt = pts.data();
pt->set_x(-pt->x());
pts.forward();
} while (!pts.at_first());
compute_bb();
}
/**
 * POLY_BLOCK::move
 *
 * Translate the POLY_BLOCK by adding shift to every vertex, then
 * refresh the bounding box.
 * @param shift x,y translation vector
 */
void POLY_BLOCK::move(ICOORD shift) {
  ICOORDELT_IT vertex_it = &vertices;
  do {
    *vertex_it.data() += shift;
    vertex_it.forward();
  } while (!vertex_it.at_first());
  compute_bb();
}
#ifndef GRAPHICS_DISABLED
// Draws the outline of the polygon in window, using the colour that
// ColorForPolyBlockType gives for this block's type. If num > 0 it is also
// drawn as a text label at the first vertex.
void POLY_BLOCK::plot(ScrollView *window, int32_t num) {
ICOORDELT_IT v = &vertices;
window->Pen(ColorForPolyBlockType(type));
v.move_to_first();
if (num > 0) {
window->TextAttributes("Times", 80, false, false, false);
char temp_buff[34];
// Format num as decimal text; MSVC builds use _ltoa instead of snprintf.
# if !defined(_WIN32) || defined(__MINGW32__)
snprintf(temp_buff, sizeof(temp_buff), "%" PRId32, num);
# else
_ltoa(num, temp_buff, 10);
# endif
window->Text(v.data()->x(), v.data()->y(), temp_buff);
}
// Trace the vertices in list order...
window->SetCursor(v.data()->x(), v.data()->y());
for (v.mark_cycle_pt(); !v.cycled_list(); v.forward()) {
window->DrawTo(v.data()->x(), v.data()->y());
}
// ...and close the outline back to the first vertex.
v.move_to_first();
window->DrawTo(v.data()->x(), v.data()->y());
}
// Fills the interior of the polygon in window with the given colour by
// drawing one horizontal line per run of interior pixels on every scanline
// of the bounding box.
void POLY_BLOCK::fill(ScrollView *window, ScrollView::Color colour) {
  // The scanline iterator lives on the stack: no manual delete is needed and
  // nothing leaks if a call below throws.
  PB_LINE_IT lines(this);
  window->Pen(colour);

  // The loop counter is an int so that "y <= top" still terminates when top
  // is the largest int16_t value.
  const int bottom = this->bounding_box()->bottom();
  const int top = this->bounding_box()->top();
  for (int y = bottom; y <= top; y++) {
    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
        lines.get_line(static_cast<int16_t>(y)));
    if (segments->empty()) {
      continue;
    }
    // Declared after segments so the iterator never outlives the list.
    ICOORDELT_IT s_it(segments.get());
    for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
      // Note different use of ICOORDELT, x coord is x coord of pixel
      // at the start of line segment, y coord is length of line segment
      // Last pixel is start pixel + length.
      const int16_t width = s_it.data()->y();
      window->SetCursor(s_it.data()->x(), y);
      window->DrawTo(s_it.data()->x() + static_cast<float>(width), y);
    }
  }
}
#endif
/// @return true if the polygons of other and this overlap.
/// Only vertices are tested: the result is true when some vertex of one
/// polygon has a non-zero winding number in the other. Vertices lying on
/// the other outline (INTERSECTING) do not count.
bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
  // Disjoint bounding boxes rule out any overlap.
  if (!box.overlap(*(other->bounding_box()))) {
    return false;
  }

  // Is a vertex of this block inside other?
  ICOORDELT_IT own_it = &vertices;
  do {
    const ICOORD own_vertex = *own_it.data();
    const int16_t winding = other->winding_number(own_vertex);
    if (winding != INTERSECTING && winding != 0) {
      return true;
    }
    own_it.forward();
  } while (!own_it.at_first());

  // Is a vertex of other inside this block?
  ICOORDELT_IT other_it = other->points();
  do {
    const ICOORD other_vertex = *other_it.data();
    const int16_t winding = winding_number(other_vertex);
    if (winding != INTERSECTING && winding != 0) {
      return true;
    }
    other_it.forward();
  } while (!other_it.at_first());

  return false;
}
// Returns a newly allocated list of the runs of the block's polygon on
// scanline y. In each element x is the start of a run and y is its length.
// The list is empty if no edge crosses the scanline. The caller owns the
// returned list.
ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) {
ICOORDELT_IT v, r;
ICOORDELT_LIST *result;
ICOORDELT *x, *current, *previous;
// Crossings are evaluated at the vertical centre of the pixel row.
float fy = y + 0.5f;
result = new ICOORDELT_LIST();
r.set_to_list(result);
v.set_to_list(block->points());
// Collect the x position of every edge (previous vertex -> current vertex)
// that crosses the scanline.
for (v.mark_cycle_pt(); !v.cycled_list(); v.forward()) {
// The edge crosses when exactly one of its two ends is above y; this also
// guarantees current->y() != previous->y() in the division below.
if (((v.data_relative(-1)->y() > y) && (v.data()->y() <= y)) ||
((v.data_relative(-1)->y() <= y) && (v.data()->y() > y))) {
previous = v.data_relative(-1);
current = v.data();
// Linear interpolation of x along the edge at height fy.
float fx =
0.5f + previous->x() +
(current->x() - previous->x()) * (fy - previous->y()) / (current->y() - previous->y());
x = new ICOORDELT(static_cast<int16_t>(fx), 0);
r.add_to_end(x);
}
}
if (!r.empty()) {
r.sort(lessthan);
// NOTE(review): this loop only assigns x, which is not read afterwards;
// it appears to have no effect on the result.
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
x = r.data();
}
// Pair up consecutive crossings: store the distance to the next crossing
// as the run length in y, then remove and delete that next crossing.
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
r.data()->set_y(r.data_relative(1)->x() - r.data()->x());
r.forward();
delete (r.extract());
}
}
return result;
}
int lessthan(const void *first, const void *second) {
const ICOORDELT *p1 = *reinterpret_cast<const ICOORDELT *const *>(first);
const ICOORDELT *p2 = *reinterpret_cast<const ICOORDELT *const *>(second);
if (p1->x() < p2->x()) {
return (-1);
} else if (p1->x() > p2->x()) {
return (1);
} else {
return (0);
}
}
#ifndef GRAPHICS_DISABLED
/// Returns a color to draw the given type.
/// Types that are not below PT_COUNT are drawn in white.
ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) {
  if (!(type < PT_COUNT)) {
    return ScrollView::WHITE;
  }
  // Keep kPBColors in sync with PolyBlockType.
  const ScrollView::Color kPBColors[PT_COUNT] = {
      ScrollView::WHITE,       // Type is not yet known. Keep as the 1st element.
      ScrollView::BLUE,        // Text that lives inside a column.
      ScrollView::CYAN,        // Text that spans more than one column.
      ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out
                               // region.
      ScrollView::AQUAMARINE,  // Partition belonging to an equation region.
      ScrollView::SKY_BLUE,    // Partition belonging to an inline equation
                               // region.
      ScrollView::MAGENTA,     // Partition belonging to a table region.
      ScrollView::GREEN,       // Text-line runs vertically.
      ScrollView::LIGHT_BLUE,  // Text that belongs to an image.
      ScrollView::RED,         // Image that lives inside a column.
      ScrollView::YELLOW,      // Image that spans more than one column.
      ScrollView::ORANGE,      // Image in a cross-column pull-out region.
      ScrollView::BROWN,       // Horizontal Line.
      ScrollView::DARK_GREEN,  // Vertical Line.
      ScrollView::GREY         // Lies outside of any column.
  };
  return kPBColors[type];
}
#endif // !GRAPHICS_DISABLED
} // namespace tesseract

View File

@ -0,0 +1,117 @@
/**********************************************************************
* File: polyblk.h (Formerly poly_block.h)
* Description: Polygonal blocks
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYBLK_H
#define POLYBLK_H
#include "elst.h"
#include "points.h"
#include "rect.h"
#include "scrollview.h"
#include <tesseract/publictypes.h>
namespace tesseract {
// A polygonal page region: a closed outline held as a list of integer
// vertices, its bounding box, and the type of the region.
class TESS_API POLY_BLOCK {
public:
  // Creates a block with no vertices and type PT_UNKNOWN.
  POLY_BLOCK() = default;
  // Initialize from box coordinates.
  POLY_BLOCK(const TBOX &tbox, PolyBlockType type);
  // Initialize from a list of vertices.
  POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type);
  ~POLY_BLOCK() = default;

  TBOX *bounding_box() { // access function
    return &box;
  }

  ICOORDELT_LIST *points() { // access function
    return &vertices;
  }

  // Recomputes the bounding box from the vertices.
  void compute_bb();

  // Returns the type of this region.
  PolyBlockType isA() const {
    return type;
  }

  // Returns true if the type of this region is a text type.
  bool IsText() const {
    return PTIsTextType(type);
  }

  // Rotate about the origin by the given rotation. (Analogous to
  // multiplying by a complex number.)
  void rotate(FCOORD rotation);
  // Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)
  void reflect_in_y_axis();
  // Move by adding shift to all coordinates.
  void move(ICOORD shift);

#ifndef GRAPHICS_DISABLED
  // Draws the outline in window; num > 0 is drawn as a label.
  void plot(ScrollView *window, int32_t num);
  // Fills the interior in window with the given colour.
  void fill(ScrollView *window, ScrollView::Color colour);
#endif // !GRAPHICS_DISABLED

  // Returns true if other is inside this.
  bool contains(POLY_BLOCK *other);

  // Returns true if the polygons of other and this overlap.
  bool overlap(POLY_BLOCK *other);

  // Returns the winding number of this around the test_pt.
  // Positive for anticlockwise, negative for clockwise, and zero for
  // test_pt outside this.
  int16_t winding_number(const ICOORD &test_pt);

#ifndef GRAPHICS_DISABLED
  // Static utility functions to handle the PolyBlockType.
  // Returns a color to draw the given type.
  static ScrollView::Color ColorForPolyBlockType(PolyBlockType type);
#endif // !GRAPHICS_DISABLED

private:
  ICOORDELT_LIST vertices; // vertices
  TBOX box;                // bounding box
  // Initialized here so that a default-constructed block never exposes an
  // indeterminate value through isA() or IsText().
  PolyBlockType type = PT_UNKNOWN; // Type of this region.
};
// Class to iterate the scanlines of a polygon.
class PB_LINE_IT {
public:
PB_LINE_IT(POLY_BLOCK *blkptr) {
block = blkptr;
}
void set_to_block(POLY_BLOCK *blkptr) {
block = blkptr;
}
// Returns a list of runs of pixels for the given y coord.
// Each element of the returned list is the start (x) and extent(y) of
// a run inside the region.
// Delete the returned list after use.
ICOORDELT_LIST *get_line(int16_t y);
private:
POLY_BLOCK *block;
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,144 @@
/**********************************************************************
* File: quadlsq.cpp (Formerly qlsq.c)
* Description: Code for least squares approximation of quadratics.
* Author: Ray Smith
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "quadlsq.h"
#include "tprintf.h"
#include <cmath>
#include <cstdio>
namespace tesseract {
// Minimum variance in least squares before backing off to a lower degree.
const long double kMinVariance = 1.0L / 1024;
/**********************************************************************
 * QLSQ::clear
 *
 * Reset the accumulator: no elements, all sums zero, and the fitted
 * coefficients a, b and c zero.
 **********************************************************************/
void QLSQ::clear() {
  // Result of any previous fit.
  a = b = c = 0.0;
  // Element count.
  n = 0;
  // Sums of x, y and their second-order products.
  sigx = sigy = 0.0;
  sigxx = sigxy = sigyy = 0.0;
  // Third- and fourth-order sums.
  sigxxx = sigxxy = sigxxxx = 0.0;
}
/**********************************************************************
 * QLSQ::add
 *
 * Add the point (x, y) to the accumulator.
 **********************************************************************/
void QLSQ::add(double x, double y) {
  ++n;

  // First- and second-order sums, accumulated in double.
  sigx += x;
  sigy += y;
  sigxx += x * x;
  sigxy += x * y;
  sigyy += y * y;

  // Higher-order sums; the products are formed in long double.
  const long double x_squared = static_cast<long double>(x) * x;
  sigxxx += x_squared * x;
  sigxxy += x_squared * y;
  sigxxxx += x_squared * x * x;
}
/**********************************************************************
 * QLSQ::remove
 *
 * Take the point (x, y) out of the accumulator again. Prints a message
 * and changes nothing if the accumulator holds no elements.
 **********************************************************************/
void QLSQ::remove(double x, double y) {
  if (n <= 0) {
    tprintf("Can't remove an element from an empty QLSQ accumulator!\n");
    return;
  }
  --n;

  // First- and second-order sums, accumulated in double.
  sigx -= x;
  sigy -= y;
  sigxx -= x * x;
  sigxy -= x * y;
  sigyy -= y * y;

  // Higher-order sums; the products are formed in long double.
  const long double x_squared = static_cast<long double>(x) * x;
  sigxxx -= x_squared * x;
  sigxxy -= x_squared * y;
  sigxxxx -= x_squared * x * x;
}
/**********************************************************************
 * QLSQ::fit
 *
 * Fit the given degree of polynomial and store the result.
 * This creates a quadratic of the form axx + bx + c, but limited to
 * the given degree.
 *
 * The fit silently falls back to a lower degree when there are too few
 * elements or a denominator is too small:
 *  - quadratic needs degree >= 2, n >= 4 and a large enough bottom96;
 *  - linear needs degree >= 1, n >= 2 and a large enough x_variance;
 *  - otherwise c is the mean of y (or 0 if n < 1 or degree < 0).
 * Results are read back with get_a(), get_b() and get_c().
 **********************************************************************/
void QLSQ::fit(int degree) {
long double x_variance =
static_cast<long double>(sigxx) * n - static_cast<long double>(sigx) * sigx;
// Note: for computational efficiency, we do not normalize the variance,
// covariance and cube variance here as they are in the same order in both
// nominators and denominators. However, we need be careful in value range
// check.
// x_variance is n*n times the true variance, hence the n * n on the threshold.
if (x_variance < kMinVariance * n * n || degree < 1 || n < 2) {
// We cannot calculate b reliably so forget a and b, and just work on c.
a = b = 0.0;
if (n >= 1 && degree >= 0) {
c = sigy / n;
} else {
c = 0.0;
}
return;
}
long double top96 = 0.0; // Accurate top.
long double bottom96 = 0.0; // Accurate bottom.
long double cubevar = sigxxx * n - static_cast<long double>(sigxx) * sigx;
long double covariance =
static_cast<long double>(sigxy) * n - static_cast<long double>(sigx) * sigy;
// Numerator and denominator of a are only computed when a quadratic is
// requested and there are at least 4 elements; otherwise bottom96 stays 0
// and the linear branch below is taken.
if (n >= 4 && degree >= 2) {
top96 = cubevar * covariance;
top96 += x_variance * (static_cast<long double>(sigxx) * sigy - sigxxy * n);
bottom96 = cubevar * cubevar;
bottom96 -= x_variance * (sigxxxx * n - static_cast<long double>(sigxx) * sigxx);
}
if (bottom96 >= kMinVariance * n * n * n * n) {
// Denominators looking good
a = top96 / bottom96;
top96 = covariance - cubevar * a;
b = top96 / x_variance;
} else {
// Forget a, and concentrate on b.
a = 0.0;
b = covariance / x_variance;
}
// c is chosen so that the mean residual over the elements is zero.
c = (sigy - a * sigxx - b * sigx) / n;
}
} // namespace tesseract

View File

@ -0,0 +1,70 @@
/**********************************************************************
* File: quadlsq.h (Formerly qlsq.h)
* Description: Code for least squares approximation of quadratics.
* Author: Ray Smith
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUADLSQ_H
#define QUADLSQ_H
#include "points.h"
namespace tesseract {
// Accumulator for a least-squares fit of y = a*x*x + b*x + c.
// Points are added (or removed) one at a time; fit() then computes the
// coefficients from the accumulated sums.
class QLSQ {
public:
  QLSQ() {   // constructor
    clear(); // set to zeros
  }
  void clear();      // initialize
  void add(          // add element
      double x,      // coords to add
      double y);
  void remove(       // delete element
      double x,      // coords to delete
      double y);
  int32_t count() const { // no of elements
    return n;
  }

  // Fits a polynomial of at most the given degree and stores the result.
  void fit(          // fit the given
      int degree);   // degree of polynomial
  double get_a() const { // get x squared coefficient
    return a;
  }
  double get_b() const { // get x coefficient
    return b;
  }
  double get_c() const { // get constant
    return c;
  }

private:
  int32_t n;           // no of elements
  double a, b, c;      // result
  double sigx;         // sum of x
  double sigy;         // sum of y
  double sigxx;        // sum x squared
  double sigxy;        // sum of xy
  double sigyy;        // sum y squared
  long double sigxxx;  // sum x cubed
  long double sigxxy;  // sum xsquared y
  long double sigxxxx; // sum x fourth
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,65 @@
/**********************************************************************
* File: quadratc.h (Formerly quadrtic.h)
* Description: Code for the QUAD_COEFFS class.
* Author: Ray Smith
* Created: Tue Oct 08 17:24:40 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUADRATC_H
#define QUADRATC_H
#include "points.h"
namespace tesseract {
// Coefficients of the quadratic y = a*x*x + b*x + c.
class QUAD_COEFFS {
public:
  // Leaves the coefficients uninitialized.
  QUAD_COEFFS() = default;

  // Builds the quadratic xsq*x*x + x*x + constant from its coefficients.
  QUAD_COEFFS(double xsq, float x, float constant) : a(xsq), b(x), c(constant) {}

  // Evaluates the quadratic at x.
  float y(float x) const {
    return static_cast<float>((a * x + b) * x + c);
  }

  // Repositions the curve by the vector vec = (p, q):
  //   y - q = a (x - p)^2 + b (x - p) + c
  //   y - q = ax^2 - 2apx + ap^2 + bx - bp + c
  //   y     = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q)
  void move(ICOORD vec) {
    const int16_t shift_x = vec.x();
    const int16_t shift_y = vec.y();
    // c must be updated first: it needs the old value of b.
    c = static_cast<float>(c - b * shift_x + a * shift_x * shift_x + shift_y);
    b = static_cast<float>(b - 2 * a * shift_x);
  }

  double a; // x squared
  float b;  // x
  float c;  // constant
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,402 @@
/**********************************************************************
* File: quspline.cpp (Formerly qspline.c)
* Description: Code for the QSPLINE class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "quspline.h"
#include "points.h" // for ICOORD
#include "quadlsq.h" // for QLSQ
#include "quadratc.h" // for QUAD_COEFFS
#include <allheaders.h> // for pixRenderPolyline, pixGetDepth, pixGetHeight
#include "pix.h" // for L_CLEAR_PIXELS, L_SET_PIXELS, Pix (ptr only)
namespace tesseract {
#define QSPLINE_PRECISION 16 // no of steps to draw
/**********************************************************************
 * QSPLINE::QSPLINE
 *
 * Constructor to build a QSPLINE given the components used in the old code.
 * xstarts holds count + 1 boundaries; coeffs holds 3 coefficients
 * (x squared, x, constant) per segment.
 **********************************************************************/
QSPLINE::QSPLINE(     // constructor
    int32_t count,    // no of segments
    int32_t *xstarts, // start coords
    double *coeffs    // coefficients
) {
  xcoords = new int32_t[count + 1];
  quadratics = new QUAD_COEFFS[count];
  segments = count;

  int32_t seg = 0;
  while (seg < segments) {
    const double *seg_coeffs = coeffs + seg * 3;
    xcoords[seg] = xstarts[seg];
    quadratics[seg] = QUAD_COEFFS(seg_coeffs[0], seg_coeffs[1], seg_coeffs[2]);
    ++seg;
  }
  // One more boundary than segments: the right edge of the last one.
  xcoords[seg] = xstarts[seg];
}
/**********************************************************************
 * QSPLINE::QSPLINE
 *
 * Constructor to build a QSPLINE by approximation of points.
 *
 * xstarts holds segcount + 1 segment boundaries. The pointcount points
 * (xpts[i], ypts[i]) are distributed over the segments by x, and one
 * polynomial of at most the given degree is fitted per segment. Where a
 * neighbouring point exists, a point interpolated at the segment
 * boundary is added to the fit as well.
 **********************************************************************/
QSPLINE::QSPLINE(               // constructor
    int xstarts[],              // spline boundaries
    int segcount,               // no of segments
    int xpts[],                 // points to fit
    int ypts[], int pointcount, // no of pts
    int degree                  // fit required
) {
  int pointindex;    /*no along text line */
  int segment;       /*segment no */
  int32_t *ptcounts; // no in each segment
  QLSQ qlsq;         /*accumulator */

  segments = segcount;
  xcoords = new int32_t[segcount + 1];
  ptcounts = new int32_t[segcount + 1];
  quadratics = new QUAD_COEFFS[segcount];
  memmove(xcoords, xstarts, (segcount + 1) * sizeof(int32_t));

  // ptcounts[k] becomes the number of points placed before boundary k, so
  // segment k owns the points ptcounts[k] .. ptcounts[k + 1] - 1.
  ptcounts[0] = 0; /*none in any yet */
  for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) {
    while (segment < segcount && xpts[pointindex] >= xstarts[segment]) {
      segment++; /*try next segment */
      /*cumulative counts */
      ptcounts[segment] = ptcounts[segment - 1];
    }
    ptcounts[segment]++; /*no in previous partition */
  }
  while (segment < segcount) {
    segment++;
    /*zero the rest */
    ptcounts[segment] = ptcounts[segment - 1];
  }

  // True if points index - 1 and index both exist and differ in x, so that
  // interpolating between them neither reads outside the arrays nor
  // divides by zero.
  const auto can_interpolate = [&](int index) {
    return index > 0 && index < pointcount && xpts[index] != xpts[index - 1];
  };
  // y at x on the line through points index - 1 and index (integer
  // arithmetic, exactly as before).
  const auto interpolate_y = [&](int index, int x) {
    return ypts[index - 1] + (ypts[index] - ypts[index - 1]) * (x - xpts[index - 1]) /
                                 (xpts[index] - xpts[index - 1]);
  };

  for (segment = 0; segment < segcount; segment++) {
    qlsq.clear();
    /*first blob */
    pointindex = ptcounts[segment];
    // Interpolated point at the left boundary. pointindex equals pointcount
    // when no point lies in this or any later segment; can_interpolate
    // rejects that case before xpts[pointindex] is read.
    if (can_interpolate(pointindex) && xpts[pointindex] != xstarts[segment]) {
      qlsq.add(xstarts[segment], interpolate_y(pointindex, xstarts[segment]));
    }
    for (; pointindex < ptcounts[segment + 1]; pointindex++) {
      qlsq.add(xpts[pointindex], ypts[pointindex]);
    }
    // Interpolated point at the right boundary.
    if (can_interpolate(pointindex) && xpts[pointindex] != xstarts[segment + 1]) {
      qlsq.add(xstarts[segment + 1], interpolate_y(pointindex, xstarts[segment + 1]));
    }
    qlsq.fit(degree);
    quadratics[segment].a = qlsq.get_a();
    quadratics[segment].b = qlsq.get_b();
    quadratics[segment].c = qlsq.get_c();
  }
  delete[] ptcounts;
}
/**********************************************************************
 * QSPLINE::QSPLINE
 *
 * Copy constructor. Starts from an empty state so that the assignment
 * operator has valid (null) arrays to release, then delegates to it.
 **********************************************************************/
QSPLINE::QSPLINE( // constructor
    const QSPLINE &src) {
  xcoords = nullptr;
  quadratics = nullptr;
  segments = 0;
  operator=(src);
}
/**********************************************************************
 * QSPLINE::~QSPLINE
 *
 * Release the two arrays owned by the spline.
 **********************************************************************/
QSPLINE::~QSPLINE() {
  // delete[] on a null pointer is a no-op, so an empty spline is fine.
  delete[] quadratics;
  delete[] xcoords;
}
/**********************************************************************
 * QSPLINE::operator=
 *
 * Deep-copy source into this spline and return *this.
 *
 * Self-assignment is a no-op: without the guard the arrays would be
 * freed and then copied from. An array that is null in source (as in a
 * default-constructed spline) stays null in the copy instead of being
 * passed to memmove.
 **********************************************************************/
QSPLINE &QSPLINE::operator=( // assignment
    const QSPLINE &source) {
  if (this == &source) {
    return *this;
  }

  // Drop the old contents and stay in a destructible state while allocating.
  delete[] xcoords;
  delete[] quadratics;
  xcoords = nullptr;
  quadratics = nullptr;
  segments = 0;

  if (source.xcoords != nullptr) {
    // One more boundary than there are segments.
    xcoords = new int32_t[source.segments + 1];
    memmove(xcoords, source.xcoords, (source.segments + 1) * sizeof(int32_t));
  }
  if (source.quadratics != nullptr) {
    quadratics = new QUAD_COEFFS[source.segments];
    memmove(quadratics, source.quadratics, source.segments * sizeof(QUAD_COEFFS));
  }
  segments = source.segments;
  return *this;
}
/**********************************************************************
 * QSPLINE::step
 *
 * Sum the jumps between adjacent quadratics at every segment boundary
 * lying between the segment containing x1 and the one containing x2.
 * Returns 0 when x1's segment is not before x2's.
 **********************************************************************/
double QSPLINE::step(double x1, double x2) {
  const int last = spline_index(x2);
  double total = 0;
  for (int seg = spline_index(x1); seg < last; ++seg) {
    // Evaluate both neighbouring pieces at their shared boundary.
    total += static_cast<double>(quadratics[seg + 1].y(static_cast<float>(xcoords[seg + 1])));
    total -= static_cast<double>(quadratics[seg].y(static_cast<float>(xcoords[seg + 1])));
  }
  return total;
}
/**********************************************************************
 * QSPLINE::y
 *
 * Evaluate the spline at x using the quadratic of the segment that
 * spline_index() selects for x.
 **********************************************************************/
double QSPLINE::y(double x) const {
  const int32_t segment = spline_index(x);
  return quadratics[segment].y(x);
}
/**********************************************************************
 * QSPLINE::spline_index
 *
 * Binary search for the segment whose start is the largest xcoord not
 * greater than x. The search never leaves [0, segments), so an x left
 * of the first boundary yields 0.
 **********************************************************************/
int32_t QSPLINE::spline_index(double x) const {
  int32_t lo = 0;
  int32_t hi = segments;
  while (hi - lo > 1) {
    const int32_t mid = (hi + lo) / 2;
    if (x >= xcoords[mid]) {
      lo = mid; // x is at or beyond this boundary
    } else {
      hi = mid; // x is before this boundary
    }
  }
  return lo;
}
/**********************************************************************
 * QSPLINE::move
 *
 * Translate the spline by vec: every boundary is shifted by vec.x()
 * and every quadratic is moved by the whole vector.
 **********************************************************************/
void QSPLINE::move(ICOORD vec) {
  const int16_t dx = vec.x();
  int32_t index = 0;
  while (index < segments) {
    xcoords[index] += dx;
    quadratics[index].move(vec);
    ++index;
  }
  // There is one more boundary than there are segments.
  xcoords[index] += dx;
}
/**********************************************************************
 * QSPLINE::overlap
 *
 * Test whether spline2 covers enough of this spline. The limits are
 * this spline's second and second-to-last boundaries. Returns false if
 * spline2 has fewer than 3 segments, or if its corresponding boundary
 * falls short of either limit by more than fraction of the distance
 * between the limits.
 **********************************************************************/
bool QSPLINE::overlap(QSPLINE *spline2, double fraction) {
  const int leftlimit = xcoords[1];
  const int rightlimit = xcoords[segments - 1];

  if (spline2->segments < 3) {
    return false; // too short to compare
  }
  if (spline2->xcoords[1] > leftlimit + fraction * (rightlimit - leftlimit)) {
    return false; // starts too far to the right
  }
  if (spline2->xcoords[spline2->segments - 1] <
      rightlimit - fraction * (rightlimit - leftlimit)) {
    return false; // ends too far to the left
  }
  return true;
}
/**********************************************************************
* extrapolate_spline
*
* Extrapolates the spline linearly using the same gradient as the
* quadratic has at either end.
**********************************************************************/
void QSPLINE::extrapolate( // linear extrapolation
double gradient, // gradient to use
int xmin, // new left edge
int xmax // new right edge
) {
int segment; /*current segment of spline */
int dest_segment; // dest index
int32_t *xstarts; // new boundaries
QUAD_COEFFS *quads; // new ones
int increment; // in size
increment = xmin < xcoords[0] ? 1 : 0;
if (xmax > xcoords[segments]) {
increment++;
}
if (increment == 0) {
return;
}
xstarts = new int32_t[segments + 1 + increment];
quads = new QUAD_COEFFS[segments + increment];
if (xmin < xcoords[0]) {
xstarts[0] = xmin;
quads[0].a = 0;
quads[0].b = gradient;
quads[0].c = y(xcoords[0]) - quads[0].b * xcoords[0];
dest_segment = 1;
} else {
dest_segment = 0;
}
for (segment = 0; segment < segments; segment++) {
xstarts[dest_segment] = xcoords[segment];
quads[dest_segment] = quadratics[segment];
dest_segment++;
}
xstarts[dest_segment] = xcoords[segment];
if (xmax > xcoords[segments]) {
quads[dest_segment].a = 0;
quads[dest_segment].b = gradient;
quads[dest_segment].c = y(xcoords[segments]) - quads[dest_segment].b * xcoords[segments];
dest_segment++;
xstarts[dest_segment] = xmax + 1;
}
segments = dest_segment;
delete[] xcoords;
delete[] quadratics;
xcoords = xstarts;
quadratics = quads;
}
/**********************************************************************
 * QSPLINE::plot
 *
 * Draw the spline in the given window and colour. Each segment is
 * sampled at QSPLINE_PRECISION + 1 evenly spaced x positions.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void QSPLINE::plot(ScrollView *window, ScrollView::Color colour) const {
  window->Pen(colour);
  for (int32_t seg = 0; seg < segments; ++seg) {
    const double x_step =
        static_cast<double>(xcoords[seg + 1] - xcoords[seg]) / QSPLINE_PRECISION;
    double x = xcoords[seg];
    for (int16_t piece = 0; piece <= QSPLINE_PRECISION; ++piece, x += x_step) {
      const bool very_first_point = (seg == 0 && piece == 0);
      if (very_first_point) {
        // Position the pen without drawing.
        window->SetCursor(x, quadratics[seg].y(x));
      } else {
        window->DrawTo(x, quadratics[seg].y(x));
      }
    }
  }
}
#endif
// Renders the spline onto pix as a polyline of width 5; does nothing if pix
// is null. The y values are flipped against the image height. A 1-bit image
// has the pixels set, a 32-bit image gets the line in (255, 0, 0), and any
// other depth has the pixels cleared.
void QSPLINE::plot(Image pix) const {
  if (pix == nullptr) {
    return;
  }
  const int kLineWidth = 5;
  const auto height = static_cast<double>(pixGetHeight(pix));
  Pta *points = ptaCreate(QSPLINE_PRECISION * segments);

  // Sample every segment at QSPLINE_PRECISION + 1 evenly spaced x positions.
  for (int32_t seg = 0; seg < segments; ++seg) {
    const double x_step =
        static_cast<double>((xcoords[seg + 1] - xcoords[seg])) / QSPLINE_PRECISION;
    double x = xcoords[seg];
    for (int16_t piece = 0; piece <= QSPLINE_PRECISION; ++piece, x += x_step) {
      const double y = height - quadratics[seg].y(x);
      ptaAddPt(points, x, y);
    }
  }

  const auto depth = pixGetDepth(pix);
  if (depth == 1) {
    pixRenderPolyline(pix, points, kLineWidth, L_SET_PIXELS, 1);
  } else if (depth == 32) {
    pixRenderPolylineArb(pix, points, kLineWidth, 255, 0, 0, 1);
  } else {
    pixRenderPolyline(pix, points, kLineWidth, L_CLEAR_PIXELS, 1);
  }
  ptaDestroy(&points);
}
} // namespace tesseract

View File

@ -0,0 +1,99 @@
/**********************************************************************
* File: quspline.h (Formerly qspline.h)
* Description: Code for the QSPLINE class.
* Author: Ray Smith
* Created: Tue Oct 08 17:16:12 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUSPLINE_H
#define QUSPLINE_H
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include <cstdint> // for int32_t
struct Pix;
namespace tesseract {
class ICOORD;
class QUAD_COEFFS;
class ROW;
class TBOX;
// A piecewise quadratic spline: `segments` quadratics, each valid between two
// consecutive entries of `xcoords` (which therefore holds segments + 1
// boundaries). The class owns both arrays; they are deep-copied by the copy
// constructor and assignment operator and freed by the destructor.
class TESS_API QSPLINE {
// Free functions that are granted access to the private members.
friend void make_first_baseline(TBOX *, int, int *, int *, QSPLINE *, QSPLINE *, float);
friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float);
friend void tweak_row_baseline(ROW *, double, double);
public:
QSPLINE() { // empty constructor
segments = 0;
xcoords = nullptr; // no arrays allocated yet
quadratics = nullptr;
}
QSPLINE( // copy constructor (deep copy)
const QSPLINE &src);
QSPLINE( // constructor
int32_t count, // number of segments
int32_t *xstarts, // segment starts
double *coeffs); // coefficients
~QSPLINE(); // destructor, frees both arrays
QSPLINE( // least squares fit
int xstarts[], // spline boundaries
int segcount, // no of segments
int xcoords[], // x coords of points to fit
int ycoords[], int blobcount, // y coords of points, no of points
int degree); // degree of fit required
// Returns the sum of the jumps between adjacent quadratics at the segment
// boundaries between the segments containing x1 and x2.
double step( // step change
double x1, // between coords
double x2);
double y( // evaluate
double x) const; // at x
void move( // reposition spline
ICOORD vec); // by vector
bool overlap( // test overlap
QSPLINE *spline2, // 2 cannot be smaller
double fraction); // by more than this
// Adds a straight segment of the given gradient at either end where the
// spline does not yet reach the requested edge.
void extrapolate( // linear extrapolation
double gradient, // gradient to use
int left, // new left edge
int right); // new right edge
#ifndef GRAPHICS_DISABLED
void plot( // draw it
ScrollView *window, // in window
ScrollView::Color colour) const; // in colour
#endif
// Paint the baseline over pix. If pix has depth of 32, then the line will
// be painted in red. Otherwise it will be painted in black.
void plot(Image pix) const;
QSPLINE &operator=(const QSPLINE &source); // deep copy of source
private:
// Returns the index of the segment to use for x.
int32_t spline_index( // binary search
double x) const; // for x
int32_t segments; // no of segments
int32_t *xcoords; // segment boundaries (segments + 1 entries)
QUAD_COEFFS *quadratics; // spline pieces (segments entries)
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,829 @@
/**********************************************************************
* File: ratngs.cpp (Formerly ratings.c)
* Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "ratngs.h"
#include "blobs.h"
#include "matrix.h"
#include "normalis.h" // kBlnBaselineOffset.
#include "unicharset.h"
#include <algorithm>
#include <string>
#include <vector>
namespace tesseract {
// Definition of the static WERD_CHOICE::kBadRating constant.
const float WERD_CHOICE::kBadRating = 100000.0;
// Min offset in baseline-normalized coords to make a character a subscript.
const int kMinSubscriptOffset = 20;
// Min offset in baseline-normalized coords to make a character a superscript.
const int kMinSuperscriptOffset = 20;
// Max y of bottom of a drop-cap blob.
const int kMaxDropCapBottom = -128;
// Max fraction of x-height to use as denominator in measuring x-height overlap.
const double kMaxOverlapDenominator = 0.125;
// Min fraction of x-height range that should be in agreement for matching
// x-heights.
const double kMinXHeightMatch = 0.5;
// Max tolerance on baseline position as a fraction of x-height for matching
// baselines.
const double kMaxBaselineDrift = 0.0625;
// Human-readable names, one per permuter type.
static const char kPermuterTypeNoPerm[] = "None";
static const char kPermuterTypePuncPerm[] = "Punctuation";
static const char kPermuterTypeTopPerm[] = "Top Choice";
static const char kPermuterTypeLowerPerm[] = "Top Lower Case";
static const char kPermuterTypeUpperPerm[] = "Top Upper Case";
static const char kPermuterTypeNgramPerm[] = "Ngram";
static const char kPermuterTypeNumberPerm[] = "Number";
static const char kPermuterTypeUserPatPerm[] = "User Pattern";
static const char kPermuterTypeSysDawgPerm[] = "System Dictionary";
static const char kPermuterTypeDocDawgPerm[] = "Document Dictionary";
static const char kPermuterTypeUserDawgPerm[] = "User Dictionary";
static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary";
static const char kPermuterTypeCompoundPerm[] = "Compound";
// Lookup table indexed directly by the permuter value (see
// WERD_CHOICE::permuter_name). The trailing number on each line is the
// index of that entry, so the order here must not change.
static const char *const kPermuterTypeNames[] = {
kPermuterTypeNoPerm, // 0
kPermuterTypePuncPerm, // 1
kPermuterTypeTopPerm, // 2
kPermuterTypeLowerPerm, // 3
kPermuterTypeUpperPerm, // 4
kPermuterTypeNgramPerm, // 5
kPermuterTypeNumberPerm, // 6
kPermuterTypeUserPatPerm, // 7
kPermuterTypeSysDawgPerm, // 8
kPermuterTypeDocDawgPerm, // 9
kPermuterTypeUserDawgPerm, // 10
kPermuterTypeFreqDawgPerm, // 11
kPermuterTypeCompoundPerm // 12
};
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Builds a BLOB_CHOICE from a classification result. Every field set
 * here is copied from the matching argument, except the two font ids,
 * which start out as -1.
 */
BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
                         float src_rating,          // rating
                         float src_cert,            // certainty
                         int src_script_id,         // script
                         float min_xheight,         // min xheight allowed
                         float max_xheight,         // max xheight by this char
                         float yshift,              // yshift out of position
                         BlobChoiceClassifier c) {  // adapted match or other
  // What was recognised and by which classifier.
  unichar_id_ = src_unichar_id;
  script_id_ = src_script_id;
  classifier_ = c;

  // How good the match is.
  rating_ = src_rating;
  certainty_ = src_cert;

  // Geometry of the match.
  min_xheight_ = min_xheight;
  max_xheight_ = max_xheight;
  yshift_ = yshift;

  // No font information yet.
  fontinfo_id_ = -1;
  fontinfo_id2_ = -1;
}
/**
 * BLOB_CHOICE::BLOB_CHOICE
 *
 * Copy constructor: copies the list link base and every field of other.
 */
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) {
  // Identity of the choice.
  unichar_id_ = other.unichar_id();
  script_id_ = other.script_id();
  classifier_ = other.classifier_;
  matrix_cell_ = other.matrix_cell_;

  // Scores.
  rating_ = other.rating();
  certainty_ = other.certainty();

  // Geometry.
  min_xheight_ = other.min_xheight_;
  max_xheight_ = other.max_xheight_;
  yshift_ = other.yshift();

  // Font information.
  fontinfo_id_ = other.fontinfo_id();
  fontinfo_id2_ = other.fontinfo_id2();
#ifndef DISABLED_LEGACY_ENGINE
  fonts_ = other.fonts_;
#endif // ndef DISABLED_LEGACY_ENGINE
}
// Copy assignment operator: assigns the list link base and then copies every
// field of other. Returns *this.
BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) {
  ELIST_LINK::operator=(other);

  // Identity of the choice.
  unichar_id_ = other.unichar_id();
  script_id_ = other.script_id();
  classifier_ = other.classifier_;
  matrix_cell_ = other.matrix_cell_;

  // Scores.
  rating_ = other.rating();
  certainty_ = other.certainty();

  // Geometry.
  min_xheight_ = other.min_xheight_;
  max_xheight_ = other.max_xheight_;
  yshift_ = other.yshift();

  // Font information.
  fontinfo_id_ = other.fontinfo_id();
  fontinfo_id2_ = other.fontinfo_id2();
#ifndef DISABLED_LEGACY_ENGINE
  fonts_ = other.fonts_;
#endif // ndef DISABLED_LEGACY_ENGINE
  return *this;
}
// Decides whether *this and other are compatible in baseline position and
// x-height, given an estimate of the x-height.
// The baselines must differ by at most kMaxBaselineDrift * x_height, and the
// overlap of the two x-height ranges, divided by the smaller range clipped to
// [1, kMaxOverlapDenominator * x_height], must reach kMinXHeightMatch.
// With debug set, the values used are printed.
bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const {
  const double baseline_diff = fabs(yshift() - other.yshift());
  const bool baseline_ok = !(baseline_diff > kMaxBaselineDrift * x_height);
  if (!baseline_ok) {
    if (debug) {
      tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, other.unichar_id_);
    }
    return false;
  }

  const double this_range = max_xheight() - min_xheight();
  const double other_range = other.max_xheight() - other.min_xheight();
  const double denominator =
      ClipToRange(std::min(this_range, other_range), 1.0, kMaxOverlapDenominator * x_height);

  // Intersection of the two x-height ranges (may be negative if disjoint).
  const auto common_top = std::min(max_xheight(), other.max_xheight());
  const auto common_bottom = std::max(min_xheight(), other.min_xheight());
  double overlap = common_top - common_bottom;
  overlap /= denominator;

  if (debug) {
    tprintf("PosAndSize for %d v %d: bl diff = %g, ranges %g, %g / %g ->%g\n", unichar_id_,
            other.unichar_id_, baseline_diff, this_range, other_range, denominator, overlap);
  }
  return overlap >= kMinXHeightMatch;
}
// Walks bc_list once and returns the first BLOB_CHOICE whose unichar id equals
// char_id. Returns nullptr when the list holds no such choice.
BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list) {
  BLOB_CHOICE_IT it(bc_list);
  it.mark_cycle_pt();
  while (!it.cycled_list()) {
    BLOB_CHOICE *candidate = it.data();
    if (candidate->unichar_id() == char_id) {
      return candidate;
    }
    it.forward();
  }
  return nullptr;
}
// Returns the display name for the given permuter code.
// The code is used as an index into kPermuterTypeNames; a code past the end
// of that table yields "Unknown" rather than an out-of-bounds read.
const char *WERD_CHOICE::permuter_name(uint8_t permuter) {
  constexpr auto kNumNames = sizeof(kPermuterTypeNames) / sizeof(kPermuterTypeNames[0]);
  if (permuter >= kNumNames) {
    return "Unknown";
  }
  return kPermuterTypeNames[permuter];
}
// Returns a short printable tag for script_pos, or "SP_UNKNOWN" for a value
// that is not one of the four handled positions.
const char *ScriptPosToString(enum ScriptPos script_pos) {
  const char *tag = "SP_UNKNOWN";
  switch (script_pos) {
    case SP_NORMAL:
      tag = "NORM";
      break;
    case SP_SUBSCRIPT:
      tag = "SUB";
      break;
    case SP_SUPERSCRIPT:
      tag = "SUPER";
      break;
    case SP_DROPCAP:
      tag = "DROPC";
      break;
  }
  return tag;
}
/**
 * WERD_CHOICE::WERD_CHOICE
 *
 * Builds a WERD_CHOICE from a string, which must not be nullptr.
 * The string is cleaned up by the unicharset and then encoded. If the
 * encoding fails, the word is initialized empty and marked bad.
 */
WERD_CHOICE::WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset)
    : unicharset_(&unicharset) {
  const std::string cleaned = unicharset.CleanupString(src_string);
  std::vector<UNICHAR_ID> encoding;
  std::vector<char> lengths;
  const bool encoded =
      unicharset.encode_string(cleaned.c_str(), true, &encoding, &lengths, nullptr);
  if (!encoded) {
    // There must have been an invalid unichar in the string.
    this->init(8);
    this->make_bad();
    return;
  }
  // Terminate the per-unichar lengths so they can be used as a C string.
  lengths.push_back('\0');
  const std::string src_lengths = &lengths[0];
  this->init(cleaned.c_str(), src_lengths.c_str(), 0.0, 0.0, NO_PERM);
}
/**
 * WERD_CHOICE::init
 *
 * Fills this WERD_CHOICE from a string, the byte lengths of its
 * unichars, a rating, a certainty and a permuter.
 *
 * src_string must not be nullptr. src_lengths may be nullptr, in which
 * case every unichar in src_string is taken to be one byte long.
 * An empty src_string yields an empty word with room for 8 unichars.
 */
void WERD_CHOICE::init(const char *src_string, const char *src_lengths, float src_rating,
                       float src_certainty, uint8_t src_permuter) {
  const int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    this->init(8);
  } else {
    // One entry per unichar: as many as there are lengths, or one per byte.
    const int num_unichars =
        src_lengths != nullptr ? static_cast<int>(strlen(src_lengths)) : src_string_len;
    this->init(num_unichars);
    length_ = reserved_;

    const char *cursor = src_string; // start of the next unichar
    for (int i = 0; i < length_; ++i) {
      const int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] = unicharset_->unichar_to_id(cursor, unichar_length);
      state_[i] = 1;
      certainties_[i] = src_certainty;
      cursor += unichar_length;
    }
  }

  // Word-level values, set for both the empty and the non-empty case.
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
  adjust_factor_ = 1.0f;
  dangerous_ambig_found_ = false;
}
/**
 * WERD_CHOICE::~WERD_CHOICE
 *
 * Defaulted destructor: the compiler-generated member-wise cleanup is used.
 */
WERD_CHOICE::~WERD_CHOICE() = default;
// Returns the display name of this word's permuter.
// permuter_ is used as an index into kPermuterTypeNames; a value past the end
// of that table yields "Unknown" rather than an out-of-bounds read.
const char *WERD_CHOICE::permuter_name() const {
  constexpr auto kNumNames = sizeof(kPermuterTypeNames) / sizeof(kPermuterTypeNames[0]);
  if (static_cast<decltype(kNumNames)>(permuter_) >= kNumNames) {
    return "Unknown";
  }
  return kPermuterTypeNames[permuter_];
}
// Returns the BLOB_CHOICE_LIST stored in the ratings MATRIX cell that
// corresponds to the given index in the word. If that cell is still empty, a
// new empty list is created and stored in the matrix first.
// Borrowed pointer, so do not delete.
BLOB_CHOICE_LIST *WERD_CHOICE::blob_choices(int index, MATRIX *ratings) const {
  const MATRIX_COORD cell = MatrixCoord(index);
  BLOB_CHOICE_LIST *choices = ratings->get(cell.col, cell.row);
  if (choices != nullptr) {
    return choices;
  }
  choices = new BLOB_CHOICE_LIST;
  ratings->put(cell.col, cell.row, choices);
  return choices;
}
// Returns the ratings MATRIX location for the given index into the word.
// The column is the sum of the states before index; the row is the column
// plus this index's state, minus one.
MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const {
  int first = 0;
  int i = 0;
  while (i < index) {
    first += state_[i];
    ++i;
  }
  const int last = first + state_[index] - 1;
  return MATRIX_COORD(first, last);
}
// Overwrites the entries at index from blob_choice: the unichar id and the
// certainty come from the choice, the state becomes blob_count, and the script
// position is reset to normal.
void WERD_CHOICE::set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice) {
  // Taken from the choice.
  unichar_ids_[index] = blob_choice->unichar_id();
  certainties_[index] = blob_choice->certainty();
  // Taken from the caller or reset.
  state_[index] = blob_count;
  script_pos_[index] = tesseract::SP_NORMAL;
}
/**
 * contains_unichar_id
 *
 * Linear search of the first length_ entries of unichar_ids_.
 * Returns true if one of them equals unichar_id, false otherwise.
 */
bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const {
  int pos = 0;
  while (pos < length_ && unichar_ids_[pos] != unichar_id) {
    ++pos;
  }
  // The scan stopped early only if a match was found.
  return pos < length_;
}
/**
 * remove_unichar_ids
 *
 * Removes num unichars starting at index start. The states of the
 * removed entries are added to the entry before start or, when start
 * is 0, to the first entry after the removed range (if there is one).
 * The remaining entries of unichar_ids_, script_pos_, state_ and
 * certainties_ are shifted down and length_ is reduced by num.
 * Note: this function does not modify rating_ and certainty_.
 *
 * Requires start >= 0, num >= 0 and start + num <= length_. A negative
 * num is rejected because it would read before start and grow length_.
 */
void WERD_CHOICE::remove_unichar_ids(int start, int num) {
  ASSERT_HOST(start >= 0 && num >= 0 && start + num <= length_);

  // Pick the surviving neighbour that absorbs the removed blobs, if any.
  int absorber = -1;
  if (start > 0) {
    absorber = start - 1;
  } else if (start + num < length_) {
    absorber = start + num;
  }
  if (absorber >= 0) {
    for (int i = 0; i < num; ++i) {
      state_[absorber] += state_[start + i];
    }
  }

  // Close the gap.
  for (int i = start; i + num < length_; ++i) {
    unichar_ids_[i] = unichar_ids_[i + num];
    script_pos_[i] = script_pos_[i + num];
    state_[i] = state_[i + num];
    certainties_[i] = certainties_[i + num];
  }
  length_ -= num;
}
/**
* reverse_and_mirror_unichar_ids
*
* Reverses and mirrors unichars in unichar_ids.
*/
void WERD_CHOICE::reverse_and_mirror_unichar_ids() {
for (int i = 0; i < length_ / 2; ++i) {
UNICHAR_ID tmp_id = unichar_ids_[i];
unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_ - 1 - i]);
unichar_ids_[length_ - 1 - i] = unicharset_->get_mirror(tmp_id);
}
if (length_ % 2 != 0) {
unichar_ids_[length_ / 2] = unicharset_->get_mirror(unichar_ids_[length_ / 2]);
}
}
/**
 * punct_stripped
 *
 * Computes the half-open interval [*start, *end) of unichar indices
 * that remains after stripping punctuation from both ends of the word.
 * For a word consisting only of punctuation, *start is the length and
 * *end is 0.
 */
void WERD_CHOICE::punct_stripped(int *start, int *end) const {
  // Skip leading punctuation.
  int first = 0;
  while (first < length() && unicharset()->get_ispunctuation(unichar_id(first))) {
    ++first;
  }
  // Skip trailing punctuation.
  int last = length() - 1;
  while (last > -1 && unicharset()->get_ispunctuation(unichar_id(last))) {
    --last;
  }
  *start = first;
  *end = last + 1; // make the end exclusive
}
void WERD_CHOICE::GetNonSuperscriptSpan(int *pstart, int *pend) const {
int end = length();
while (end > 0 && unicharset_->get_isdigit(unichar_ids_[end - 1]) &&
BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) {
end--;
}
int start = 0;
while (start < end && unicharset_->get_isdigit(unichar_ids_[start]) &&
BlobPosition(start) == tesseract::SP_SUPERSCRIPT) {
start++;
}
*pstart = start;
*pend = end;
}
// Returns a new WERD_CHOICE holding the unichar ids, states and certainties of
// the entries in [start, end), each appended with a rating of 0. Both bounds
// must lie in [0, length_]; an end before start gives an empty result.
WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const {
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  const int stop = end < start ? start : end;
  WERD_CHOICE retval(unicharset_, stop - start);
  int i = start;
  while (i < stop) {
    retval.append_unichar_id_space_allocated(unichar_ids_[i], state_[i], 0.0f, certainties_[i]);
    ++i;
  }
  return retval;
}
/**
* has_rtl_unichar_id
*
* Returns true if unichar_ids contain at least one "strongly" RTL unichar.
*/
bool WERD_CHOICE::has_rtl_unichar_id() const {
int i;
for (i = 0; i < length_; ++i) {
UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
return true;
}
}
return false;
}
/**
* string_and_lengths
*
* Populates the given word_str with unichars from unichar_ids and
* and word_lengths_str with the corresponding unichar lengths.
*/
void WERD_CHOICE::string_and_lengths(std::string *word_str, std::string *word_lengths_str) const {
*word_str = "";
if (word_lengths_str != nullptr) {
*word_lengths_str = "";
}
for (int i = 0; i < length_; ++i) {
const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
*word_str += ch;
if (word_lengths_str != nullptr) {
*word_lengths_str += (char)strlen(ch);
}
}
}
/**
 * append_unichar_id
 *
 * Appends a unichar id to the word, doubling the reserved space first
 * if the word is already full, and then delegating to
 * append_unichar_id_space_allocated().
 */
void WERD_CHOICE::append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating,
                                    float certainty) {
  const bool full = (length_ == reserved_);
  if (full) {
    this->double_the_size();
  }
  this->append_unichar_id_space_allocated(unichar_id, blob_count, rating, certainty);
}
/**
 * WERD_CHOICE::operator+=
 *
 * Appends a second word to the end of this one. Both words must use
 * the same unicharset. The ratings are added, the certainty becomes
 * the minimum and the adjust factor the maximum of the two, and the
 * dangerous-ambiguity flag is set if either word has it.
 * The permuter is taken from second if this one is NO_PERM; if both
 * are set and differ, it becomes COMPOUND_PERM.
 */
WERD_CHOICE &WERD_CHOICE::operator+=(const WERD_CHOICE &second) {
  ASSERT_HOST(unicharset_ == second.unicharset_);
  while (reserved_ < length_ + second.length()) {
    this->double_the_size();
  }

  // Copy the per-unichar data behind the current end.
  const std::vector<UNICHAR_ID> &second_ids = second.unichar_ids();
  const int added = second.length();
  for (int src = 0, dst = length_; src < added; ++src, ++dst) {
    unichar_ids_[dst] = second_ids[src];
    state_[dst] = second.state_[src];
    certainties_[dst] = second.certainties_[src];
    script_pos_[dst] = second.BlobPosition(src);
  }
  length_ += added;

  // Merge the word-level values.
  if (second.adjust_factor_ > adjust_factor_) {
    adjust_factor_ = second.adjust_factor_;
  }
  rating_ += second.rating(); // add ratings
  if (second.certainty() < certainty_) { // take min
    certainty_ = second.certainty();
  }
  if (second.dangerous_ambig_found_) {
    dangerous_ambig_found_ = true;
  }

  const auto second_permuter = second.permuter();
  if (permuter_ == NO_PERM) {
    permuter_ = second_permuter;
  } else if (second_permuter != NO_PERM && second_permuter != permuter_) {
    permuter_ = COMPOUND_PERM;
  }
  return *this;
}
/**
 * WERD_CHOICE::operator=
 *
 * Grows this word until it can hold source, then copies the per-unichar
 * data and all word-level values from source. Returns *this.
 */
WERD_CHOICE &WERD_CHOICE::operator=(const WERD_CHOICE &source) {
  while (reserved_ < source.length()) {
    this->double_the_size();
  }
  unicharset_ = source.unicharset_;

  // Per-unichar data.
  const std::vector<UNICHAR_ID> &source_ids = source.unichar_ids();
  const int count = source.length();
  for (int pos = 0; pos < count; ++pos) {
    unichar_ids_[pos] = source_ids[pos];
    state_[pos] = source.state_[pos];
    certainties_[pos] = source.certainties_[pos];
    script_pos_[pos] = source.BlobPosition(pos);
  }
  length_ = count;

  // Word-level values.
  rating_ = source.rating();
  certainty_ = source.certainty();
  adjust_factor_ = source.adjust_factor_;
  min_x_height_ = source.min_x_height();
  max_x_height_ = source.max_x_height();
  permuter_ = source.permuter();
  dangerous_ambig_found_ = source.dangerous_ambig_found_;
  return *this;
}
// Sets up the script_pos_ member using the blobs of word to get the bln
// bounding boxes, *this to get the unichars, and this->unicharset
// to get the target positions. If small_caps is true, sub/super are not
// considered, but dropcaps are.
// debug >= 1 prints details when some position is not normal; debug >= 2
// always prints them.
// NOTE: the blobs of word should be the chopped_word blobs. (Fully segmented.)
void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
// Initialize to normal.
for (int i = 0; i < length_; ++i) {
script_pos_[i] = tesseract::SP_NORMAL;
}
// Leave everything normal unless the blobs match the states one-to-one.
if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
return;
}
// Number of unichars found in each script position, indexed by ScriptPos.
int position_counts[4] = {0, 0, 0, 0};
// chunk_index walks the blobs of word; blob_index walks the unichars.
int chunk_index = 0;
for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) {
TBLOB *tblob = word->blobs[chunk_index];
int uni_id = unichar_id(blob_index);
TBOX blob_box = tblob->bounding_box();
if (!state_.empty()) {
// A unichar made of several chunks gets the union of their boxes.
for (int i = 1; i < state_[blob_index]; ++i) {
++chunk_index;
tblob = word->blobs[chunk_index];
blob_box += tblob->bounding_box();
}
}
script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box, uni_id);
// In small caps only drop caps are kept; sub/superscripts become normal.
if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) {
script_pos_[blob_index] = tesseract::SP_NORMAL;
}
position_counts[script_pos_[blob_index]]++;
}
// If almost everything looks like a superscript or subscript,
// we most likely just got the baseline wrong.
if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ ||
position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) {
if (debug >= 2) {
tprintf(
"Most characters of %s are subscript or superscript.\n"
"That seems wrong, so I'll assume we got the baseline wrong\n",
unichar_string().c_str());
}
// Turn every sub/superscript back into normal and fix up the counts.
for (int i = 0; i < length_; i++) {
ScriptPos sp = script_pos_[i];
if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) {
position_counts[sp]--;
position_counts[tesseract::SP_NORMAL]++;
script_pos_[i] = tesseract::SP_NORMAL;
}
}
}
if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) || debug >= 2) {
tprintf("SetScriptPosition on %s\n", unichar_string().c_str());
// This local chunk_index shadows the one used by the loop above.
int chunk_index = 0;
for (int blob_index = 0; blob_index < length_; ++blob_index) {
if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) {
// Called only for its debug output; the return value is ignored.
// NOTE(review): only the first chunk's box is passed here, not the
// merged box used above, so the printed result may differ for
// unichars made of several chunks.
TBLOB *tblob = word->blobs[chunk_index];
ScriptPositionOf(true, *unicharset_, tblob->bounding_box(), unichar_id(blob_index));
}
chunk_index += state_.empty() ? 1 : state_[blob_index];
}
}
}
// Overwrites the script position of every unichar in the word with the given
// position.
void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) {
  int pos = 0;
  while (pos < length_) {
    script_pos_[pos] = position;
    ++pos;
  }
}
/* static */
// Classifies blob_box as drop cap, subscript, superscript or normal, using the
// top/bottom ranges that unicharset holds for unichar_id. The tests are made
// in that order, so a drop cap wins over a subscript. With print_debug set,
// the values used are printed.
ScriptPos WERD_CHOICE::ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
                                        const TBOX &blob_box, UNICHAR_ID unichar_id) {
  const int top = blob_box.top();
  const int bottom = blob_box.bottom();

  // Expected vertical extent of this unichar.
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);

  const int sub_thresh_top = min_top - kMinSubscriptOffset;
  const int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset;
  const int sup_thresh_bot = max_bottom + kMinSuperscriptOffset;

  ScriptPos retval;
  if (bottom <= kMaxDropCapBottom) {
    retval = tesseract::SP_DROPCAP;
  } else if (top < sub_thresh_top && bottom < sub_thresh_bot) {
    retval = tesseract::SP_SUBSCRIPT;
  } else if (bottom > sup_thresh_bot) {
    retval = tesseract::SP_SUPERSCRIPT;
  } else {
    retval = tesseract::SP_NORMAL;
  }

  if (print_debug) {
    const char *pos = ScriptPosToString(retval);
    tprintf(
        "%s Character %s[bot:%d top: %d] "
        "bot_range[%d,%d] top_range[%d, %d] "
        "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n",
        pos, unicharset.id_to_unichar(unichar_id), bottom, top, min_bottom, max_bottom, min_top,
        max_top, sub_thresh_bot, sub_thresh_top, sup_thresh_bot);
  }
  return retval;
}
// Returns the script-id (eg Han) of the dominant script in the word.
// Hiragana and Katakana counts are folded into Han when the unicharset has a
// Han script. If the most frequent script accounts for fewer than
// length_ / 2 unichars, the null script id is returned instead.
int WERD_CHOICE::GetTopScriptID() const {
  const int max_script = unicharset_->get_script_table_size();
  // Count of unichars per script id. A vector releases itself on every exit
  // path, unlike a raw new[]/delete[] pair.
  std::vector<int> sid(max_script, 0);
  for (int x = 0; x < length_; ++x) {
    const int script_id = unicharset_->get_script(unichar_id(x));
    sid[script_id]++;
  }
  if (unicharset_->han_sid() != unicharset_->null_sid()) {
    // Add the Hiragana & Katakana counts to Han and zero them out.
    if (unicharset_->hiragana_sid() != unicharset_->null_sid()) {
      sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()];
      sid[unicharset_->hiragana_sid()] = 0;
    }
    if (unicharset_->katakana_sid() != unicharset_->null_sid()) {
      sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()];
      sid[unicharset_->katakana_sid()] = 0;
    }
  }
  // Note that high script ID overrides lower one on a tie, thus biasing
  // towards non-Common script (if sorted that way in unicharset file).
  int max_sid = 0;
  for (int x = 1; x < max_script; x++) {
    if (sid[x] >= sid[max_sid]) {
      max_sid = x;
    }
  }
  if (sid[max_sid] < length_ / 2) {
    max_sid = unicharset_->null_sid();
  }
  return max_sid;
}
// Fixes the state_ for a chop at the given blob_position: the first unichar
// whose running total of states exceeds blob_position gets one more blob.
// Nothing changes if no running total exceeds blob_position.
void WERD_CHOICE::UpdateStateForSplit(int blob_position) {
  int chunks_so_far = 0;
  for (int pos = 0; pos < length_; ++pos) {
    chunks_so_far += state_[pos];
    if (chunks_so_far <= blob_position) {
      continue; // the chopped blob lies further along
    }
    ++state_[pos];
    return;
  }
}
// Adds up the state of every unichar in the word and returns the sum, which is
// the total number of blobs.
int WERD_CHOICE::TotalOfStates() const {
  int sum = 0;
  int pos = 0;
  while (pos < length_) {
    sum += state_[pos];
    ++pos;
  }
  return sum;
}
/**
 * WERD_CHOICE::print
 *
 * Prints the word with tprintf: first msg, the text and the word-level
 * values on one line, then one tab-separated row each for the script
 * positions, the unichars, the states and the certainties.
 */
void WERD_CHOICE::print(const char *msg) const {
  const int count = length_;

  // Summary line.
  tprintf("%s : ", msg);
  for (int k = 0; k < count; ++k) {
    tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[k]));
  }
  tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n", rating_, certainty_,
          adjust_factor_, permuter_, min_x_height_, max_x_height_, dangerous_ambig_found_);

  // Per-unichar rows.
  tprintf("pos");
  for (int k = 0; k < count; ++k) {
    tprintf("\t%s", ScriptPosToString(script_pos_[k]));
  }
  tprintf("\nstr");
  for (int k = 0; k < count; ++k) {
    tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[k]));
  }
  tprintf("\nstate:");
  for (int k = 0; k < count; ++k) {
    tprintf("\t%d ", state_[k]);
  }
  tprintf("\nC");
  for (int k = 0; k < count; ++k) {
    tprintf("\t%.3f", certainties_[k]);
  }
  tprintf("\n");
}
// Prints msg followed by the blob count of every unichar in the word
// (space separated) and a trailing newline.
void WERD_CHOICE::print_state(const char *msg) const {
  tprintf("%s", msg);
  int pos = 0;
  while (pos < length_) {
    tprintf(" %d", state_[pos]);
    ++pos;
  }
  tprintf("\n");
}
#ifndef GRAPHICS_DISABLED
// Displays the segmentation state of *this (if not the same as the last
// one displayed) and waits for a click in the window.
// The previously drawn state and the window are kept in function-local
// statics, so they are shared by every WERD_CHOICE.
void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
  // Number of different colors to draw with.
  const int kNumColors = 6;
  static ScrollView *segm_window = nullptr;
  // Segmentation drawn by the most recent call.
  static std::vector<int> prev_drawn_state;

  // Compare against (and refresh) the remembered state.
  bool unchanged = true;
  if (prev_drawn_state.size() != length_) {
    unchanged = false;
    prev_drawn_state.resize(length_);
  }
  for (int pos = 0; pos < length_; ++pos) {
    const int current = state_[pos];
    if (prev_drawn_state[pos] != current) {
      unchanged = false;
      prev_drawn_state[pos] = current;
    }
  }
  if (unchanged || word->blobs.empty()) {
    return;
  }

  // Reuse the window when it already exists, otherwise create it.
  if (segm_window != nullptr) {
    segm_window->Clear();
  } else {
    segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true);
  }

  // Plot every blob, cycling the color once per unichar, and accumulate the
  // overall bounding box for the zoom.
  TBOX bbox;
  int blob_index = 0;
  for (int c = 0; c < length_; ++c) {
    auto color = static_cast<ScrollView::Color>(c % kNumColors + 3);
    const int blobs_in_char = state_[c];
    for (int b = 0; b < blobs_in_char; ++b) {
      TBLOB *blob = word->blobs[blob_index];
      bbox += blob->bounding_box();
      blob->plot(segm_window, color, color);
      ++blob_index;
    }
  }
  segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom());
  segm_window->Update();
  segm_window->Wait();
}
#endif // !GRAPHICS_DISABLED
// Returns true if the two words share the same UNICHARSET object and their
// punctuation-stripped cores (as reported by punct_stripped) have equal
// length and match unichar by unichar after to_lower().
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2) {
  const UNICHARSET *uchset = word1.unicharset();
  if (uchset != word2.unicharset()) {
    return false;
  }
  int w1start, w1end;
  word1.punct_stripped(&w1start, &w1end);
  int w2start, w2end;
  word2.punct_stripped(&w2start, &w2end);
  const int core_len = w1end - w1start;
  if (core_len != w2end - w2start) {
    return false;
  }
  for (int offset = 0; offset < core_len; ++offset) {
    const auto lower1 = uchset->to_lower(word1.unichar_id(w1start + offset));
    const auto lower2 = uchset->to_lower(word2.unichar_id(w2start + offset));
    if (lower1 != lower2) {
      return false;
    }
  }
  return true;
}
/**
* print_ratings_list
*
* Send all the ratings out to the logfile.
*
* @param msg intro message
* @param ratings list of ratings
* @param current_unicharset unicharset that can be used
* for id-to-unichar conversion
*/
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings,
const UNICHARSET &current_unicharset) {
if (ratings->empty()) {
tprintf("%s:<none>\n", msg);
return;
}
if (*msg != '\0') {
tprintf("%s\n", msg);
}
BLOB_CHOICE_IT c_it;
c_it.set_to_list(ratings);
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
c_it.data()->print(&current_unicharset);
if (!c_it.at_last()) {
tprintf("\n");
}
}
tprintf("\n");
fflush(stdout);
}
} // namespace tesseract

View File

@ -0,0 +1,638 @@
/**********************************************************************
* File: ratngs.h (Formerly ratings.h)
* Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef RATNGS_H
#define RATNGS_H
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "clst.h"
#include "elst.h"
#ifndef DISABLED_LEGACY_ENGINE
# include "fontinfo.h"
#endif // undef DISABLED_LEGACY_ENGINE
#include "matrix.h"
#include "unicharset.h"
#include "werd.h"
#include <tesseract/unichar.h>
#include <cassert>
#include <cfloat> // for FLT_MAX
namespace tesseract {
class MATRIX;
struct TBLOB;
struct TWERD;
// Enum to describe the source of a BLOB_CHOICE to make it possible to determine
// whether a blob has been classified by inspecting the BLOB_CHOICEs.
// BLOB_CHOICE::IsClassified() treats the static, adapted and speckle values
// as "classified"; BCC_AMBIG and BCC_FAKE are not.
enum BlobChoiceClassifier {
BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
BCC_AMBIG, // Generated by ambiguity detection.
BCC_FAKE, // From some other process. Default for a new BLOB_CHOICE.
};
// A single classification result for a blob: the chosen unichar together
// with its rating, certainty, script, font candidates and x-height range.
// Listable via ELIST_LINK.
class BLOB_CHOICE : public ELIST_LINK {
public:
  // Default-constructs a placeholder choice: a space character with
  // rating 10, certainty -1, no fonts/script, marked BCC_FAKE.
  BLOB_CHOICE() {
    unichar_id_ = UNICHAR_SPACE;
    fontinfo_id_ = -1;
    fontinfo_id2_ = -1;
    rating_ = 10.0;
    certainty_ = -1.0;
    script_id_ = -1;
    min_xheight_ = 0.0f;
    max_xheight_ = 0.0f;
    yshift_ = 0.0f;
    classifier_ = BCC_FAKE;
  }
  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
              float src_rating,          // rating
              float src_cert,            // certainty
              int script_id,             // script
              float min_xheight,         // min xheight in image pixel units
              float max_xheight,         // max xheight allowed by this char
              float yshift,              // the larger of y shift (top or bottom)
              BlobChoiceClassifier c);   // adapted match or other
  BLOB_CHOICE(const BLOB_CHOICE &other);
  ~BLOB_CHOICE() = default;

  // ---- Accessors ----
  UNICHAR_ID unichar_id() const {
    return unichar_id_;
  }
  float rating() const {
    return rating_;
  }
  float certainty() const {
    return certainty_;
  }
  // Best-scoring font id, or -1 if none (see set_fonts).
  int16_t fontinfo_id() const {
    return fontinfo_id_;
  }
  // Second-best font id, or -1 if none (see set_fonts).
  int16_t fontinfo_id2() const {
    return fontinfo_id2_;
  }
#ifndef DISABLED_LEGACY_ENGINE
  const std::vector<ScoredFont> &fonts() const {
    return fonts_;
  }
  // Stores the font candidates and recomputes fontinfo_id_/fontinfo_id2_ as
  // the ids of the two highest-scoring fonts. Only strictly positive scores
  // can win, because both running maxima start at 0; otherwise the ids stay
  // at -1.
  void set_fonts(const std::vector<ScoredFont> &fonts) {
    fonts_ = fonts;
    int score1 = 0, score2 = 0;
    fontinfo_id_ = -1;
    fontinfo_id2_ = -1;
    for (auto &f : fonts_) {
      if (f.score > score1) {
        // New best: the previous best is demoted to second place.
        score2 = score1;
        fontinfo_id2_ = fontinfo_id_;
        score1 = f.score;
        fontinfo_id_ = f.fontinfo_id;
      } else if (f.score > score2) {
        score2 = f.score;
        fontinfo_id2_ = f.fontinfo_id;
      }
    }
  }
#endif // ndef DISABLED_LEGACY_ENGINE
  int script_id() const {
    return script_id_;
  }
  // Position of this choice in the ratings matrix. Declared const so it can
  // also be queried through a const BLOB_CHOICE.
  const MATRIX_COORD &matrix_cell() const {
    return matrix_cell_;
  }
  float min_xheight() const {
    return min_xheight_;
  }
  float max_xheight() const {
    return max_xheight_;
  }
  float yshift() const {
    return yshift_;
  }
  BlobChoiceClassifier classifier() const {
    return classifier_;
  }
  // True if this choice came from the adaptive classifier.
  bool IsAdapted() const {
    return classifier_ == BCC_ADAPTED_CLASSIFIER;
  }
  // True if this choice came from the static, adaptive or speckle
  // classifier (i.e. not BCC_AMBIG or BCC_FAKE).
  bool IsClassified() const {
    return classifier_ == BCC_STATIC_CLASSIFIER || classifier_ == BCC_ADAPTED_CLASSIFIER ||
           classifier_ == BCC_SPECKLE_CLASSIFIER;
  }

  // ---- Mutators ----
  void set_unichar_id(UNICHAR_ID newunichar_id) {
    unichar_id_ = newunichar_id;
  }
  void set_rating(float newrat) {
    rating_ = newrat;
  }
  void set_certainty(float newrat) {
    certainty_ = newrat;
  }
  void set_script(int newscript_id) {
    script_id_ = newscript_id;
  }
  void set_matrix_cell(int col, int row) {
    matrix_cell_.col = col;
    matrix_cell_.row = row;
  }
  void set_classifier(BlobChoiceClassifier classifier) {
    classifier_ = classifier;
  }
  // Returns a heap-allocated copy of *src made with the (private) copy
  // assignment operator. The caller owns the returned object.
  static BLOB_CHOICE *deep_copy(const BLOB_CHOICE *src) {
    auto *choice = new BLOB_CHOICE;
    *choice = *src;
    return choice;
  }
  // Returns true if *this and other agree on the baseline and x-height
  // to within some tolerance based on a given estimate of the x-height.
  bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;

  // Prints rating, certainty, x-height range and unichar id via tprintf,
  // followed by the unichar's debug string when a unicharset is supplied.
  // No trailing newline is printed.
  void print(const UNICHARSET *unicharset) const {
    tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
            unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
  }
  // Like print(nullptr), plus script, fonts, yshift and classifier, ending
  // with a newline.
  void print_full() const {
    print(nullptr);
    tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
            fontinfo_id2_, yshift_, classifier_);
  }
  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
  // p1 and p2 point at BLOB_CHOICE pointers (qsort-style). Returns a
  // negative, zero or positive value; equal ratings compare as 0 so that
  // the ordering is consistent whichever way round a pair is compared.
  static int SortByRating(const void *p1, const void *p2) {
    const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
    const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
    if (bc1->rating_ < bc2->rating_) {
      return -1;
    }
    if (bc2->rating_ < bc1->rating_) {
      return 1;
    }
    return 0;
  }

private:
  // Copy assignment operator. Private: external code copies through the
  // copy constructor or deep_copy().
  BLOB_CHOICE &operator=(const BLOB_CHOICE &other);

  UNICHAR_ID unichar_id_; // unichar id
#ifndef DISABLED_LEGACY_ENGINE
  // Fonts and scores. Allowed to be empty.
  std::vector<ScoredFont> fonts_;
#endif                   // ndef DISABLED_LEGACY_ENGINE
  int16_t fontinfo_id_;  // char font information
  int16_t fontinfo_id2_; // 2nd choice font information
  // Rating is the classifier distance weighted by the length of the outline
  // in the blob. In terms of probability, classifier distance is -klog p such
  // that the resulting distance is in the range [0, 1] and then
  // rating = w (-k log p) where w is the weight for the length of the outline.
  // Sums of ratings may be compared meaningfully for words of different
  // segmentation.
  float rating_; // size related
  // Certainty is a number in [-20, 0] indicating the classifier certainty
  // of the choice. In terms of probability, certainty = 20 (k log p) where
  // k is defined as above to normalize -klog p to the range [0, 1].
  float certainty_; // absolute
  int script_id_;
  // Holds the position of this choice in the ratings matrix.
  // Used to locate the position in the matrix during path backtracking.
  MATRIX_COORD matrix_cell_;
  // X-height range (in image pixels) that this classification supports.
  float min_xheight_;
  float max_xheight_;
  // yshift_ - The vertical distance (in image pixels) the character is
  // shifted (up or down) from an acceptable y position.
  float yshift_;
  BlobChoiceClassifier classifier_; // What generated *this.
};
// Make BLOB_CHOICE listable.
// NOTE(review): presumably this macro is what declares the BLOB_CHOICE_LIST
// and BLOB_CHOICE_IT types used elsewhere in this file - confirm in elst.h.
ELISTIZEH(BLOB_CHOICE)
// Return the BLOB_CHOICE in bc_list matching a given unichar_id,
// or nullptr if there is no match.
BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
// Permuter codes used in WERD_CHOICEs.
// The numeric values are implied by declaration order (shown in the trailing
// comments); WERD_CHOICE stores the code in a uint8_t. NUM_PERMUTER_TYPES is
// a count sentinel and must stay last.
enum PermuterType {
NO_PERM, // 0
PUNC_PERM, // 1
TOP_CHOICE_PERM, // 2
LOWER_CASE_PERM, // 3
UPPER_CASE_PERM, // 4
NGRAM_PERM, // 5
NUMBER_PERM, // 6
USER_PATTERN_PERM, // 7
SYSTEM_DAWG_PERM, // 8
DOC_DAWG_PERM, // 9
USER_DAWG_PERM, // 10
FREQ_DAWG_PERM, // 11
COMPOUND_PERM, // 12
NUM_PERMUTER_TYPES
};
// ScriptPos tells whether a character is subscript, superscript, a dropcap
// or normal.
enum ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP };
// Returns a printable name for the given ScriptPos (used with "%s" by
// WERD_CHOICE::print).
const char *ScriptPosToString(ScriptPos script_pos);
// A candidate reading of a word: a sequence of unichar ids with, for each
// position, its script position, blob count (state) and certainty, plus
// word-level rating, certainty, permuter and x-height range.
// Listable via ELIST_LINK.
class TESS_API WERD_CHOICE : public ELIST_LINK {
public:
static const float kBadRating;
// Returns a printable name for the given permuter code.
static const char *permuter_name(uint8_t permuter);
// Creates an empty word with space reserved for 8 unichars.
WERD_CHOICE(const UNICHARSET *unicharset) : unicharset_(unicharset) {
this->init(8);
}
// Creates an empty word with space reserved for `reserved` unichars.
WERD_CHOICE(const UNICHARSET *unicharset, int reserved) : unicharset_(unicharset) {
this->init(reserved);
}
// Builds a word from a string and its per-unichar byte lengths; see the
// string-based init() below for the argument contract.
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating,
float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
: unicharset_(&unicharset) {
this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
}
WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
// Copy constructor: reserves word.length() slots, then delegates to
// operator=.
WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
this->init(word.length());
this->operator=(word);
}
~WERD_CHOICE();
const UNICHARSET *unicharset() const {
return unicharset_;
}
bool empty() const {
return length_ == 0;
}
inline int length() const {
return length_;
}
float adjust_factor() const {
return adjust_factor_;
}
void set_adjust_factor(float factor) {
adjust_factor_ = factor;
}
// Returns the whole backing vector, which holds reserved_ entries; only the
// first length() of them belong to the word.
inline const std::vector<UNICHAR_ID> &unichar_ids() const {
return unichar_ids_;
}
// Returns the unichar id at index. Only the upper bound is asserted.
inline UNICHAR_ID unichar_id(int index) const {
assert(index < length_);
return unichar_ids_[index];
}
// Returns the number of blobs at index. No bounds check.
inline int state(int index) const {
return state_[index];
}
// Returns the script position at index, or SP_NORMAL if index is out of
// range.
ScriptPos BlobPosition(int index) const {
if (index < 0 || index >= length_) {
return SP_NORMAL;
}
return script_pos_[index];
}
inline float rating() const {
return rating_;
}
// Word-level certainty.
inline float certainty() const {
return certainty_;
}
// Certainty of the unichar at index. No bounds check.
inline float certainty(int index) const {
return certainties_[index];
}
inline float min_x_height() const {
return min_x_height_;
}
inline float max_x_height() const {
return max_x_height_;
}
inline void set_x_heights(float min_height, float max_height) {
min_x_height_ = min_height;
max_x_height_ = max_height;
}
inline uint8_t permuter() const {
return permuter_;
}
const char *permuter_name() const;
// Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
// taken from the appropriate cell in the ratings MATRIX.
// Borrowed pointer, so do not delete.
BLOB_CHOICE_LIST *blob_choices(int index, MATRIX *ratings) const;
// Returns the MATRIX_COORD corresponding to the location in the ratings
// MATRIX for the given index into the word.
MATRIX_COORD MatrixCoord(int index) const;
// Overwrites only the unichar id at index; state, certainty, script position
// and the word-level scores are left untouched.
inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
assert(index < length_);
unichar_ids_[index] = unichar_id;
}
bool dangerous_ambig_found() const {
return dangerous_ambig_found_;
}
void set_dangerous_ambig_found_(bool value) {
dangerous_ambig_found_ = value;
}
inline void set_rating(float new_val) {
rating_ = new_val;
}
inline void set_certainty(float new_val) {
certainty_ = new_val;
}
inline void set_permuter(uint8_t perm) {
permuter_ = perm;
}
// Note: this function should only be used if all the fields
// are populated manually with set_* functions (rather than
// (copy)constructors and append_* functions).
inline void set_length(int len) {
ASSERT_HOST(reserved_ >= len);
length_ = len;
}
/// Doubles the reserved capacity of the per-unichar arrays (unichar_ids_,
/// script_pos_, state_ and certainties_); a zero reservation grows to 1.
inline void double_the_size() {
if (reserved_ > 0) {
reserved_ *= 2;
} else {
reserved_ = 1;
}
unichar_ids_.resize(reserved_);
script_pos_.resize(reserved_);
state_.resize(reserved_);
certainties_.resize(reserved_);
}
/// Initializes WERD_CHOICE - sizes the per-unichar arrays to `reserved`
/// slots (or clears them when reserved <= 0), sets length to 0 and resets
/// the other values to default (blank) values.
inline void init(int reserved) {
reserved_ = reserved;
if (reserved > 0) {
unichar_ids_.resize(reserved);
script_pos_.resize(reserved);
state_.resize(reserved);
certainties_.resize(reserved);
} else {
unichar_ids_.clear();
script_pos_.clear();
state_.clear();
certainties_.clear();
}
length_ = 0;
adjust_factor_ = 1.0f;
rating_ = 0.0;
// Starts at the maximum so that the first unichar added lowers it.
certainty_ = FLT_MAX;
min_x_height_ = 0.0f;
max_x_height_ = FLT_MAX;
permuter_ = NO_PERM;
unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
dangerous_ambig_found_ = false;
}
/// Helper function to build a WERD_CHOICE from the given string,
/// fragment lengths, rating, certainty and permuter.
/// The function assumes that src_string is not nullptr.
/// src_lengths argument could be nullptr, in which case the unichars
/// in src_string are assumed to all be of length 1.
void init(const char *src_string, const char *src_lengths, float src_rating, float src_certainty,
uint8_t src_permuter);
/// Set the fields in this choice to be default (bad) values.
inline void make_bad() {
length_ = 0;
rating_ = kBadRating;
certainty_ = -FLT_MAX;
}
/// This function assumes that there is enough space reserved
/// in the WERD_CHOICE for adding another unichar.
/// This is an efficient alternative to append_unichar_id().
inline void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating,
float certainty) {
assert(reserved_ > length_);
length_++;
this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
}
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty);
// Sets every per-unichar field at index (script position becomes SP_NORMAL)
// and folds the values into the word-level scores: rating is ADDED to
// rating_ and certainty_ is lowered to certainty if that is smaller.
// Calling this again for the same index therefore accumulates rating.
inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty,
int index) {
assert(index < length_);
unichar_ids_[index] = unichar_id;
state_[index] = blob_count;
certainties_[index] = certainty;
script_pos_[index] = SP_NORMAL;
rating_ += rating;
if (certainty < certainty_) {
certainty_ = certainty;
}
}
// Sets the entries for the given index from the BLOB_CHOICE, assuming
// unit fragment lengths, but setting the state for this index to blob_count.
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice);
bool contains_unichar_id(UNICHAR_ID unichar_id) const;
void remove_unichar_ids(int index, int num);
// Drops the last unichar by decrementing length_ only; rating_ and
// certainty_ are not adjusted, and there is no check for an empty word.
inline void remove_last_unichar_id() {
--length_;
}
inline void remove_unichar_id(int index) {
this->remove_unichar_ids(index, 1);
}
bool has_rtl_unichar_id() const;
void reverse_and_mirror_unichar_ids();
// Returns the half-open interval of unichar_id indices [start, end) which
// enclose the core portion of this word -- the part after stripping
// punctuation from the left and right.
void punct_stripped(int *start_core, int *end_core) const;
// Returns the indices [start, end) containing the core of the word, stripped
// of any superscript digits on either side. (i.e., the non-footnote part
// of the word). There is no guarantee that the output range is non-empty.
void GetNonSuperscriptSpan(int *start, int *end) const;
// Return a copy of this WERD_CHOICE with the choices [start, end).
// The result is useful only for checking against a dictionary.
WERD_CHOICE shallow_copy(int start, int end) const;
void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const;
// Returns the debug strings of all unichars in the word, each followed by
// a single space.
std::string debug_string() const {
std::string word_str;
for (int i = 0; i < length_; ++i) {
word_str += unicharset_->debug_str(unichar_ids_[i]);
word_str += " ";
}
return word_str;
}
// Returns true if any unichar_id in the word is a non-space-delimited char.
bool ContainsAnyNonSpaceDelimited() const {
for (int i = 0; i < length_; ++i) {
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
return true;
}
}
return false;
}
// Returns true if the word is all spaces (also true for an empty word).
bool IsAllSpaces() const {
for (int i = 0; i < length_; ++i) {
if (unichar_ids_[i] != UNICHAR_SPACE) {
return false;
}
}
return true;
}
// Call this to override the default (strict left to right graphemes)
// with the fact that some engine produces a "reading order" set of
// Graphemes for each word. Returns the value that was just set.
bool set_unichars_in_script_order(bool in_script_order) {
return unichars_in_script_order_ = in_script_order;
}
bool unichars_in_script_order() const {
return unichars_in_script_order_;
}
// Returns a UTF-8 string equivalent to the current choice
// of UNICHAR IDs. The cached string is recomputed on every call.
std::string &unichar_string() {
this->string_and_lengths(&unichar_string_, &unichar_lengths_);
return unichar_string_;
}
// Returns a UTF-8 string equivalent to the current choice
// of UNICHAR IDs. The cached string is recomputed on every call.
const std::string &unichar_string() const {
this->string_and_lengths(&unichar_string_, &unichar_lengths_);
return unichar_string_;
}
// Returns the lengths, one byte each, representing the number of bytes
// required in the unichar_string for each UNICHAR_ID.
const std::string &unichar_lengths() const {
this->string_and_lengths(&unichar_string_, &unichar_lengths_);
return unichar_lengths_;
}
// Sets up the script_pos_ member using the blobs_list to get the bln
// bounding boxes, *this to get the unichars, and this->unicharset
// to get the target positions. If small_caps is true, sub/super are not
// considered, but dropcaps are.
// NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
void SetScriptPositions(bool small_caps, TWERD *word, int debug = 0);
// Sets all the script_pos_ positions to the given position.
void SetAllScriptPositions(ScriptPos position);
static ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
const TBOX &blob_box, UNICHAR_ID unichar_id);
// Returns the "dominant" script ID for the word. By "dominant", the script
// must account for at least half the characters. Otherwise, it returns the
// unicharset's null script id.
// Note that for Japanese, Hiragana and Katakana are simply treated as Han.
int GetTopScriptID() const;
// Fixes the state_ for a chop at the given blob_position.
void UpdateStateForSplit(int blob_position);
// Returns the sum of all the state elements, being the total number of blobs.
int TotalOfStates() const;
// Prints the word with an empty intro message.
void print() const {
this->print("");
}
void print(const char *msg) const;
// Prints the segmentation state with an introductory message.
void print_state(const char *msg) const;
// Displays the segmentation state of *this (if not the same as the last
// one displayed) and waits for a click in the window.
// The definition is compiled only when GRAPHICS_DISABLED is not defined.
void DisplaySegmentation(TWERD *word);
WERD_CHOICE &operator+=( // concatenate
const WERD_CHOICE &second); // second on first
WERD_CHOICE &operator=(const WERD_CHOICE &source);
private:
const UNICHARSET *unicharset_;
// TODO(rays) Perhaps replace the multiple arrays with an array of structs?
// unichar_ids_ is an array of classifier "results" that make up a word.
// For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
// of each unichar_id.
// state_[i] indicates the number of blobs in WERD_RES::chopped_word that
// were put together to make the classification results in the ith position
// in unichar_ids_, and certainties_[i] is the certainty of the choice that
// was used in this word.
// == Change from before ==
// Previously there was fragment_lengths_ that allowed a word to be
// artificially composed of multiple fragment results. Since the new
// segmentation search doesn't do fragments, treatment of fragments has
// been moved to a lower level, augmenting the ratings matrix with the
// combined fragments, and allowing the language-model/segmentation-search
// to deal with only the combined unichar_ids.
std::vector<UNICHAR_ID> unichar_ids_; // unichar ids that represent the text of the word
std::vector<ScriptPos> script_pos_; // Normal/Sub/Superscript of each unichar.
std::vector<int> state_; // Number of blobs in each unichar.
std::vector<float> certainties_; // Certainty of each unichar.
int reserved_; // size of the above arrays
int length_; // word length
// Factor that was used to adjust the rating.
float adjust_factor_;
// Rating is the sum of the ratings of the individual blobs in the word.
float rating_; // size related
// certainty is the min (worst) certainty of the individual blobs in the word.
float certainty_; // absolute
// xheight computed from the result, or 0 if inconsistent.
float min_x_height_;
float max_x_height_;
uint8_t permuter_; // permuter code
// Normally, the ratings_ matrix represents the recognition results in order
// from left-to-right. However, some engines (say Cube) may return
// recognition results in the order of the script's major reading direction
// (for Arabic, that is right-to-left).
bool unichars_in_script_order_;
// True if NoDangerousAmbig found an ambiguity.
bool dangerous_ambig_found_;
// The following variables are populated and passed by reference any
// time unichar_string() or unichar_lengths() are called.
// They are mutable so that the const accessors can refresh them.
mutable std::string unichar_string_;
mutable std::string unichar_lengths_;
};
// Make WERD_CHOICE listable.
ELISTIZEH(WERD_CHOICE)
// A vector of borrowed-or-owned BLOB_CHOICE_LIST pointers; ownership is not
// expressed by the type, so check each use site.
using BLOB_CHOICE_LIST_VECTOR = std::vector<BLOB_CHOICE_LIST *>;
// Utilities for comparing WERD_CHOICEs
// Returns true when both words use the same UNICHARSET and their
// punctuation-stripped cores match case-insensitively.
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2);
// Utilities for debug printing.
void print_ratings_list(const char *msg, // intro message
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET &current_unicharset // unicharset that can be used
// for id-to-unichar conversion
);
} // namespace tesseract
#endif

View File

@ -0,0 +1,277 @@
/**********************************************************************
* File: rect.cpp (Formerly box.c)
* Description: Bounding box class definition.
* Author: Phil Cheatle
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "rect.h"
#include "serialis.h" // for TFile
namespace tesseract {
/**********************************************************************
 * TBOX::TBOX()  Constructor from 2 ICOORDS
 *
 * The two points may be any pair of opposite corners, in either order;
 * the stored corners are normalised so that bot_left holds the smaller
 * x and y and top_right the larger x and y.
 **********************************************************************/
TBOX::TBOX(          // constructor
    const ICOORD pt1, // one corner
    const ICOORD pt2  // the other corner
) {
  bot_left = ICOORD(std::min(pt1.x(), pt2.x()), std::min(pt1.y(), pt2.y()));
  top_right = ICOORD(std::max(pt1.x(), pt2.x()), std::max(pt1.y(), pt2.y()));
}
// Reads bot_left then top_right from the TFile. Stops at the first failure
// and returns false; returns true only if both corners were read.
bool TBOX::DeSerialize(TFile *f) {
  if (!bot_left.DeSerialize(f)) {
    return false;
  }
  if (!top_right.DeSerialize(f)) {
    return false;
  }
  return true;
}
// Writes bot_left then top_right to the TFile. Stops at the first failure
// and returns false; returns true only if both corners were written.
bool TBOX::Serialize(TFile *f) const {
  if (!bot_left.Serialize(f)) {
    return false;
  }
  if (!top_right.Serialize(f)) {
    return false;
  }
  return true;
}
// rotate_large constructs the containing bounding box of all 4
// corners after rotating them. It therefore guarantees that all
// original content is contained within, but also slightly enlarges the box.
void TBOX::rotate_large(const FCOORD &vec) {
ICOORD top_left(bot_left.x(), top_right.y());
ICOORD bottom_right(top_right.x(), bot_left.y());
top_left.rotate(vec);
bottom_right.rotate(vec);
rotate(vec);
TBOX box2(top_left, bottom_right);
*this += box2;
}
/**********************************************************************
 * TBOX::intersection()  Build the largest box contained in both boxes
 *
 * When the boxes do not overlap the result has its corners set to
 * (INT16_MAX, INT16_MAX) and (-INT16_MAX, -INT16_MAX).
 **********************************************************************/
TBOX TBOX::intersection( // shared area box
    const TBOX &box) const {
  if (!overlap(box)) {
    return TBOX(INT16_MAX, INT16_MAX, -INT16_MAX, -INT16_MAX);
  }
  // Innermost edge on each side.
  const int16_t left = std::max<int16_t>(box.bot_left.x(), bot_left.x());
  const int16_t right = std::min<int16_t>(box.top_right.x(), top_right.x());
  const int16_t bottom = std::max<int16_t>(box.bot_left.y(), bot_left.y());
  const int16_t top = std::min<int16_t>(box.top_right.y(), top_right.y());
  return TBOX(left, bottom, right, top);
}
/**********************************************************************
 * TBOX::bounding_union()  Build the smallest box containing both boxes
 *
 * Takes the outermost edge on each of the four sides.
 **********************************************************************/
TBOX TBOX::bounding_union( // box enclosing both
    const TBOX &box) const {
  ICOORD bl; // bottom left
  ICOORD tr; // top right
  bl.set_x(std::min(box.bot_left.x(), bot_left.x()));
  bl.set_y(std::min(box.bot_left.y(), bot_left.y()));
  tr.set_x(std::max(box.top_right.x(), top_right.x()));
  tr.set_y(std::max(box.top_right.y(), top_right.y()));
  return TBOX(bl, tr);
}
/**********************************************************************
* TBOX::plot() Paint a box using specified settings
*
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Selects the given brush (fill) and pen (border) colours on the window,
// then draws the box through the single-argument plot() overload.
void TBOX::plot( // paint box
ScrollView *fd, // where to paint
ScrollView::Color fill_colour, // colour for inside
ScrollView::Color border_colour // colour for border
) const {
fd->Brush(fill_colour);
fd->Pen(border_colour);
plot(fd);
}
#endif
// Appends the bounding box to str in the form
// "(left,bottom)->(right,top)". Existing contents of str are kept.
void TBOX::print_to_str(std::string &str) const {
  str += '(';
  str += std::to_string(left());
  str += ',';
  str += std::to_string(bottom());
  str += ")->(";
  str += std::to_string(right());
  str += ',';
  str += std::to_string(top());
  str += ')';
}
// Writes bot_left then top_right to the given file. Returns false in case
// of error; top_right is not attempted if bot_left fails.
bool TBOX::Serialize(FILE *fp) const {
  return bot_left.Serialize(fp) && top_right.Serialize(fp);
}
// Reads bot_left then top_right from the given file. Returns false in case
// of error; top_right is not attempted if bot_left fails.
// If swap is true, assumes a big/little-endian swap is needed.
bool TBOX::DeSerialize(bool swap, FILE *fp) {
  return bot_left.DeSerialize(swap, fp) && top_right.DeSerialize(swap, fp);
}
/**********************************************************************
 * operator+=
 *
 * Extend one box to include the other (In place union).
 * Each side of op1 moves outwards to op2's side when op2 reaches further.
 **********************************************************************/
TBOX &operator+=( // in-place bounding box
    TBOX &op1,    // operands
    const TBOX &op2) {
  op1.bot_left.set_x(std::min(op1.bot_left.x(), op2.bot_left.x()));
  op1.top_right.set_x(std::max(op1.top_right.x(), op2.top_right.x()));
  op1.bot_left.set_y(std::min(op1.bot_left.y(), op2.bot_left.y()));
  op1.top_right.set_y(std::max(op1.top_right.y(), op2.top_right.y()));
  return op1;
}
/**********************************************************************
 * operator&=
 *
 * Reduce one box to intersection with the other (In place intersection).
 * When the boxes do not overlap, op1's corners are set to
 * (INT16_MAX, INT16_MAX) and (-INT16_MAX, -INT16_MAX).
 **********************************************************************/
TBOX &operator&=(TBOX &op1, const TBOX &op2) {
  if (!op1.overlap(op2)) {
    op1.bot_left.set_x(INT16_MAX);
    op1.bot_left.set_y(INT16_MAX);
    op1.top_right.set_x(-INT16_MAX);
    op1.top_right.set_y(-INT16_MAX);
    return op1;
  }
  // Each side of op1 moves inwards to op2's side when op2 is tighter.
  op1.bot_left.set_x(std::max(op1.bot_left.x(), op2.bot_left.x()));
  op1.top_right.set_x(std::min(op1.top_right.x(), op2.top_right.x()));
  op1.bot_left.set_y(std::max(op1.bot_left.y(), op2.bot_left.y()));
  op1.top_right.set_y(std::min(op1.top_right.y(), op2.top_right.y()));
  return op1;
}
// Returns true if the left edges and the right edges of the two boxes each
// differ by at most tolerance.
bool TBOX::x_almost_equal(const TBOX &box, int tolerance) const {
  if (abs(left() - box.left()) > tolerance) {
    return false;
  }
  return abs(right() - box.right()) <= tolerance;
}
// Returns true if all four edges of the two boxes pairwise differ by at
// most tolerance.
bool TBOX::almost_equal(const TBOX &box, int tolerance) const {
  if (abs(left() - box.left()) > tolerance) {
    return false;
  }
  if (abs(right() - box.right()) > tolerance) {
    return false;
  }
  if (abs(top() - box.top()) > tolerance) {
    return false;
  }
  return abs(bottom() - box.bottom()) <= tolerance;
}
} // namespace tesseract

View File

@ -0,0 +1,503 @@
/**********************************************************************
* File: rect.h (Formerly box.h)
* Description: Bounding box class definition.
* Author: Phil Cheatle
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef RECT_H
#define RECT_H
#include "points.h" // for ICOORD, FCOORD
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include "tprintf.h" // for tprintf
#include <tesseract/export.h> // for DLLSYM
#include <algorithm> // for std::max, std::min
#include <cmath> // for std::ceil, std::floor
#include <cstdint> // for INT16_MAX
#include <cstdio> // for FILE
#include <string> // for std::string
namespace tesseract {
// Axis-aligned integer bounding box, stored as its bottom-left and top-right
// corners (ICOORD). A default-constructed box is "null": bottom-left is
// (INT16_MAX, INT16_MAX) and top-right is (-INT16_MAX, -INT16_MAX), which
// makes null_box() return true.
class TESS_API TBOX { // bounding box
public:
TBOX()
: // empty constructor making a null box
bot_left(INT16_MAX, INT16_MAX)
, top_right(-INT16_MAX, -INT16_MAX) {}
TBOX( // constructor
const ICOORD pt1, // one corner
const ICOORD pt2); // the other corner
//*********************************************************************
// TBOX::TBOX() Constructor from 4 integer values.
// Note: It is caller's responsibility to provide values
// in the right order.
//*********************************************************************
TBOX( // constructor
int16_t left, int16_t bottom, int16_t right, int16_t top)
: bot_left(left, bottom), top_right(right, top) {}
TBOX( // box around FCOORD
const FCOORD pt);
// True when the box has no positive extent on at least one axis, i.e.
// left >= right or top <= bottom. Zero-width and zero-height boxes count
// as null here.
bool null_box() const { // Is box null
return ((left() >= right()) || (top() <= bottom()));
}
// Two boxes are equal when both corners match exactly.
bool operator==(const TBOX &other) const {
return bot_left == other.bot_left && top_right == other.top_right;
}
int16_t top() const { // coord of top
return top_right.y();
}
void set_top(int y) {
top_right.set_y(y);
}
int16_t bottom() const { // coord of bottom
return bot_left.y();
}
void set_bottom(int y) {
bot_left.set_y(y);
}
int16_t left() const { // coord of left
return bot_left.x();
}
void set_left(int x) {
bot_left.set_x(x);
}
int16_t right() const { // coord of right
return top_right.x();
}
void set_right(int x) {
top_right.set_x(x);
}
// Midpoint of the left and right edges (integer division).
int x_middle() const {
return (bot_left.x() + top_right.x()) / 2;
}
// Midpoint of the bottom and top edges (integer division).
int y_middle() const {
return (bot_left.y() + top_right.y()) / 2;
}
const ICOORD &botleft() const { // access function
return bot_left;
}
// Built on demand from the stored corners, hence returned by value.
ICOORD botright() const { // ~ access function
return ICOORD(top_right.x(), bot_left.y());
}
// Built on demand from the stored corners, hence returned by value.
ICOORD topleft() const { // ~ access function
return ICOORD(bot_left.x(), top_right.y());
}
const ICOORD &topright() const { // access function
return top_right;
}
// Returns top - bottom, or 0 when null_box() is true.
int16_t height() const { // how high is it?
if (!null_box()) {
return top_right.y() - bot_left.y();
} else {
return 0;
}
}
// Returns right - left, or 0 when null_box() is true.
int16_t width() const { // how wide is it?
if (!null_box()) {
return top_right.x() - bot_left.x();
} else {
return 0;
}
}
// Returns width * height, or 0 when null_box() is true.
int32_t area() const { // what is the area?
if (!null_box()) {
return width() * height();
} else {
return 0;
}
}
// Pads the box on either side by the supplied x,y pad amounts.
// NO checks for exceeding any bounds like 0 or an image size.
void pad(int xpad, int ypad) {
ICOORD pad(xpad, ypad);
bot_left -= pad;
top_right += pad;
}
void move_bottom_edge( // move one edge
const int16_t y) { // by +/- y
bot_left += ICOORD(0, y);
}
void move_left_edge( // move one edge
const int16_t x) { // by +/- x
bot_left += ICOORD(x, 0);
}
void move_right_edge( // move one edge
const int16_t x) { // by +/- x
top_right += ICOORD(x, 0);
}
void move_top_edge( // move one edge
const int16_t y) { // by +/- y
top_right += ICOORD(0, y);
}
void move( // move box
const ICOORD vec) { // by vector
bot_left += vec;
top_right += vec;
}
// Float translation: the bottom-left corner is rounded with floor and the
// top-right corner with ceil, so rounding can only grow the box.
void move( // move box
const FCOORD vec) { // by float vector
bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() + vec.x())));
// round left
bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() + vec.y())));
// round down
top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() + vec.x())));
// round right
top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() + vec.y())));
// round up
}
// Uniform scaling of both corners; same floor/ceil rounding as move(FCOORD).
void scale( // scale box
const float f) { // by multiplier
// round left
bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() * f)));
// round down
bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() * f)));
// round right
top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() * f)));
// round up
top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() * f)));
}
// Per-axis scaling: x coordinates by vec.x(), y coordinates by vec.y(),
// with floor on the bottom-left corner and ceil on the top-right corner.
void scale( // scale box
const FCOORD vec) { // by float vector
bot_left.set_x(static_cast<int16_t>(std::floor(bot_left.x() * vec.x())));
bot_left.set_y(static_cast<int16_t>(std::floor(bot_left.y() * vec.y())));
top_right.set_x(static_cast<int16_t>(std::ceil(top_right.x() * vec.x())));
top_right.set_y(static_cast<int16_t>(std::ceil(top_right.y() * vec.y())));
}
// rotate doesn't enlarge the box - it just rotates the bottom-left
// and top-right corners. Use rotate_large if you want to guarantee
// that all content is contained within the rotated box.
void rotate(const FCOORD &vec) { // by vector
bot_left.rotate(vec);
top_right.rotate(vec);
// Rebuild through the two-corner constructor (defined outside this header);
// presumably it re-orders the rotated corners - confirm in rect.cpp.
*this = TBOX(bot_left, top_right);
}
// rotate_large constructs the containing bounding box of all 4
// corners after rotating them. It therefore guarantees that all
// original content is contained within, but also slightly enlarges the box.
void rotate_large(const FCOORD &vec);
bool contains( // is pt inside box
const FCOORD pt) const;
bool contains( // is box inside box
const TBOX &box) const;
bool overlap( // do boxes overlap
const TBOX &box) const;
bool major_overlap( // do boxes overlap more than half
const TBOX &box) const;
// Do boxes overlap on x axis.
bool x_overlap(const TBOX &box) const;
// Return the horizontal gap between the boxes. If the boxes
// overlap horizontally then the return value is negative, indicating
// the amount of the overlap.
int x_gap(const TBOX &box) const {
return std::max(bot_left.x(), box.bot_left.x()) - std::min(top_right.x(), box.top_right.x());
}
// Return the vertical gap between the boxes. If the boxes
// overlap vertically then the return value is negative, indicating
// the amount of the overlap.
int y_gap(const TBOX &box) const {
return std::max(bot_left.y(), box.bot_left.y()) - std::min(top_right.y(), box.top_right.y());
}
// Do boxes overlap on x axis by more than
// half of the width of the narrower box.
bool major_x_overlap(const TBOX &box) const;
// Do boxes overlap on y axis.
bool y_overlap(const TBOX &box) const;
// Do boxes overlap on y axis by more than
// half of the height of the shorter box.
bool major_y_overlap(const TBOX &box) const;
// fraction of current box's area covered by other
double overlap_fraction(const TBOX &box) const;
// fraction of the current box's projected area covered by the other's
double x_overlap_fraction(const TBOX &box) const;
// fraction of the current box's projected area covered by the other's
double y_overlap_fraction(const TBOX &box) const;
// Returns true if the boxes are almost equal on x axis.
bool x_almost_equal(const TBOX &box, int tolerance) const;
// Returns true if the boxes are almost equal
bool almost_equal(const TBOX &box, int tolerance) const;
TBOX intersection( // shared area box
const TBOX &box) const;
TBOX bounding_union( // box enclosing both
const TBOX &box) const;
// Sets the box boundaries to the given coordinates.
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max) {
bot_left.set_x(x_min);
bot_left.set_y(y_min);
top_right.set_x(x_max);
top_right.set_y(y_max);
}
// Prints the box through tprintf as (left,bottom)->(right,top).
void print() const { // print
tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top());
}
// Appends the bounding box as (%d,%d)->(%d,%d) to a string.
void print_to_str(std::string &str) const;
#ifndef GRAPHICS_DISABLED
void plot( // use current settings
ScrollView *fd) const { // where to paint
fd->Rectangle(bot_left.x(), bot_left.y(), top_right.x(), top_right.y());
}
void plot( // paint box
ScrollView *fd, // where to paint
ScrollView::Color fill_colour, // colour for inside
ScrollView::Color border_colour) const; // colour for border
#endif
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE *fp) const;
bool Serialize(TFile *fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE *fp);
bool DeSerialize(TFile *fp);
friend TBOX &operator+=(TBOX &, const TBOX &);
// in place union
friend TBOX &operator&=(TBOX &, const TBOX &);
// in place intersection
private:
ICOORD bot_left; // bottom left corner
ICOORD top_right; // top right corner
};
/**********************************************************************
 * TBOX::TBOX()  Build the smallest integer box around one FCOORD.
 *
 * The bottom-left corner takes the floor of each coordinate and the
 * top-right corner takes the ceil, so the point always lies inside.
 **********************************************************************/
inline TBOX::TBOX(const FCOORD pt)
    : bot_left(static_cast<int16_t>(std::floor(pt.x())), static_cast<int16_t>(std::floor(pt.y())))
    , top_right(static_cast<int16_t>(std::ceil(pt.x())), static_cast<int16_t>(std::ceil(pt.y()))) {}
/**********************************************************************
 * TBOX::contains()  Is the point inside the box (edges inclusive)?
 *
 **********************************************************************/
inline bool TBOX::contains(const FCOORD pt) const {
  // Written as positive range tests so an unordered (NaN) coordinate
  // compares false and the point is reported as outside.
  const bool within_x = (pt.x() >= bot_left.x()) && (pt.x() <= top_right.x());
  const bool within_y = (pt.y() >= bot_left.y()) && (pt.y() <= top_right.y());
  return within_x && within_y;
}
/**********************************************************************
 * TBOX::contains()  Is the other box entirely inside this one?
 *
 * Both of its stored corners must pass the point containment test.
 **********************************************************************/
inline bool TBOX::contains(const TBOX &box) const {
  if (!contains(box.bot_left)) {
    return false;
  }
  return contains(box.top_right);
}
/**********************************************************************
 * TBOX::overlap()  Do the two boxes share at least one point?
 *
 * Touching edges count as overlapping.
 **********************************************************************/
inline bool TBOX::overlap(const TBOX &box) const {
  // The boxes are disjoint as soon as one lies strictly beyond the other
  // on either axis.
  const bool apart_in_x = (box.bot_left.x() > top_right.x()) || (box.top_right.x() < bot_left.x());
  const bool apart_in_y = (box.bot_left.y() > top_right.y()) || (box.top_right.y() < bot_left.y());
  return !apart_in_x && !apart_in_y;
}
/**********************************************************************
 * TBOX::major_overlap()  Do the boxes overlap by at least half of the
 *                        smaller box's extent on BOTH axes?
 *
 **********************************************************************/
inline bool TBOX::major_overlap(const TBOX &box) const {
  // Horizontal test: twice the shared width must reach the narrower width.
  const int shared_width =
      std::min(box.top_right.x(), top_right.x()) - std::max(box.bot_left.x(), bot_left.x());
  if (shared_width + shared_width < std::min(box.width(), width())) {
    return false;
  }
  // Vertical test: same rule applied to the heights.
  const int shared_height =
      std::min(box.top_right.y(), top_right.y()) - std::max(box.bot_left.y(), bot_left.y());
  return !(shared_height + shared_height < std::min(box.height(), height()));
}
/**********************************************************************
* TBOX::overlap_fraction() Fraction of area covered by the other box
*
**********************************************************************/
inline double TBOX::overlap_fraction(const TBOX &box) const {
double fraction = 0.0;
if (this->area()) {
fraction = this->intersection(box).area() * 1.0 / this->area();
}
return fraction;
}
/**********************************************************************
 * TBOX::x_overlap()  Do the x-ranges of the two boxes intersect?
 *
 * Touching edges count as overlapping.
 **********************************************************************/
inline bool TBOX::x_overlap(const TBOX &box) const {
  const bool other_is_right_of_us = box.bot_left.x() > top_right.x();
  const bool other_is_left_of_us = box.top_right.x() < bot_left.x();
  return !(other_is_right_of_us || other_is_left_of_us);
}
/**********************************************************************
* TBOX::major_x_overlap() Do two boxes overlap by more than half the
* width of the narrower box on the x-axis
*
**********************************************************************/
inline bool TBOX::major_x_overlap(const TBOX &box) const {
int16_t overlap = box.width();
if (this->left() > box.left()) {
overlap -= this->left() - box.left();
}
if (this->right() < box.right()) {
overlap -= box.right() - this->right();
}
return (overlap >= box.width() / 2 || overlap >= this->width() / 2);
}
/**********************************************************************
 * TBOX::y_overlap()  Do the y-ranges of the two boxes intersect?
 *
 * Touching edges count as overlapping.
 **********************************************************************/
inline bool TBOX::y_overlap(const TBOX &box) const {
  const bool other_is_above_us = box.bot_left.y() > top_right.y();
  const bool other_is_below_us = box.top_right.y() < bot_left.y();
  return !(other_is_above_us || other_is_below_us);
}
/**********************************************************************
* TBOX::major_y_overlap() Do two boxes overlap by more than half the
* height of the shorter box on the y-axis
*
**********************************************************************/
inline bool TBOX::major_y_overlap(const TBOX &box) const {
int16_t overlap = box.height();
if (this->bottom() > box.bottom()) {
overlap -= this->bottom() - box.bottom();
}
if (this->top() < box.top()) {
overlap -= box.top() - this->top();
}
return (overlap >= box.height() / 2 || overlap >= this->height() / 2);
}
/**********************************************************************
 * TBOX::x_overlap_fraction()  Horizontal overlap with the other box as
 *                             a fraction of this box's width.
 *
 * A zero-width box yields 1.0 when its x lies inside the other box's
 * x-range and 0.0 otherwise. The result is never negative.
 **********************************************************************/
inline double TBOX::x_overlap_fraction(const TBOX &other) const {
  const int own_width = right() - left();
  if (own_width == 0) {
    // Degenerate case: treat this box as a single x position.
    const int x = left();
    return (other.left() <= x && x <= other.right()) ? 1.0 : 0.0;
  }
  const int shared_low = std::max(left(), other.left());
  const int shared_high = std::min(right(), other.right());
  return std::max(0.0, static_cast<double>(shared_high - shared_low) / own_width);
}
/**********************************************************************
 * TBOX::y_overlap_fraction()  Vertical overlap with the other box as
 *                             a fraction of this box's height.
 *
 * A zero-height box yields 1.0 when its y lies inside the other box's
 * y-range and 0.0 otherwise. The result is never negative.
 **********************************************************************/
inline double TBOX::y_overlap_fraction(const TBOX &other) const {
  const int own_height = top() - bottom();
  if (own_height == 0) {
    // Degenerate case: treat this box as a single y position.
    const int y = bottom();
    return (other.bottom() <= y && y <= other.top()) ? 1.0 : 0.0;
  }
  const int shared_low = std::max(bottom(), other.bottom());
  const int shared_high = std::min(top(), other.top());
  return std::max(0.0, static_cast<double>(shared_high - shared_low) / own_height);
}
} // namespace tesseract
#endif

View File

@ -0,0 +1,251 @@
/**********************************************************************
* File: rejctmap.cpp (Formerly rejmap.c)
* Description: REJ and REJMAP class functions.
* Author: Phil Cheatle
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "rejctmap.h"
#include <memory>
#include "params.h"
namespace tesseract {
void REJ::full_print(FILE *fp) const {
fprintf(fp, "R_TESS_FAILURE: %s\n", flag(R_TESS_FAILURE) ? "T" : "F");
fprintf(fp, "R_SMALL_XHT: %s\n", flag(R_SMALL_XHT) ? "T" : "F");
fprintf(fp, "R_EDGE_CHAR: %s\n", flag(R_EDGE_CHAR) ? "T" : "F");
fprintf(fp, "R_1IL_CONFLICT: %s\n", flag(R_1IL_CONFLICT) ? "T" : "F");
fprintf(fp, "R_POSTNN_1IL: %s\n", flag(R_POSTNN_1IL) ? "T" : "F");
fprintf(fp, "R_REJ_CBLOB: %s\n", flag(R_REJ_CBLOB) ? "T" : "F");
fprintf(fp, "R_MM_REJECT: %s\n", flag(R_MM_REJECT) ? "T" : "F");
fprintf(fp, "R_BAD_REPETITION: %s\n", flag(R_BAD_REPETITION) ? "T" : "F");
fprintf(fp, "R_POOR_MATCH: %s\n", flag(R_POOR_MATCH) ? "T" : "F");
fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n",
flag(R_NOT_TESS_ACCEPTED) ? "T" : "F");
fprintf(fp, "R_CONTAINS_BLANKS: %s\n", flag(R_CONTAINS_BLANKS) ? "T" : "F");
fprintf(fp, "R_BAD_PERMUTER: %s\n", flag(R_BAD_PERMUTER) ? "T" : "F");
fprintf(fp, "R_HYPHEN: %s\n", flag(R_HYPHEN) ? "T" : "F");
fprintf(fp, "R_DUBIOUS: %s\n", flag(R_DUBIOUS) ? "T" : "F");
fprintf(fp, "R_NO_ALPHANUMS: %s\n", flag(R_NO_ALPHANUMS) ? "T" : "F");
fprintf(fp, "R_MOSTLY_REJ: %s\n", flag(R_MOSTLY_REJ) ? "T" : "F");
fprintf(fp, "R_XHT_FIXUP: %s\n", flag(R_XHT_FIXUP) ? "T" : "F");
fprintf(fp, "R_BAD_QUALITY: %s\n", flag(R_BAD_QUALITY) ? "T" : "F");
fprintf(fp, "R_DOC_REJ: %s\n", flag(R_DOC_REJ) ? "T" : "F");
fprintf(fp, "R_BLOCK_REJ: %s\n", flag(R_BLOCK_REJ) ? "T" : "F");
fprintf(fp, "R_ROW_REJ: %s\n", flag(R_ROW_REJ) ? "T" : "F");
fprintf(fp, "R_UNLV_REJ: %s\n", flag(R_UNLV_REJ) ? "T" : "F");
fprintf(fp, "R_HYPHEN_ACCEPT: %s\n", flag(R_HYPHEN_ACCEPT) ? "T" : "F");
fprintf(fp, "R_NN_ACCEPT: %s\n", flag(R_NN_ACCEPT) ? "T" : "F");
fprintf(fp, "R_MM_ACCEPT: %s\n", flag(R_MM_ACCEPT) ? "T" : "F");
fprintf(fp, "R_QUALITY_ACCEPT: %s\n", flag(R_QUALITY_ACCEPT) ? "T" : "F");
fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
flag(R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
}
// Copy assignment: resizes this map to source's length and copies every
// REJ record across. Returns *this.
REJMAP &REJMAP::operator=(const REJMAP &source) {
  // Self-assignment must be a no-op: initialise() replaces ptr with a fresh
  // default-constructed array, so copying "from source" afterwards would read
  // the new empty records and silently wipe the map.
  if (this != &source) {
    initialise(source.len);
    for (int i = 0; i < len; i++) {
      ptr[i] = source.ptr[i];
    }
  }
  return *this;
}
// Discards the current contents and allocates `length` value-initialised
// REJ records.
void REJMAP::initialise(int16_t length) {
  auto fresh = std::make_unique<REJ[]>(length);
  ptr.swap(fresh); // the previous array is released when `fresh` goes out of scope
  len = length;
}
// Counts the characters in the map whose REJ record reports accepted().
int16_t REJMAP::accept_count() const { // How many accepted?
  int16_t accepted_total = 0;
  for (int pos = 0; pos < len; ++pos) {
    if (!ptr[pos].accepted()) {
      continue;
    }
    ++accepted_total;
  }
  return accepted_total;
}
// True as soon as one character reports recoverable().
bool REJMAP::recoverable_rejects() const { // Any non perm rejs?
  int pos = 0;
  while (pos < len && !ptr[pos].recoverable()) {
    ++pos;
  }
  // Stopping before the end means a recoverable reject was found.
  return pos < len;
}
// True as soon as one character reports accept_if_good_quality().
bool REJMAP::quality_recoverable_rejects() const { // Any potential rejs?
  int pos = 0;
  while (pos < len && !ptr[pos].accept_if_good_quality()) {
    ++pos;
  }
  // Stopping before the end means a potential reject was found.
  return pos < len;
}
// Removes the record at `pos`, shifting every later record down one slot
// and shrinking the map by one. The storage itself is not reallocated.
void REJMAP::remove_pos(int16_t pos) {
  ASSERT_HOST(pos >= 0);
  ASSERT_HOST(pos < len);
  ASSERT_HOST(len > 0);
  --len;
  for (int dst = pos; dst < len; ++dst) {
    ptr[dst] = ptr[dst + 1];
  }
}
// Writes the map to fp as a double-quoted string holding one display
// character per entry (see REJ::display_char()).
//
// Characters are streamed one at a time rather than staged in a fixed-size
// stack buffer: len is an int16_t, so a staging array of 512 bytes could be
// overrun by a long map.
void REJMAP::print(FILE *fp) const {
  fputc('"', fp);
  for (int i = 0; i < len; i++) {
    fputc(ptr[i].display_char(), fp);
  }
  fputc('"', fp);
}
void REJMAP::full_print(FILE *fp) const {
int i;
for (i = 0; i < len; i++) {
ptr[i].full_print(fp);
fprintf(fp, "\n");
}
}
// Sets the small-x-height reject flag on every character, whether or not
// it is currently accepted.
void REJMAP::rej_word_small_xht() { // Reject whole word
  for (int pos = 0; pos < len; ++pos) {
    ptr[pos].setrej_small_xht();
  }
}
// Sets the tess-failure reject flag on every character, whether or not
// it is currently accepted.
void REJMAP::rej_word_tess_failure() { // Reject whole word
  for (int pos = 0; pos < len; ++pos) {
    ptr[pos].setrej_tess_failure();
  }
}
void REJMAP::rej_word_not_tess_accepted() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_not_tess_accepted();
}
}
}
void REJMAP::rej_word_contains_blanks() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_contains_blanks();
}
}
}
void REJMAP::rej_word_bad_permuter() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_bad_permuter();
}
}
}
void REJMAP::rej_word_xht_fixup() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_xht_fixup();
}
}
}
void REJMAP::rej_word_no_alphanums() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_no_alphanums();
}
}
}
void REJMAP::rej_word_mostly_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_mostly_rej();
}
}
}
void REJMAP::rej_word_bad_quality() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_bad_quality();
}
}
}
void REJMAP::rej_word_doc_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_doc_rej();
}
}
}
void REJMAP::rej_word_block_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_block_rej();
}
}
}
void REJMAP::rej_word_row_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted()) {
ptr[i].setrej_row_rej();
}
}
}
} // namespace tesseract

View File

@ -0,0 +1,376 @@
/**********************************************************************
* File: rejctmap.h (Formerly rejmap.h)
* Description: REJ and REJMAP class functions.
* Author: Phil Cheatle
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
This module may look unnecessarily verbose, but here's the philosophy...
ALL processing of the reject map is done in this module. There are lots of
separate calls to set reject/accept flags. These have DELIBERATELY been kept
distinct so that this module can decide what to do.
Basically, there is a flag for each sort of rejection or acceptance. This
provides a history of what has happened to EACH character.
Determining whether a character is CURRENTLY rejected depends on implicit
understanding of the SEQUENCE of possible calls. The flags are defined and
grouped in the REJ_FLAGS enum. These groupings are used in determining a
character's CURRENT rejection status. Basically, a character is ACCEPTED if
none of the permanent rej flags are set
AND ( the character has never been rejected
OR an accept flag is set which is LATER than the latest reject flag )
IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
**********************************************************************/
#ifndef REJCTMAP_H
#define REJCTMAP_H
#include "errcode.h"
#include "params.h"
#include <bitset>
#include <memory>
namespace tesseract {
// Bit positions used by REJ (one bit per enumerator in a std::bitset<32>).
// The enumerators are grouped by processing stage: each group of reject
// flags is bracketed by the accept flag that may later override it. The
// declaration order therefore fixes the bit numbering - do not reorder.
enum REJ_FLAGS {
/* Reject modes which are NEVER overridden */
R_TESS_FAILURE, // PERM Tess didn't classify
R_SMALL_XHT, // PERM Xht too small
R_EDGE_CHAR, // PERM Too close to edge of image
R_1IL_CONFLICT, // PERM 1Il confusion
R_POSTNN_1IL, // PERM 1Il unrejected by NN
R_REJ_CBLOB, // PERM Odd blob
R_MM_REJECT, // PERM Matrix match rejection (m's)
// NOTE(review): labelled TEMP, but REJ::perm_rejected() tests this flag
// together with the permanent ones above - confirm which is intended.
R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend
/* Initial reject modes (pre NN_ACCEPT) */
R_POOR_MATCH, // TEMP Ray's original heuristic (Not used)
R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD
R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD
/* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop
R_DUBIOUS, // TEMP Post NN dodgy chars
R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest
R_XHT_FIXUP, // TEMP Xht tests unsure
/* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
/* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
R_DOC_REJ, // TEMP Document rejection
R_BLOCK_REJ, // TEMP Block rejection
R_ROW_REJ, // TEMP Row rejection
R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space
/* Accept modes which occur between the above rejection groups */
R_NN_ACCEPT, // NN acceptance
R_HYPHEN_ACCEPT, // Hyphen acceptance
R_MM_ACCEPT, // Matrix match acceptance
R_QUALITY_ACCEPT, // Accept word in good quality doc
R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
};
/* REJECT MAP VALUES */
// One-character codes returned by REJ::display_char(), which REJMAP::print()
// emits one per character.
#define MAP_ACCEPT '1' // character is accepted
#define MAP_REJECT_PERM '0' // perm_rejected() is true
#define MAP_REJECT_TEMP '2' // rejected(), but neither permanent nor potential
#define MAP_REJECT_POTENTIAL '3' // accept_if_good_quality() is true
// Per-character reject/accept history: one bit per REJ_FLAGS value. Flags
// are only ever set, never cleared, by this class; whether the character is
// currently rejected is derived from the combination of bits (see
// rejected()).
class REJ {
std::bitset<32> flags;
// Sets the bit for rej_flag. All public setrej_* methods funnel through here.
void set_flag(REJ_FLAGS rej_flag) {
flags.set(rej_flag);
}
public:
REJ() = default;
REJ( // classwise copy
const REJ &source) {
flags = source.flags;
}
REJ &operator=( // assign REJ
const REJ &source) = default;
// Returns true when the bit for rej_flag is set.
bool flag(REJ_FLAGS rej_flag) const {
return flags[rej_flag];
}
// Maps the current state to one of the MAP_* characters. The tests run in
// priority order: permanent reject, then potential reject, then temporary
// reject, otherwise accept.
char display_char() const {
if (perm_rejected()) {
return MAP_REJECT_PERM;
} else if (accept_if_good_quality()) {
return MAP_REJECT_POTENTIAL;
} else if (rejected()) {
return MAP_REJECT_TEMP;
} else {
return MAP_ACCEPT;
}
}
// True when any flag from the "never overridden" group is set.
bool perm_rejected() const { // Is char perm reject?
return (flag(R_TESS_FAILURE) || flag(R_SMALL_XHT) || flag(R_EDGE_CHAR) ||
flag(R_1IL_CONFLICT) || flag(R_POSTNN_1IL) || flag(R_REJ_CBLOB) ||
flag(R_BAD_REPETITION) || flag(R_MM_REJECT));
}
private:
// Any reject flag from the initial (pre NN_ACCEPT) group.
bool rej_before_nn_accept() const {
return flag(R_POOR_MATCH) || flag(R_NOT_TESS_ACCEPTED) ||
flag(R_CONTAINS_BLANKS) || flag(R_BAD_PERMUTER);
}
// Any reject flag from the group between NN_ACCEPT and MM_ACCEPT.
bool rej_between_nn_and_mm() const {
return flag(R_HYPHEN) || flag(R_DUBIOUS) || flag(R_NO_ALPHANUMS) ||
flag(R_MOSTLY_REJ) || flag(R_XHT_FIXUP);
}
// The single reject flag between MM_ACCEPT and QUALITY_ACCEPT.
bool rej_between_mm_and_quality_accept() const {
return flag(R_BAD_QUALITY);
}
// Any reject flag from the group between QUALITY_ACCEPT and
// MINIMAL_REJ_ACCEPT.
bool rej_between_quality_and_minimal_rej_accept() const {
return flag(R_DOC_REJ) || flag(R_BLOCK_REJ) || flag(R_ROW_REJ) ||
flag(R_UNLV_REJ);
}
// Rejected as of the MM_ACCEPT stage: a post-NN reject is set, or an
// initial reject is set that neither NN_ACCEPT nor HYPHEN_ACCEPT overrode.
bool rej_before_mm_accept() const {
return rej_between_nn_and_mm() ||
(rej_before_nn_accept() && !flag(R_NN_ACCEPT) &&
!flag(R_HYPHEN_ACCEPT));
}
// Rejected as of the QUALITY_ACCEPT stage: R_BAD_QUALITY is set, or an
// earlier reject is still standing because MM_ACCEPT was not set.
bool rej_before_quality_accept() const {
return rej_between_mm_and_quality_accept() ||
(!flag(R_MM_ACCEPT) && rej_before_mm_accept());
}
public:
// Current rejection status. R_MINIMAL_REJ_ACCEPT overrides every other
// flag, including the permanent ones. Otherwise the character is rejected
// when a permanent flag is set, a post-QUALITY_ACCEPT reject is set, or an
// earlier reject stands without R_QUALITY_ACCEPT.
bool rejected() const { // Is char rejected?
if (flag(R_MINIMAL_REJ_ACCEPT)) {
return false;
} else {
return (perm_rejected() || rej_between_quality_and_minimal_rej_accept() ||
(!flag(R_QUALITY_ACCEPT) && rej_before_quality_accept()));
}
}
// True when the character is rejected, not permanently, and R_BAD_PERMUTER
// is the only reject flag set outside the permanent group.
bool accept_if_good_quality() const { // potential rej?
return (rejected() && !perm_rejected() && flag(R_BAD_PERMUTER) &&
!flag(R_POOR_MATCH) && !flag(R_NOT_TESS_ACCEPTED) &&
!flag(R_CONTAINS_BLANKS) &&
(!rej_between_nn_and_mm() && !rej_between_mm_and_quality_accept() &&
!rej_between_quality_and_minimal_rej_accept()));
}
// The setrej_* methods below each set exactly one flag; none of them
// clears a flag or inspects the current state.
void setrej_tess_failure() { // Tess generated blank
set_flag(R_TESS_FAILURE);
}
void setrej_small_xht() { // Small xht char/wd
set_flag(R_SMALL_XHT);
}
void setrej_edge_char() { // Close to image edge
set_flag(R_EDGE_CHAR);
}
void setrej_1Il_conflict() { // Initial reject map
set_flag(R_1IL_CONFLICT);
}
void setrej_postNN_1Il() { // 1Il after NN
set_flag(R_POSTNN_1IL);
}
void setrej_rej_cblob() { // Insert duff blob
set_flag(R_REJ_CBLOB);
}
void setrej_mm_reject() { // Matrix matcher
set_flag(R_MM_REJECT);
}
void setrej_bad_repetition() { // Odd repeated char
set_flag(R_BAD_REPETITION);
}
void setrej_poor_match() { // Failed Rays heuristic
set_flag(R_POOR_MATCH);
}
void setrej_not_tess_accepted() {
// TEMP reject_word
set_flag(R_NOT_TESS_ACCEPTED);
}
void setrej_contains_blanks() {
// TEMP reject_word
set_flag(R_CONTAINS_BLANKS);
}
void setrej_bad_permuter() { // POTENTIAL reject_word
set_flag(R_BAD_PERMUTER);
}
void setrej_hyphen() { // PostNN dubious hyphen or .
set_flag(R_HYPHEN);
}
void setrej_dubious() { // PostNN dubious limit
set_flag(R_DUBIOUS);
}
void setrej_no_alphanums() { // TEMP reject_word
set_flag(R_NO_ALPHANUMS);
}
void setrej_mostly_rej() { // TEMP reject_word
set_flag(R_MOSTLY_REJ);
}
void setrej_xht_fixup() { // xht fixup
set_flag(R_XHT_FIXUP);
}
void setrej_bad_quality() { // TEMP reject_word
set_flag(R_BAD_QUALITY);
}
void setrej_doc_rej() { // TEMP reject_word
set_flag(R_DOC_REJ);
}
void setrej_block_rej() { // TEMP reject_word
set_flag(R_BLOCK_REJ);
}
void setrej_row_rej() { // TEMP reject_word
set_flag(R_ROW_REJ);
}
void setrej_unlv_rej() { // TEMP reject_word
set_flag(R_UNLV_REJ);
}
void setrej_hyphen_accept() { // NN Flipped a char
set_flag(R_HYPHEN_ACCEPT);
}
void setrej_nn_accept() { // NN Flipped a char
set_flag(R_NN_ACCEPT);
}
void setrej_mm_accept() { // Matrix matcher
set_flag(R_MM_ACCEPT);
}
void setrej_quality_accept() { // Quality flip a char
set_flag(R_QUALITY_ACCEPT);
}
void setrej_minimal_rej_accept() {
// Accept all except blank
set_flag(R_MINIMAL_REJ_ACCEPT);
}
bool accepted() const { // Is char accepted?
return !rejected();
}
// Rejected, but not by one of the permanent flags.
bool recoverable() const {
return (rejected() && !perm_rejected());
}
// Prints one "NAME: T/F" line per flag to fp (defined in rejctmap.cpp).
void full_print(FILE *fp) const;
};
// Reject map for a word: an owned array of per-character REJ records plus
// its length.
class REJMAP {
  std::unique_ptr<REJ[]> ptr; // ptr to the chars
  int16_t len;                // Number of chars

public:
  REJMAP() : len(0) {}

  // Copy construction delegates to operator=. len is zeroed first so the
  // object never carries an indeterminate length while being assigned.
  REJMAP(const REJMAP &rejmap) : len(0) {
    *this = rejmap;
  }

  REJMAP &operator=(const REJMAP &source);

  // Sets up the ptr array to length, whatever it was before.
  void initialise(int16_t length);

  // Returns the REJ record at index. Both bounds are checked with
  // ASSERT_HOST (remove_pos() checks both as well): index is signed, so a
  // negative value would otherwise read before the start of the array.
  REJ &operator[]( // access function
      int16_t index) const // map index
  {
    ASSERT_HOST(index >= 0);
    ASSERT_HOST(index < len);
    return ptr[index];
  }

  int32_t length() const { // map length
    return len;
  }

  int16_t accept_count() const; // How many accepted?

  int16_t reject_count() const { // How many rejects?
    return len - accept_count();
  }

  void remove_pos( // Cut out an element
      int16_t pos); // element to remove

  // Writes the map to fp as a quoted string of display characters.
  void print(FILE *fp) const;

  // Writes every flag of every character to fp.
  void full_print(FILE *fp) const;

  bool recoverable_rejects() const; // Any non perm rejs?

  bool quality_recoverable_rejects() const; // Any potential rejs?

  // Each rej_word_* method below applies one reject flag across the whole
  // word; see rejctmap.cpp for which of them skip characters that are
  // already rejected.
  void rej_word_small_xht();         // Reject whole word
  void rej_word_tess_failure();      // Reject whole word
  void rej_word_not_tess_accepted(); // Reject whole word
  void rej_word_contains_blanks();   // Reject whole word
  void rej_word_bad_permuter();      // Reject whole word
  void rej_word_xht_fixup();         // Reject whole word
  void rej_word_no_alphanums();      // Reject whole word
  void rej_word_mostly_rej();        // Reject whole word
  void rej_word_bad_quality();       // Reject whole word
  void rej_word_doc_rej();           // Reject whole word
  void rej_word_block_rej();         // Reject whole word
  void rej_word_row_rej();           // Reject whole word
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,275 @@
/******************************************************************************
*
* File: seam.cpp (Formerly seam.c)
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "seam.h"
#include "blobs.h"
#include "tprintf.h"
namespace tesseract {
/*----------------------------------------------------------------------
Public Function Code
----------------------------------------------------------------------*/
// Computes the box that encloses the seam location and every split
// belonging to this seam.
TBOX SEAM::bounding_box() const {
  // Seed with a degenerate box at the seam location, then grow it.
  TBOX enclosing(location_.x, location_.y, location_.x, location_.y);
  int split_idx = 0;
  while (split_idx < num_splits_) {
    enclosing += splits_[split_idx].bounding_box();
    ++split_idx;
  }
  return enclosing;
}
// Reports whether the seam's splits look sane for the given blob, as judged
// by SPLIT::IsHealthy with the supplied min_points/min_area thresholds.
// A seam without splits is always healthy.
bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
  if (num_splits_ == 0) {
    return true;
  }
  // TODO(rays) Try testing all the splits. Duplicating original code for now,
  // which tested only the first.
  return splits_[0].IsHealthy(blob, min_points, min_area);
}
// Runs FindBlobWidth on every existing seam and on *this, as if *this had
// already been inserted at insert_index: seams before that position keep
// their index, seams from that position onwards are shifted up by one.
// Stops and returns false at the first FindBlobWidth failure; returns true
// when all succeed. widthn_/widthp_ are only changed if modify is true.
bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
                               const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
  // Seams that stay in front of the insertion point.
  for (int before = 0; before < insert_index; ++before) {
    const bool ok = seams[before]->FindBlobWidth(blobs, before, modify);
    if (!ok) {
      return false;
    }
  }
  // The seam being inserted.
  if (!FindBlobWidth(blobs, insert_index, modify)) {
    return false;
  }
  // Seams that will sit one slot further along after the insertion.
  for (unsigned after = insert_index; after < seams.size(); ++after) {
    const bool ok = seams[after]->FindBlobWidth(blobs, after + 1, modify);
    if (!ok) {
      return false;
    }
  }
  return true;
}
// Locates, for each split of this seam, a blob that contains it, starting at
// blobs[index], then scanning to the right and finally to the left. When
// modify is true, widthp_ / widthn_ are reset and then raised to the largest
// right / left distance from index at which a split was found. Returns true
// only if every split was found in some blob.
bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
  if (modify) {
    widthp_ = 0;
    widthn_ = 0;
  }
  int num_found = 0;
  for (int s = 0; s < num_splits_; ++s) {
    const SPLIT &split = splits_[s];
    bool found_split = split.ContainedByBlob(*blobs[index]);
    // Scan the blobs to the right of index.
    unsigned right = index + 1;
    while (!found_split && right < blobs.size()) {
      found_split = split.ContainedByBlob(*blobs[right]);
      if (found_split && right - index > widthp_ && modify) {
        widthp_ = right - index;
      }
      ++right;
    }
    // Scan the blobs to the left of index.
    int left = index - 1;
    while (!found_split && left >= 0) {
      found_split = split.ContainedByBlob(*blobs[left]);
      if (found_split && index - left > widthn_ && modify) {
        widthn_ = index - left;
      }
      --left;
    }
    num_found += found_split ? 1 : 0;
  }
  return num_found == num_splits_;
}
// Splits this blob into two blobs by applying the splits included in
// *this SEAM
void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].SplitOutlineList(blob->outlines);
}
blob->ComputeBoundingBoxes();
divide_blobs(blob, other_blob, italic_blob, location_);
blob->EliminateDuplicateOutlines();
other_blob->EliminateDuplicateOutlines();
blob->CorrectBlobOrder(other_blob);
}
// Undoes ApplySeam by removing the seam between these two blobs.
// All outlines of other_blob are appended to blob's outline list, other_blob
// is deleted, and every split of this seam is then unsplit on blob.
// Produces one blob as a result. other_blob must not be used after the call.
void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
  // If blob has no outlines of its own, adopt other_blob's list wholesale.
  if (blob->outlines == nullptr) {
    blob->outlines = other_blob->outlines;
    other_blob->outlines = nullptr;
  }
  TESSLINE *outline = blob->outlines;
  // Guard against both blobs being empty: there is then no tail to append to
  // and other_blob->outlines is already nullptr, so nothing needs linking.
  if (outline != nullptr) {
    while (outline->next) {
      outline = outline->next;
    }
    outline->next = other_blob->outlines;
  }
  // Detach the outlines before deleting so they are not owned by other_blob
  // any more.
  other_blob->outlines = nullptr;
  delete other_blob;
  for (int s = 0; s < num_splits_; ++s) {
    splits_[s].UnsplitOutlineList(blob);
  }
  blob->ComputeBoundingBoxes();
  blob->EliminateDuplicateOutlines();
}
// Writes label followed by the priority, location, widthp_/widthn_ and all
// splits of this seam (comma separated) via tprintf, ending with a newline.
void SEAM::Print(const char *label) const {
  tprintf("%s", label);
  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, widthp_, widthn_);
  const int last = num_splits_ - 1;
  for (int s = 0; s <= last; ++s) {
    splits_[s].Print();
    // Separator between splits, but not after the final one.
    if (s != last) {
      tprintf(", ");
    }
  }
  tprintf("\n");
}
// Prints a collection of SEAMs.
/* static */
void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
if (!seams.empty()) {
tprintf("%s\n", label);
for (unsigned x = 0; x < seams.size(); ++x) {
tprintf("%2u: ", x);
seams[x]->Print("");
}
tprintf("\n");
}
}
#ifndef GRAPHICS_DISABLED
// Asks every split of this seam to draw itself in the given window.
void SEAM::Mark(ScrollView *window) const {
  int s = 0;
  while (s < num_splits_) {
    splits_[s].Mark(window);
    ++s;
  }
}
#endif
// Separates the blobs in [first, last] again so that each blob's outline
// list ends where the next blob's list begins. The seams in [first, last)
// are revealed first. This operation should undo the effect of JoinPieces.
/* static */
void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
                       int first, int last) {
  for (int x = first; x < last; ++x) {
    seams[x]->Reveal();
  }

  // Walk the chained outline list and cut it wherever the next outline is
  // the head of the following blob's list.
  TESSLINE *outline = blobs[first]->outlines;
  for (int next_blob = first + 1; outline != nullptr && next_blob <= last;) {
    TESSLINE *const next_head = blobs[next_blob]->outlines;
    if (outline->next != next_head) {
      outline = outline->next;
      continue;
    }
    outline->next = nullptr;
    outline = next_head;
    ++next_blob;
  }
}
// Chains the outline lists of blobs [first, last] into one list hanging off
// blobs[first], so the group can be classified as a single blob. A seam in
// [first, last) is hidden when its whole [x - widthn_, x + widthp_] range
// lies inside the group. Does nothing if blobs[first] has no outlines.
/* static */
void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
                      int first, int last) {
  TESSLINE *tail = blobs[first]->outlines;
  if (tail == nullptr) {
    return;
  }
  for (int x = first; x < last; ++x) {
    const SEAM *seam = seams[x];
    const bool inside_group = x - seam->widthn_ >= first && x + seam->widthp_ < last;
    if (inside_group) {
      seam->Hide();
    }
    // Advance to the current end of the chain and attach the next blob.
    for (; tail->next != nullptr; tail = tail->next) {
    }
    tail->next = blobs[x + 1]->outlines;
  }
}
// Hides the seam so the outlines appear not to be cut by it.
void SEAM::Hide() const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Hide();
}
}
// Undoes hide, so the outlines are cut by the seam.
void SEAM::Reveal() const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Reveal();
}
}
// Evaluates, without storing it, the full priority of this seam: priority_
// plus the FullPriority of the first split. Returns 0 for a seam with no
// splits. All arguments are forwarded unchanged to SPLIT::FullPriority.
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
                         double center_knob, double width_change_knob) const {
  if (num_splits_ == 0) {
    return 0.0f;
  }
  // Every split except the first is applied to the outlines while the first
  // split is being evaluated.
  int apply = 1;
  while (apply < num_splits_) {
    splits_[apply].SplitOutline();
    ++apply;
  }
  const float first_split_priority = splits_[0].FullPriority(
      xmin, xmax, overlap_knob, centered_maxwidth, center_knob, width_change_knob);
  float full_priority = priority_ + first_split_priority;
  // Take the temporary splits out again, in reverse order.
  for (int undo = num_splits_; --undo >= 1;) {
    splits_[undo].UnsplitOutlines();
  }
  return full_priority;
}
/**
 * @name start_seam_list
 *
 * Clears seam_array and fills it with one newly allocated SEAM for each
 * pair of neighbouring blobs in word. Each seam has priority 0 and no
 * splits; its location is horizontally midway between the right edge of
 * the left blob and the left edge of the right blob, and vertically the
 * average of the top and bottom edges of both blobs.
 */
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
  seam_array->clear();
  const auto num_blobs = word->NumBlobs();
  TPOINT location;
  for (int right = 1; right < num_blobs; ++right) {
    TBOX left_box = word->blobs[right - 1]->bounding_box();
    TBOX right_box = word->blobs[right]->bounding_box();
    location.x = (left_box.right() + right_box.left()) / 2;
    location.y = (left_box.bottom() + left_box.top() + right_box.bottom() + right_box.top()) / 4;
    seam_array->push_back(new SEAM(0.0f, location));
  }
}
} // namespace tesseract

View File

@ -0,0 +1,206 @@
/******************************************************************************
*
* File: seam.h
* Author: Mark Seaman, SW Productivity
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
#ifndef SEAM_H
#define SEAM_H
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "blobs.h"
#include "split.h"
namespace tesseract {
using PRIORITY = float; /* PRIORITY */
// A SEAM holds up to kMaxNumSplits SPLITs together with a priority, a
// location, and the blob range (widthn_/widthp_) that contains the splits.
class SEAM {
public:
// A seam with no splits
SEAM(float priority, const TPOINT &location)
: priority_(priority), location_(location), widthp_(0), widthn_(0), num_splits_(0) {}
// A seam with a single split point.
SEAM(float priority, const TPOINT &location, const SPLIT &split)
: priority_(priority), location_(location), widthp_(0), widthn_(0), num_splits_(1) {
splits_[0] = split;
}
// Default copy constructor, operator= and destructor are OK!
// Accessors.
float priority() const {
return priority_;
}
void set_priority(float priority) {
priority_ = priority;
}
// Returns true if this seam holds at least one split.
bool HasAnySplits() const {
return num_splits_ > 0;
}
// Returns the bounding box of all the points in the seam.
TBOX bounding_box() const;
// Returns true if other can be combined into *this: the x distance between
// the two locations is strictly less than max_x_dist, the combined number of
// splits fits in kMaxNumSplits, the summed priority is below
// max_total_priority, and the seams neither have vertically overlapping
// splits nor share a position.
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const {
int dist = location_.x - other.location_.x;
return -max_x_dist < dist && dist < max_x_dist &&
num_splits_ + other.num_splits_ <= kMaxNumSplits &&
priority_ + other.priority_ < max_total_priority && !OverlappingSplits(other) &&
!SharesPosition(other);
}
// Combines other into *this. Only works if CombineableWith returned true.
// Priorities are added, the location becomes the midpoint of the two
// locations, and other's splits are appended.
void CombineWith(const SEAM &other) {
priority_ += other.priority_;
location_ += other.location_;
location_ /= 2;
// Copying stops silently once kMaxNumSplits splits are stored.
for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) {
splits_[num_splits_++] = other.splits_[s];
}
}
// Returns true if the given blob contains all splits of *this SEAM.
bool ContainedByBlob(const TBLOB &blob) const {
for (int s = 0; s < num_splits_; ++s) {
if (!splits_[s].ContainedByBlob(blob)) {
return false;
}
}
return true;
}
// Returns true if the given EDGEPT is used by this SEAM, checking only
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT *point) const {
for (int s = 0; s < num_splits_; ++s) {
if (splits_[s].UsesPoint(point)) {
return true;
}
}
return false;
}
// Returns true if *this and other share any common point, by coordinates.
bool SharesPosition(const SEAM &other) const {
for (int s = 0; s < num_splits_; ++s) {
for (int t = 0; t < other.num_splits_; ++t) {
if (splits_[s].SharesPosition(other.splits_[t])) {
return true;
}
}
}
return false;
}
// Returns true if *this and other have any vertically overlapping splits.
bool OverlappingSplits(const SEAM &other) const {
for (int s = 0; s < num_splits_; ++s) {
TBOX split1_box = splits_[s].bounding_box();
for (int t = 0; t < other.num_splits_; ++t) {
TBOX split2_box = other.splits_[t].bounding_box();
if (split1_box.y_overlap(split2_box)) {
return true;
}
}
}
return false;
}
// Marks the edgepts used by the seam so the segments made by the cut
// never get split further by another seam in the future.
void Finalize() {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].point1->MarkChop();
splits_[s].point2->MarkChop();
}
}
// Returns true if the splits in *this SEAM appear OK in the sense that they
// do not cross any outlines and do not chop off any ridiculously small
// pieces.
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
// seam, which is about to be inserted at insert_index. Returns false if
// any of the computations fails, as this indicates an invalid chop.
// widthn_/widthp_ are only changed if modify is true.
bool PrepareToInsertSeam(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
int insert_index, bool modify);
// Computes the widthp_/widthn_ range. Returns false if not all the splits
// are accounted for. widthn_/widthp_ are only changed if modify is true.
bool FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify);
// Splits this blob into two blobs by applying the splits included in
// *this SEAM
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const;
// Undoes ApplySeam by removing the seam between these two blobs.
// Produces one blob as a result, and deletes other_blob.
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const;
// Prints everything in *this SEAM.
void Print(const char *label) const;
// Prints a collection of SEAMs.
static void PrintSeams(const char *label, const std::vector<SEAM *> &seams);
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window.
void Mark(ScrollView *window) const;
#endif
// Break up the blobs in this chain so that they are all independent.
// This operation should undo the effect of JoinPieces.
static void BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
int first, int last);
// Join a group of base level pieces into a single blob that can then
// be classified.
static void JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
int first, int last);
// Hides the seam so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the seam.
void Reveal() const;
// Computes and returns, but does not set, the full priority of *this SEAM.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
double center_knob, double width_change_knob) const;
private:
// Maximum number of splits that a SEAM can hold.
static const uint8_t kMaxNumSplits = 3;
// Priority of this split. Lower is better.
float priority_;
// Position of the middle of the seam.
TPOINT location_;
// A range such that all splits in *this SEAM are contained within blobs in
// the range [index - widthn_,index + widthp_] where index is the index of
// this SEAM in the seams vector.
int8_t widthp_;
int8_t widthn_;
// Number of splits_ that are used.
uint8_t num_splits_;
// Set of pairs of points that are the ends of each split in the SEAM.
// Only the first num_splits_ entries are meaningful.
SPLIT splits_[kMaxNumSplits];
};
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
} // namespace tesseract
#endif

View File

@ -0,0 +1,324 @@
/******************************************************************************
*
* File: split.cpp (Formerly split.c)
* Author: Mark Seaman, OCR Technology
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*************************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "split.h"
#include "coutln.h"
#include "tprintf.h"
#include <algorithm>
namespace tesseract {
/*----------------------------------------------------------------------
V a r i a b l e s
----------------------------------------------------------------------*/
// Limit on the amount of penalty for the chop being off-center.
// Used as the upper bound of the center term in SPLIT::FullPriority.
const int kCenterGradeCap = 25;
// Ridiculously large priority for splits that are no use.
// Returned by SPLIT::FullPriority when [xmin, xmax] strictly encloses the
// horizontal extent of both halves of the split.
const double kBadPriority = 999.0;
// Boolean parameter, default 0, described as "Display splits".
// NOTE(review): not referenced by the code visible in this file - confirm
// where it is consumed.
BOOL_VAR(wordrec_display_splits, 0, "Display splits");
// Hides the SPLIT so the outlines appear not to be cut by it.
void SPLIT::Hide() const {
EDGEPT *edgept = point1;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Undoes hide, so the outlines are cut by the SPLIT.
void SPLIT::Reveal() const {
EDGEPT *edgept = point1;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
// The result is the sum of three terms (overlap, centering, width change)
// computed from the boxes of the two outline halves, or kBadPriority when
// [xmin, xmax] strictly encloses both halves.
float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
double center_knob, double width_change_knob) const {
// Bounding boxes of the two outline halves produced by the split.
TBOX box1 = Box12();
TBOX box2 = Box21();
int min_left = std::min(box1.left(), box2.left());
int max_right = std::max(box1.right(), box2.right());
// Both halves lie strictly inside [xmin, xmax]: the split is no use.
if (xmin < min_left && xmax > max_right) {
return kBadPriority;
}
float grade = 0.0f;
// grade_overlap.
int width1 = box1.width();
int width2 = box2.width();
int min_width = std::min(width1, width2);
// Negated horizontal gap, i.e. positive when the boxes overlap in x.
int overlap = -box1.x_gap(box2);
if (overlap == min_width) {
grade += 100.0f; // Total overlap.
} else {
// An overlap of more than half the narrower box is increased by the excess
// over min_width before being weighted.
if (2 * overlap > min_width) {
overlap += 2 * overlap - min_width;
}
if (overlap > 0) {
grade += overlap_knob * overlap;
}
}
// grade_center_of_blob.
// Only applied when at least one half is no wider than centered_maxwidth;
// the penalty grows with the width difference and is capped at
// kCenterGradeCap.
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
}
// grade_width_change.
// 20 minus the amount by which the combined extent exceeds the wider half;
// only a positive value contributes.
float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
if (width_change_grade > 0.0f) {
grade += width_change_grade * width_change_knob;
}
return grade;
}
// Reports whether this split looks usable: it must not produce a little
// chunk (see IsLittleChunk) and the segment from point1 to point2 must not
// cross any outline of the blob.
bool SPLIT::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
  if (IsLittleChunk(min_points, min_area)) {
    return false;
  }
  return !blob.SegmentCrossesOutline(point1->pos, point2->pos);
}
// Reports whether the split cuts off a small chunk. A side counts as small
// when its segment is a short non-circular one (per min_points) AND its
// SegmentArea is below min_area; the side from point1 to point2 is checked
// first, then the side from point2 to point1.
bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
  const bool small_12 = point1->ShortNonCircularSegment(min_points, point2) &&
                        point1->SegmentArea(point2) < min_area;
  if (small_12) {
    return true;
  }
  return point2->ShortNonCircularSegment(min_points, point1) &&
         point2->SegmentArea(point1) < min_area;
}
/**********************************************************************
* make_edgept
*
* Create an EDGEPT and hook it into an existing list of edge points.
*
* The new point is allocated with new, placed at (x, y) and linked between
* prev and next. If prev carries a src_outline and prev/next are adjacent,
* prev's outline steps are divided between prev and the new point;
* otherwise the new point has no src_outline. The vec fields of the new
* point and of prev are recomputed. Returns the new point.
**********************************************************************/
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
EDGEPT *this_edgept;
/* Create point */
this_edgept = new EDGEPT;
this_edgept->pos.x = x;
this_edgept->pos.y = y;
// Now deal with the src_outline steps.
C_OUTLINE *prev_ol = prev->src_outline;
if (prev_ol != nullptr && prev->next == next) {
// Compute the fraction of the segment that is being cut.
// NOTE(review): segment_vec.length() is the divisor below; it is zero if
// prev and next share a position - confirm callers never pass that.
FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
double cut_fraction = target_vec.length() / segment_vec.length();
// Get the start and end at the step level.
ICOORD step_start = prev_ol->position_at_index(prev->start_step);
int end_step = prev->start_step + prev->step_count;
int step_length = prev_ol->pathlength();
// Step indices are taken modulo the outline path length.
ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
ICOORD step_vec = step_end - step_start;
double target_length = step_vec.length() * cut_fraction;
// Find the point on the segment that gives the length nearest to target.
int best_step = prev->start_step;
ICOORD total_step(0, 0);
double best_dist = target_length;
for (int s = prev->start_step; s < end_step; ++s) {
total_step += prev_ol->step(s % step_length);
double dist = fabs(target_length - total_step.length());
if (dist < best_dist) {
best_dist = dist;
best_step = s + 1;
}
}
// The new point is an intermediate point.
// It takes the steps from best_step to end_step; prev keeps the steps
// before best_step.
this_edgept->src_outline = prev_ol;
this_edgept->step_count = end_step - best_step;
this_edgept->start_step = best_step % step_length;
prev->step_count = best_step - prev->start_step;
} else {
// The new point is poly only.
this_edgept->src_outline = nullptr;
this_edgept->step_count = 0;
this_edgept->start_step = 0;
}
/* Hook it up */
this_edgept->next = next;
this_edgept->prev = prev;
prev->next = this_edgept;
next->prev = this_edgept;
/* Set up vec entries */
this_edgept->vec.x = this_edgept->next->pos.x - x;
this_edgept->vec.y = this_edgept->next->pos.y - y;
this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
return this_edgept;
}
/**********************************************************************
 * remove_edgept
 *
 * Unlinks point from the list it is in and deletes it. If point and its
 * predecessor come from the same (non-null) src_outline, the predecessor
 * inherits point's steps. The predecessor's vec is recomputed so that it
 * leads to point's former successor.
 **********************************************************************/
void remove_edgept(EDGEPT *point) {
  EDGEPT *const before = point->prev;
  EDGEPT *const after = point->next;
  // Add point's steps onto before's steps if they are from the same outline.
  const bool same_source =
      before->src_outline == point->src_outline && before->src_outline != nullptr;
  if (same_source) {
    before->step_count += point->step_count;
  }
  // Bridge over the point being removed.
  before->next = after;
  after->prev = before;
  before->vec.x = after->pos.x - before->pos.x;
  before->vec.y = after->pos.y - before->pos.y;
  delete point;
}
/**********************************************************************
 * Print
 *
 * Writes the coordinates of both end points of the split via tprintf in
 * the form (x1,y1)--(x2,y2), without a trailing newline.
 **********************************************************************/
void SPLIT::Print() const {
  const auto &first = point1->pos;
  const auto &second = point2->pos;
  tprintf("(%d,%d)--(%d,%d)", first.x, first.y, second.x, second.y);
}
#ifndef GRAPHICS_DISABLED
// Draws the split as a green line from point1 to point2 in the given window
// and then updates the window.
void SPLIT::Mark(ScrollView *window) const {
  const auto &from = point1->pos;
  const auto &to = point2->pos;
  window->Pen(ScrollView::GREEN);
  window->Line(from.x, from.y, to.x, to.y);
  window->UpdateWindow();
}
#endif
// Applies the split to the edge points (SplitOutline) and then appends two
// new TESSLINEs to the end of the given list: one whose loop starts at
// point1, followed by one whose loop starts at point2. outlines must point
// to a non-empty list.
void SPLIT::SplitOutlineList(TESSLINE *outlines) const {
  SplitOutline();
  // Find the current end of the list.
  TESSLINE *tail = outlines;
  while (tail->next != nullptr) {
    tail = tail->next;
  }
  auto *first_half = new TESSLINE;
  tail->next = first_half;
  first_half->loop = point1;
  first_half->ComputeBoundingBox();
  auto *second_half = new TESSLINE;
  first_half->next = second_half;
  second_half->loop = point2;
  second_half->ComputeBoundingBox();
  second_half->next = nullptr;
}
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
// Two new edge points are created, one coincident with each of point1 and
// point2, and the src_outline information of point1/point2 is moved onto
// them.
void SPLIT::SplitOutline() const {
// Remember the original successors before make_edgept relinks the points.
EDGEPT *temp2 = point2->next;
EDGEPT *temp1 = point1->next;
/* Create two new points */
// new_point1 sits at point1's position, linked between point2 and temp1;
// new_point2 sits at point2's position, linked between point1 and temp2.
EDGEPT *new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
EDGEPT *new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
// point1 and 2 are now cross-over points, so they must have nullptr
// src_outlines and give their src_outline information their new
// replacements.
new_point1->src_outline = point1->src_outline;
new_point1->start_step = point1->start_step;
new_point1->step_count = point1->step_count;
new_point2->src_outline = point2->src_outline;
new_point2->start_step = point2->start_step;
new_point2->step_count = point2->step_count;
point1->src_outline = nullptr;
point1->start_step = 0;
point1->step_count = 0;
point2->src_outline = nullptr;
point2->start_step = 0;
point2->step_count = 0;
}
// Reverses SplitOutlineList on the given blob: the edge points are rejoined
// (UnsplitOutlines) and two new TESSLINEs, with loops starting at point1 and
// point2, are pushed onto the front of blob->outlines. This may leave
// duplicate outlines in the list.
void SPLIT::UnsplitOutlineList(TBLOB *blob) const {
  /* Modify edge points */
  UnsplitOutlines();
  // Prepends a fresh outline whose loop starts at the given edge point.
  const auto push_front = [blob](EDGEPT *loop_start) {
    auto *outline = new TESSLINE;
    outline->next = blob->outlines;
    blob->outlines = outline;
    outline->loop = loop_start;
  };
  push_front(point1);
  push_front(point2);
}
// Removes the split that was put between these two points.
// The two edge points that follow point1 and point2 are deleted, their
// src_outline information is handed back to point1/point2, and the vec
// fields of point1 and point2 are recomputed.
void SPLIT::UnsplitOutlines() const {
// tmp1/tmp2 are the points immediately after point1/point2; both are deleted
// below.
EDGEPT *tmp1 = point1->next;
EDGEPT *tmp2 = point2->next;
// Relink the successors of the doomed points back to point2 and point1.
tmp1->next->prev = point2;
tmp2->next->prev = point1;
// tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
// deleted.
point1->next = tmp2->next;
point1->src_outline = tmp2->src_outline;
point1->start_step = tmp2->start_step;
point1->step_count = tmp2->step_count;
// Likewise point2 takes tmp1's place.
point2->next = tmp1->next;
point2->src_outline = tmp1->src_outline;
point2->start_step = tmp1->start_step;
point2->step_count = tmp1->step_count;
delete tmp1;
delete tmp2;
// Refresh the direction vectors to the restored successors.
point1->vec.x = point1->next->pos.x - point1->pos.x;
point1->vec.y = point1->next->pos.y - point1->pos.y;
point2->vec.x = point2->next->pos.x - point2->pos.x;
point2->vec.y = point2->next->pos.y - point2->pos.y;
}
} // namespace tesseract

View File

@ -0,0 +1,125 @@
/******************************************************************************
*
* File: split.h
* Author: Mark Seaman, SW Productivity
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*****************************************************************************/
#ifndef SPLIT_H
#define SPLIT_H
#include "blobs.h" // for EDGEPT, TBLOB, TESSLINE
#include "params.h" // for BOOL_VAR_H, BoolParam
#include "rect.h" // for TBOX
namespace tesseract {
class ScrollView;
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
// A SPLIT is a pair of edge points (point1, point2) between which an outline
// is to be cut. The struct stores only the two pointers.
struct SPLIT {
SPLIT() : point1(nullptr), point2(nullptr) {}
SPLIT(EDGEPT *pt1, EDGEPT *pt2) : point1(pt1), point2(pt2) {}
// Returns the bounding box of all the points in the split.
TBOX bounding_box() const {
return TBOX(std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
}
// Returns the bounding box of the outline from point1 to point2.
TBOX Box12() const {
return point1->SegmentBox(point2);
}
// Returns the bounding box of the outline from point2 to point1.
TBOX Box21() const {
return point2->SegmentBox(point1);
}
// Hides the SPLIT so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the SPLIT.
void Reveal() const;
// Returns true if the given EDGEPT is used by this SPLIT, checking only
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT *point) const {
return point1 == point || point2 == point;
}
// Returns true if the other SPLIT has any position shared with *this.
bool SharesPosition(const SPLIT &other) const {
return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
}
// Returns true if both points are contained within the blob.
bool ContainedByBlob(const TBLOB &blob) const {
return blob.Contains(point1->pos) && blob.Contains(point2->pos);
}
// Returns true if both points are contained within the outline.
bool ContainedByOutline(const TESSLINE &outline) const {
return outline.Contains(point1->pos) && outline.Contains(point2->pos);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
double center_knob, double width_change_knob) const;
// Returns true if *this SPLIT appears OK in the sense that it does not cross
// any outlines and does not chop off any ridiculously small pieces.
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const;
// Returns true if the split generates a small chunk in terms of either area
// or number of points.
bool IsLittleChunk(int min_points, int min_area) const;
// Prints the coordinates of both points of the split.
void Print() const;
#ifndef GRAPHICS_DISABLED
// Draws the split in the given window.
void Mark(ScrollView *window) const;
#endif
// Creates two outlines out of one by splitting the original one in half.
// Inserts the resulting outlines into the given list.
void SplitOutlineList(TESSLINE *outlines) const;
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
void SplitOutline() const;
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
// the split, but possibly leaving some duplicate outlines.
void UnsplitOutlineList(TBLOB *blob) const;
// Removes the split that was put between these two points.
void UnsplitOutlines() const;
// The two end points of the split. Both are nullptr after default
// construction.
EDGEPT *point1;
EDGEPT *point2;
};
/*----------------------------------------------------------------------
V a r i a b l e s
----------------------------------------------------------------------*/
extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
void remove_edgept(EDGEPT *point);
} // namespace tesseract
#endif

View File

@ -0,0 +1,639 @@
/**********************************************************************
* File: statistc.cpp (Formerly stats.c)
* Description: Simple statistical package for integer values.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "statistc.h"
#include "errcode.h"
#include "scrollview.h"
#include "tprintf.h"
#include "helpers.h"
#include <cmath>
#include <cstdlib>
#include <cstring>
namespace tesseract {
/**********************************************************************
 * STATS::STATS
 *
 * Builds a histogram covering [min_bucket_value, max_bucket_value_plus_1)
 * by allocating one bucket per value and zeroing them. An empty or
 * inverted range is replaced by the single-bucket range [0, 1).
 **********************************************************************/
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
  const bool bad_range = max_bucket_value_plus_1 <= min_bucket_value;
  rangemin_ = bad_range ? 0 : min_bucket_value;
  rangemax_ = bad_range ? 1 : max_bucket_value_plus_1;
  buckets_ = new int32_t[rangemax_ - rangemin_];
  clear();
}
/**********************************************************************
 * STATS::set_range
 *
 * Alter the range on an existing stats element to
 * [min_bucket_value, max_bucket_value_plus_1) and zero all buckets.
 * The bucket array is reallocated only when the number of buckets changes.
 * Returns false, leaving the object untouched, if the range is empty or
 * inverted; returns true otherwise.
 **********************************************************************/
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
  if (max_bucket_value_plus_1 <= min_bucket_value) {
    return false;
  }
  const int32_t new_size = max_bucket_value_plus_1 - min_bucket_value;
  if (rangemax_ - rangemin_ != new_size) {
    // Allocate the replacement before releasing the old array: if new[]
    // throws, buckets_ still points at valid memory instead of dangling
    // (which would lead to a double delete in the destructor).
    int32_t *new_buckets = new int32_t[new_size];
    delete[] buckets_;
    buckets_ = new_buckets;
  }
  rangemin_ = min_bucket_value; // setup
  rangemax_ = max_bucket_value_plus_1;
  clear(); // zero it
  return true;
}
/**********************************************************************
 * STATS::clear
 *
 * Resets the total count to zero and, if a bucket array exists, zeroes
 * every bucket in the current range.
 **********************************************************************/
void STATS::clear() {
  total_count_ = 0;
  if (buckets_ == nullptr) {
    return;
  }
  const size_t num_bytes = (rangemax_ - rangemin_) * sizeof(buckets_[0]);
  memset(buckets_, 0, num_bytes);
}
/**********************************************************************
* STATS::~STATS
*
* Destructor for a stats class. Releases the bucket array.
**********************************************************************/
STATS::~STATS() {
// delete[] on a null buckets_ is a no-op, so no check is needed.
delete[] buckets_;
}
/**********************************************************************
 * STATS::add
 *
 * Adds count samples of the given value (a negative count removes
 * samples). Values outside the range are clipped to the first or last
 * bucket. Does nothing if there is no bucket array.
 **********************************************************************/
void STATS::add(int32_t value, int32_t count) {
  if (buckets_ == nullptr) {
    return;
  }
  const int32_t clipped = ClipToRange(value, rangemin_, rangemax_ - 1);
  buckets_[clipped - rangemin_] += count;
  total_count_ += count; // keep count of total
}
/**********************************************************************
 * STATS::mode
 *
 * Returns the value whose bucket holds the largest count, or rangemin_
 * if there is no bucket array. Buckets are scanned from the top of the
 * range downwards and only a strictly larger count replaces the current
 * best, which starts as the first bucket; so the first bucket wins every
 * tie it is part of, and otherwise the highest tied value wins.
 **********************************************************************/
int32_t STATS::mode() const {
  if (buckets_ == nullptr) {
    return rangemin_;
  }
  int32_t best_count = buckets_[0];
  int32_t best_index = 0;
  int index = rangemax_ - rangemin_;
  while (--index > 0) {
    if (buckets_[index] > best_count) {
      best_count = buckets_[index];
      best_index = index;
    }
  }
  return best_index + rangemin_;
}
/**********************************************************************
 * STATS::mean
 *
 * Returns the mean of the samples. Returns rangemin_ if there is no
 * bucket array or the total count is not positive.
 **********************************************************************/
double STATS::mean() const {
  if (buckets_ == nullptr || total_count_ <= 0) {
    return static_cast<double>(rangemin_);
  }
  // Sum of (bucket index * count), accumulated from the top bucket down.
  int64_t weighted_sum = 0;
  int index = rangemax_ - rangemin_;
  while (index-- > 0) {
    weighted_sum += static_cast<int64_t>(index) * buckets_[index];
  }
  // The indices are relative to rangemin_, so shift the result back.
  return static_cast<double>(weighted_sum) / total_count_ + rangemin_;
}
/**********************************************************************
* STATS::sd
*
* Find the standard deviation of a stats class.
**********************************************************************/
double STATS::sd() const { // standard deviation
if (buckets_ == nullptr || total_count_ <= 0) {
return 0.0;
}
int64_t sum = 0;
double sqsum = 0.0;
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
sum += static_cast<int64_t>(index) * buckets_[index];
sqsum += static_cast<double>(index) * index * buckets_[index];
}
double variance = static_cast<double>(sum) / total_count_;
variance = sqsum / total_count_ - variance * variance;
if (variance > 0.0) {
return sqrt(variance);
}
return 0.0;
}
/**********************************************************************
* STATS::ile
*
* Returns the fractile value such that frac fraction (in [0,1]) of samples
* has a value less than the return value.
**********************************************************************/
double STATS::ile(double frac) const {
  if (buckets_ == nullptr || total_count_ == 0) {
    return static_cast<double>(rangemin_);
  }
#if 0
  // TODO(rays) The existing code doesn't seem to be doing the right thing
  // with target a double but this substitute crashes the code that uses it.
  // Investigate and fix properly.
  int target = IntCastRounded(frac * total_count_);
  target = ClipToRange(target, 1, total_count_);
#else
  // Number of samples that must be accumulated, kept within [1, total].
  double target = frac * total_count_;
  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
#endif
  // Walk up the histogram until the cumulative count reaches the target.
  const int bucket_count = rangemax_ - rangemin_;
  int cumulative = 0;
  int index = 0;
  while (index < bucket_count && cumulative < target) {
    cumulative += buckets_[index];
    ++index;
  }
  if (index <= 0) {
    return static_cast<double>(rangemin_);
  }
  // index is one past the bucket that reached the target; step back by the
  // fraction of that bucket which overshot it.
  ASSERT_HOST(buckets_[index - 1] > 0);
  return rangemin_ + index - static_cast<double>(cumulative - target) / buckets_[index - 1];
}
/**********************************************************************
* STATS::min_bucket
*
* Find REAL minimum bucket - ile(0.0) isn't necessarily correct
**********************************************************************/
int32_t STATS::min_bucket() const { // smallest value with a non-zero count
  if (buckets_ == nullptr || total_count_ == 0) {
    return rangemin_;
  }
  const int32_t bucket_count = rangemax_ - rangemin_;
  int32_t first_used = 0;
  // Skip the empty buckets at the bottom of the range.
  while (first_used < bucket_count && buckets_[first_used] == 0) {
    ++first_used;
  }
  return rangemin_ + first_used;
}
/**********************************************************************
* STATS::max_bucket
*
* Find REAL maximum bucket - ile(1.0) isn't necessarily correct
**********************************************************************/
int32_t STATS::max_bucket() const { // largest value with a non-zero count
  if (buckets_ == nullptr || total_count_ == 0) {
    return rangemin_;
  }
  int32_t last_used = rangemax_ - rangemin_ - 1;
  // Skip the empty buckets at the top of the range; the scan stops at
  // bucket 0 without testing it.
  while (last_used > 0 && buckets_[last_used] == 0) {
    --last_used;
  }
  return rangemin_ + last_used;
}
/**********************************************************************
* STATS::median
*
* Finds a more useful estimate of median than ile(0.5).
*
* Overcomes a problem with ile() - if the samples are, for example,
* 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
* between 6 and 13 = 9.5
**********************************************************************/
double STATS::median() const { // median, bridging gaps in the histogram
  if (buckets_ == nullptr) {
    return static_cast<double>(rangemin_);
  }
  const double fractile = ile(0.5);
  const int centre_pile = static_cast<int>(floor(fractile));
  // ile(0.5) is good enough unless it landed on an empty pile.
  if (total_count_ <= 1 || pile_count(centre_pile) != 0) {
    return fractile;
  }
  // Nearest non-empty pile below the landing point.
  int32_t below = centre_pile;
  while (pile_count(below) == 0) {
    --below;
  }
  // Nearest non-empty pile above the landing point.
  int32_t above = centre_pile;
  while (pile_count(above) == 0) {
    ++above;
  }
  // Report the midpoint of the gap.
  return (below + above) / 2.0;
}
/**********************************************************************
* STATS::local_min
*
* Return true if this point is a local min.
**********************************************************************/
bool STATS::local_min(int32_t x) const {
  if (buckets_ == nullptr) {
    return false;
  }
  const int32_t bucket_count = rangemax_ - rangemin_;
  // Convert the (clipped) value into a bucket index.
  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
  const int32_t here = buckets_[x];
  if (here == 0) {
    return true; // an empty bucket always counts as a local minimum
  }
  // Walk left across any plateau of equal counts.
  int32_t probe = x - 1;
  while (probe >= 0 && buckets_[probe] == here) {
    --probe;
  }
  if (probe >= 0 && buckets_[probe] < here) {
    return false; // something lower on the left
  }
  // Walk right across any plateau of equal counts.
  probe = x + 1;
  while (probe < bucket_count && buckets_[probe] == here) {
    ++probe;
  }
  // A lower bucket on the right also disqualifies x.
  return !(probe < bucket_count && buckets_[probe] < here);
}
/**********************************************************************
* STATS::smooth
*
* Apply a triangular smoothing filter to the stats.
* This makes the modes a bit more useful.
* The factor gives the height of the triangle, i.e. the weight of the
* centre.
**********************************************************************/
void STATS::smooth(int32_t factor) {
if (buckets_ == nullptr || factor < 2) {
return;
}
STATS result(rangemin_, rangemax_);
int entrycount = rangemax_ - rangemin_;
for (int entry = 0; entry < entrycount; entry++) {
// centre weight
int count = buckets_[entry] * factor;
for (int offset = 1; offset < factor; offset++) {
if (entry - offset >= 0) {
count += buckets_[entry - offset] * (factor - offset);
}
if (entry + offset < entrycount) {
count += buckets_[entry + offset] * (factor - offset);
}
}
result.add(entry + rangemin_, count);
}
total_count_ = result.total_count_;
memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
}
/**********************************************************************
* STATS::cluster
*
* Cluster the samples into max_cluster clusters.
* Each call runs one iteration. The array of clusters must be
* max_clusters+1 in size as cluster 0 is used to indicate which samples
* have been used.
* The return value is the current number of clusters.
**********************************************************************/
int32_t STATS::cluster(float lower, // thresholds
float upper,
float multiple, // distance threshold
int32_t max_clusters, // max no to make
STATS *clusters) { // array of clusters
bool new_cluster; // added one
float *centres; // cluster centres
int32_t entry; // bucket index
int32_t cluster; // cluster index
int32_t best_cluster; // one to assign to
int32_t new_centre = 0; // residual mode
int32_t new_mode; // pile count of new_centre
int32_t count; // pile to place
float dist; // from cluster
float min_dist; // from best_cluster
int32_t cluster_count; // no of clusters
// Nothing to do without a histogram or without room for at least one cluster.
if (buckets_ == nullptr || max_clusters < 1) {
return 0;
}
// centres[i] holds the current centre of clusters[i]; index 0 is unused
// because clusters[0] only tracks which samples have been claimed.
centres = new float[max_clusters + 1];
// Pass 1: walk the clusters that already hold data (slots 1..max_clusters,
// stopping at the first unallocated or empty one). For each, take ile(0.5)
// as its centre and let it absorb still-unclaimed piles on both sides of
// its mode.
for (cluster_count = 1;
cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
clusters[cluster_count].total_count_ > 0;
cluster_count++) {
centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
new_centre = clusters[cluster_count].mode();
// Grow downwards from the mode while within `lower` of the centre and the
// source histogram is non-increasing moving away from the mode.
for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
pile_count(entry) <= pile_count(entry + 1);
entry--) {
// Only the part of the pile not yet recorded in clusters[0] is available.
count = pile_count(entry) - clusters[0].pile_count(entry);
if (count > 0) {
clusters[cluster_count].add(entry, count);
clusters[0].add(entry, count);
}
}
// Grow upwards from the mode under the mirrored conditions.
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
pile_count(entry) <= pile_count(entry - 1);
entry++) {
count = pile_count(entry) - clusters[0].pile_count(entry);
if (count > 0) {
clusters[cluster_count].add(entry, count);
clusters[0].add(entry, count);
}
}
}
// The loop above exits one past the last populated cluster.
cluster_count--;
// With no existing clusters, (re)initialise the used-sample tracker.
// NOTE(review): the return value of set_range is not checked here, unlike
// the call further down — confirm this is intentional.
if (cluster_count == 0) {
clusters[0].set_range(rangemin_, rangemax_);
}
// Pass 2: repeatedly seed a new cluster from the largest unclaimed pile
// that is far enough from every existing centre, until none qualifies or
// max_clusters is reached.
do {
new_cluster = false;
new_mode = 0;
for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
count = buckets_[entry] - clusters[0].buckets_[entry];
// remaining pile
if (count > 0) { // any to handle
// Find the existing cluster whose centre is nearest to this pile.
min_dist = static_cast<float>(INT32_MAX);
best_cluster = 0;
for (cluster = 1; cluster <= cluster_count; cluster++) {
dist = entry + rangemin_ - centres[cluster];
// find distance
if (dist < 0) {
dist = -dist;
}
if (dist < min_dist) {
min_dist = dist; // find least
best_cluster = cluster;
}
}
// The pile is a seed candidate when it is more than `upper` from the
// nearest centre and (if any cluster exists) also outside the
// [centre / multiple, centre * multiple] band around that centre.
if (min_dist > upper // far enough for new
&& (best_cluster == 0 || entry + rangemin_ > centres[best_cluster] * multiple ||
entry + rangemin_ < centres[best_cluster] / multiple)) {
// Keep the candidate with the largest unclaimed count.
if (count > new_mode) {
new_mode = count;
new_centre = entry + rangemin_;
}
}
}
}
// need new and room
if (new_mode > 0 && cluster_count < max_clusters) {
cluster_count++;
new_cluster = true;
// Give up (reporting zero clusters) if the new cluster cannot be set up.
if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
delete[] centres;
return 0;
}
// Seed the cluster with the chosen pile and mark that pile as claimed.
centres[cluster_count] = static_cast<float>(new_centre);
clusters[cluster_count].add(new_centre, new_mode);
clusters[0].add(new_centre, new_mode);
// Grow the new cluster downwards, same rule as in pass 1.
for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin_ &&
pile_count(entry) <= pile_count(entry + 1);
entry--) {
count = pile_count(entry) - clusters[0].pile_count(entry);
if (count > 0) {
clusters[cluster_count].add(entry, count);
clusters[0].add(entry, count);
}
}
// Grow the new cluster upwards, same rule as in pass 1.
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
pile_count(entry) <= pile_count(entry - 1);
entry++) {
count = pile_count(entry) - clusters[0].pile_count(entry);
if (count > 0) {
clusters[cluster_count].add(entry, count);
clusters[0].add(entry, count);
}
}
// Replace the seed position by ile(0.5) of what was actually gathered.
centres[cluster_count] = static_cast<float>(clusters[cluster_count].ile(0.5));
}
} while (new_cluster && cluster_count < max_clusters);
delete[] centres;
return cluster_count;
}
// Helper for top_n_modes: tests whether the bucket at index still belongs to
// the peak being gathered and, if so, folds it into the running totals.
//
// src_buckets[index] - used_buckets[index] is the unused part of the
// histogram at index. The bucket is accepted when that unused count is
// positive and no larger than *prev_count (the count accepted on the previous
// step), i.e. while the histogram keeps falling away from the mode.
//
// On acceptance: *total_count and *total_value (the sum of index * count,
// used later to compute the mean of the peak) are updated, the bucket is
// marked fully used, *prev_count becomes the accepted count, and true is
// returned. Otherwise nothing is modified and false is returned, which ends
// the peak.
static bool GatherPeak(int index, const int *src_buckets, int *used_buckets, int *prev_count,
                       int *total_count, double *total_value) {
  const int pile_count = src_buckets[index] - used_buckets[index];
  if (pile_count <= 0 || pile_count > *prev_count) {
    return false;
  }
  // Accumulate count and index.count product. The product is formed in
  // double: index * pile_count as an int multiplication can overflow for
  // large histograms before it is ever widened.
  *total_count += pile_count;
  *total_value += static_cast<double>(index) * pile_count;
  // Mark this index as used.
  used_buckets[index] = src_buckets[index];
  *prev_count = pile_count;
  return true;
}
// Finds (at most) the top max_modes modes, well actually the whole peak around
// each mode, returning them in the given modes vector as a <mean of peak,
// total count of peak> pair in order of decreasing total count.
// Since the mean is the key and the count the data in the pair, a single call
// to sort on the output will re-sort by increasing mean of peak if that is
// more useful than decreasing total count.
// Returns the actual number of modes found. If max_modes <= 0 the vector is
// left untouched and 0 is returned; otherwise the vector is cleared first.
int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const {
  if (max_modes <= 0) {
    return 0;
  }
  modes.clear();
  // Like the other accessors, tolerate a STATS that has no histogram.
  if (buckets_ == nullptr) {
    return 0;
  }
  const int src_count = rangemax_ - rangemin_;
  // Used copies the counts in buckets_ as they get used.
  STATS used(rangemin_, rangemax_);
  // Total count of the smallest peak found so far.
  int least_count = 1;
  // Mode that is used as a seed for each peak.
  int max_count = 0;
  do {
    // Find an unused mode: the largest not-yet-used pile.
    max_count = 0;
    int max_index = 0;
    for (int src_index = 0; src_index < src_count; src_index++) {
      const int pile_count = buckets_[src_index] - used.buckets_[src_index];
      if (pile_count > max_count) {
        max_count = pile_count;
        max_index = src_index;
      }
    }
    if (max_count > 0) {
      // Copy the bucket count to used so it doesn't get found again.
      used.buckets_[max_index] = max_count;
      // Get the entire peak. The index * count product is formed in double
      // because the int multiplication can overflow for large histograms.
      double total_value = static_cast<double>(max_index) * max_count;
      int total_count = max_count;
      // Gather to the right of the mode while the histogram keeps falling.
      int prev_pile = max_count;
      for (int offset = 1; max_index + offset < src_count; ++offset) {
        if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
                        &total_value)) {
          break;
        }
      }
      // Gather to the left of the mode, restarting from the mode's height.
      prev_pile = buckets_[max_index];
      for (int offset = 1; max_index - offset >= 0; ++offset) {
        if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
                        &total_value)) {
          break;
        }
      }
      // Compare sizes as int so signed and unsigned values are never mixed.
      const int mode_count = static_cast<int>(modes.size());
      if (total_count > least_count || mode_count < max_modes) {
        // We definitely want this mode, so if we have enough discard the least.
        if (mode_count == max_modes) {
          modes.resize(max_modes - 1);
        }
        // Linear search for the insertion point that keeps the vector in
        // order of decreasing total count (ties go after existing entries).
        auto insert_at = modes.begin();
        while (insert_at != modes.end() && insert_at->data() >= total_count) {
          ++insert_at;
        }
        auto peak_mean = static_cast<float>(total_value / total_count + rangemin_);
        modes.insert(insert_at, KDPairInc<float, int>(peak_mean, total_count));
        least_count = modes.back().data();
      }
    }
  } while (max_count > 0);
  return static_cast<int>(modes.size());
}
/**********************************************************************
* STATS::print
*
* Prints a summary and table of the histogram.
**********************************************************************/
void STATS::print() const {
  if (buckets_ == nullptr) {
    return;
  }
  // Restrict the table to the span of buckets that actually hold data.
  const int32_t first = min_bucket() - rangemin_;
  const int32_t last = max_bucket() - rangemin_;
  int printed = 0;
  for (int index = first; index <= last; ++index) {
    if (buckets_[index] == 0) {
      continue; // empty buckets are not listed
    }
    tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
    ++printed;
    // Eight entries per output line.
    if (printed % 8 == 0) {
      tprintf("\n");
    }
  }
  tprintf("\n");
  print_summary();
}
/**********************************************************************
* STATS::print_summary
*
* Print a summary of the stats.
**********************************************************************/
void STATS::print_summary() const {
  if (buckets_ == nullptr) {
    return;
  }
  // The "Really" figures are the true extreme buckets, printed next to the
  // corresponding ile() estimates.
  const int32_t lowest = min_bucket();
  const int32_t highest = max_bucket();
  tprintf("Total count=%d\n", total_count_);
  tprintf("Min=%.2f Really=%d\n", ile(0.0), lowest);
  tprintf("Lower quartile=%.2f\n", ile(0.25));
  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
  tprintf("Upper quartile=%.2f\n", ile(0.75));
  tprintf("Max=%.2f Really=%d\n", ile(1.0), highest);
  tprintf("Range=%d\n", highest + 1 - lowest);
  tprintf("Mean= %.2f\n", mean());
  tprintf("SD= %.2f\n", sd());
}
/**********************************************************************
* STATS::plot
*
* Draw a histogram of the stats table.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void STATS::plot(ScrollView *window,              // to draw in
                 float xorigin,                   // bottom left
                 float yorigin,
                 float xscale,                    // one x unit
                 float yscale,                    // one y unit
                 ScrollView::Color colour) const { // colour to draw in
  if (buckets_ == nullptr) {
    return;
  }
  window->Pen(colour);
  const int bucket_count = rangemax_ - rangemin_;
  int index = 0;
  // One rectangle per bucket: one x unit wide, count y units tall.
  while (index < bucket_count) {
    window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
                      yorigin + yscale * buckets_[index]);
    ++index;
  }
}
#endif
/**********************************************************************
* STATS::plotline
*
* Draw a histogram of the stats table. (Line only)
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void STATS::plotline(ScrollView *window,              // to draw in
                     float xorigin,                   // bottom left
                     float yorigin,
                     float xscale,                    // one x unit
                     float yscale,                    // one y unit
                     ScrollView::Color colour) const { // colour to draw in
  if (buckets_ == nullptr) {
    return;
  }
  window->Pen(colour);
  // Start at the height of the first bucket, then draw to each bucket's
  // height in turn (including the first).
  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
  const int bucket_count = rangemax_ - rangemin_;
  int index = 0;
  while (index < bucket_count) {
    window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
    ++index;
  }
}
#endif
} // namespace tesseract

View File

@ -0,0 +1,150 @@
/**********************************************************************
* File: statistc.h (Formerly stats.h)
* Description: Class description for STATS class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCSTRUCT_STATISTC_H_
#define TESSERACT_CCSTRUCT_STATISTC_H_
#include <cstdio>
#include "kdpair.h"
#include "scrollview.h"
namespace tesseract {
// Simple histogram-based statistics for integer values in a known
// range, such that the range is small compared to the number of samples.
// NOTE(review): this class owns buckets_ through a raw pointer and declares a
// destructor but no copy operations — confirm that no caller copies a STATS.
class TESS_API STATS {
public:
  // The histogram buckets are in the range
  // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
  // [min_bucket_value, max_bucket_value].
  // Any data under min_bucket value is silently mapped to min_bucket_value,
  // and likewise, any data over max_bucket_value is silently mapped to
  // max_bucket_value.
  // In the internal array, min_bucket_value maps to 0 and
  // max_bucket_value_plus_1 - min_bucket_value to the array size.
  // TODO(rays) This is ugly. Convert the second argument to
  // max_bucket_value and all the code that uses it.
  STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
  STATS() = default; // empty for arrays
  ~STATS();

  // (Re)Sets the range and clears the counts.
  // See the constructor for info on max and min values.
  bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);

  void clear(); // empty buckets

  void add(int32_t value, int32_t count);

  // "Accessors" return various statistics on the data.
  int32_t mode() const; // get mode of samples
  double mean() const;  // get mean of samples
  double sd() const;    // standard deviation
  // Returns the fractile value such that frac fraction (in [0,1]) of samples
  // has a value less than the return value.
  double ile(double frac) const;
  // Returns the minimum used entry in the histogram (ie the minimum of the
  // data, NOT the minimum of the supplied range, nor is it an index.)
  // Would normally be called min(), but that is a reserved word in VC++.
  int32_t min_bucket() const; // Find min
  // Returns the maximum used entry in the histogram (ie the maximum of the
  // data, NOT the maximum of the supplied range, nor is it an index.)
  int32_t max_bucket() const; // Find max
  // Finds a more useful estimate of median than ile(0.5).
  // Overcomes a problem with ile() - if the samples are, for example,
  // 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
  // between 6 and 13 = 9.5
  double median() const; // get median of samples
  // Returns the count of the given value. Values outside the range report
  // the count of the first or last bucket respectively. A STATS with no
  // histogram allocated (e.g. default-constructed) reports 0 rather than
  // dereferencing a null bucket array.
  int32_t pile_count(int32_t value) const {
    if (buckets_ == nullptr) {
      return 0;
    }
    if (value <= rangemin_) {
      return buckets_[0];
    }
    if (value >= rangemax_ - 1) {
      return buckets_[rangemax_ - rangemin_ - 1];
    }
    return buckets_[value - rangemin_];
  }
  // Returns the total count of all buckets.
  int32_t get_total() const {
    return total_count_; // total of all piles
  }
  // Returns true if x is a local min.
  bool local_min(int32_t x) const;

  // Apply a triangular smoothing filter to the stats.
  // This makes the modes a bit more useful.
  // The factor gives the height of the triangle, i.e. the weight of the
  // centre.
  void smooth(int32_t factor);

  // Cluster the samples into max_cluster clusters.
  // Each call runs one iteration. The array of clusters must be
  // max_clusters+1 in size as cluster 0 is used to indicate which samples
  // have been used.
  // The return value is the current number of clusters.
  int32_t cluster(float lower, // thresholds
                  float upper,
                  float multiple,       // distance threshold
                  int32_t max_clusters, // max no to make
                  STATS *clusters);     // array of clusters

  // Finds (at most) the top max_modes modes, well actually the whole peak
  // around each mode, returning them in the given modes vector as a <mean of
  // peak, total count of peak> pair in order of decreasing total count. Since
  // the mean is the key and the count the data in the pair, a single call to
  // sort on the output will re-sort by increasing mean of peak if that is more
  // useful than decreasing total count. Returns the actual number of modes
  // found.
  int top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const;

  // Prints a summary and table of the histogram.
  void print() const;

  // Prints summary stats only of the histogram.
  void print_summary() const;

#ifndef GRAPHICS_DISABLED
  // Draws the histogram as a series of rectangles.
  void plot(ScrollView *window,              // window to draw in
            float xorigin,                   // origin of histo
            float yorigin,                   // gram
            float xscale,                    // size of one unit
            float yscale,                    // size of one unit
            ScrollView::Color colour) const; // colour to draw in

  // Draws a line graph of the histogram.
  void plotline(ScrollView *window,              // window to draw in
                float xorigin,                   // origin of histo
                float yorigin,                   // gram
                float xscale,                    // size of one unit
                float yscale,                    // size of one unit
                ScrollView::Color colour) const; // colour to draw in
#endif // !GRAPHICS_DISABLED

private:
  int32_t rangemin_ = 0; // min of range
  // rangemax_ is not well named as it is really one past the max.
  int32_t rangemax_ = 0;       // max of range
  int32_t total_count_ = 0;    // no of samples
  int32_t *buckets_ = nullptr; // array of cells
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_STATISTC_H_

View File

@ -0,0 +1,550 @@
/**********************************************************************
* File: stepblob.cpp (Formerly cblob.c)
* Description: Code for C_BLOB class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "stepblob.h"
#include "points.h" // for operator+=, FCOORD, ICOORD
#include <allheaders.h> // for pixCreate, pixGetDepth
#include <vector> // for std::vector
namespace tesseract {
class DENORM;
// Max perimeter to width ratio for a baseline position above box bottom.
const double kMaxPerimeterWidthRatio = 8.0;
/**********************************************************************
* position_outline
*
* Position the outline in the given list at the relevant place
* according to its nesting.
**********************************************************************/
static void position_outline( // put in place
    C_OUTLINE *outline,       // thing to place
    C_OUTLINE_LIST *destlist  // destination list
) {
  C_OUTLINE_IT dest_it(destlist);
  // Iterator over the children of the outline being placed.
  C_OUTLINE_IT children_it(outline->child());
  if (dest_it.empty()) {
    dest_it.add_to_end(outline); // first outline at this level
    return;
  }
  do {
    C_OUTLINE *candidate = dest_it.data();
    if (*candidate < *outline) {
      // The new outline encloses this list member: the member comes off the
      // list, the new outline takes its place and adopts it as a child.
      candidate = dest_it.extract();
      dest_it.add_after_then_move(outline);
      children_it.add_to_end(candidate);
      // Any remaining members that are also enclosed become children too.
      while (!dest_it.at_last()) {
        dest_it.forward();
        candidate = dest_it.data();
        if (*candidate < *outline) {
          candidate = dest_it.extract();
          children_it.add_to_end(candidate);
          if (dest_it.empty()) {
            break;
          }
        }
      }
      return;
    }
    if (*outline < *candidate) {
      // The new outline is enclosed by this member: place it in the
      // member's child list instead.
      position_outline(outline, candidate->child());
      return;
    }
    dest_it.forward();
  } while (!dest_it.at_first());
  // No nesting relationship with any member: it stays at this level.
  dest_it.add_to_end(outline);
}
/**********************************************************************
* plot_outline_list
*
* Draw a list of outlines in the given colour and their children
* in the child colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
static void plot_outline_list(     // draw outlines
    C_OUTLINE_LIST *list,          // outlines to draw
    ScrollView *window,            // window to draw in
    ScrollView::Color colour,      // colour to use
    ScrollView::Color child_colour // colour of children
) {
  C_OUTLINE_IT outline_it(list);
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list(); outline_it.forward()) {
    C_OUTLINE *current = outline_it.data();
    current->plot(window, colour);
    if (current->child()->empty()) {
      continue;
    }
    // All descendants, at every depth, are drawn in child_colour.
    plot_outline_list(current->child(), window, child_colour, child_colour);
  }
}
// Draws the outlines in the given colour, and child_colour, normalized
// using the given denorm, making use of sub-pixel accurate information
// if available.
static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
ScrollView::Color colour, ScrollView::Color child_colour,
ScrollView *window) {
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
outline->plot_normed(denorm, colour, window);
if (!outline->child()->empty()) {
plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
}
}
}
#endif
/**********************************************************************
* reverse_outline_list
*
* Reverse a list of outlines and their children.
**********************************************************************/
static void reverse_outline_list(C_OUTLINE_LIST *list) {
  C_OUTLINE_IT outline_it(list);
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    C_OUTLINE *current = outline_it.data();
    // Reverse the outline and flag it as inverse.
    current->reverse();
    current->set_flag(COUT_INVERSE, true);
    // Then do the same for everything nested inside it.
    if (!current->child()->empty()) {
      reverse_outline_list(current->child());
    }
    outline_it.forward();
  }
}
/**********************************************************************
* C_BLOB::C_BLOB
*
* Constructor to build a C_BLOB from a list of C_OUTLINEs.
* The C_OUTLINEs are not copied so the source list is emptied.
* The C_OUTLINEs are nested correctly in the blob.
**********************************************************************/
C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
  C_OUTLINE_IT source_it(outline_list);
  // Drain the source list one outline at a time.
  while (!source_it.empty()) {
    C_OUTLINE *taken = source_it.extract();
    // Position this outline in appropriate position in the hierarchy.
    position_outline(taken, &outlines);
    source_it.forward();
  }
  CheckInverseFlagAndDirection();
}
// Lightweight constructor: wraps a single, already fully initialized outline.
// The outline is appended to the blob's outline list as-is.
C_BLOB::C_BLOB(C_OUTLINE *outline) {
  C_OUTLINE_IT outline_it(&outlines);
  outline_it.add_to_end(outline);
}
// Builds a set of one or more blobs from a list of outlines.
// Input: one outline on outline_list contains all the others, but the
// nesting and order are undefined.
// If good_blob is true, the blob is added to good_blobs_it, unless
// an illegal (generation-skipping) parent-child relationship is found.
// If so, the parent blob goes to bad_blobs_it, and the immediate children
// are promoted to the top level, recursively being sent to good_blobs_it.
// If good_blob is false, all created blobs will go to the bad_blobs_it.
// Output: outline_list is empty. One or more blobs are added to
// good_blobs_it and/or bad_blobs_it.
void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it) {
// List of top-level outlines with correctly nested children.
C_OUTLINE_LIST nested_outlines;
for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
C_OUTLINE *outline = ol_it.extract();
// Position this outline in appropriate position in the hierarchy.
position_outline(outline, &nested_outlines);
}
// Check for legal nesting and reassign as required.
for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
C_OUTLINE *outline = ol_it.extract();
bool blob_is_good = good_blob;
if (!outline->IsLegallyNested()) {
// The blob is illegally nested.
// Mark it bad, and add all its children to the top-level list.
blob_is_good = false;
ol_it.add_list_after(outline->child());
}
auto *blob = new C_BLOB(outline);
// Set inverse flag and reverse if needed.
blob->CheckInverseFlagAndDirection();
// Put on appropriate list.
if (!blob_is_good && bad_blobs_it != nullptr) {
bad_blobs_it->add_after_then_move(blob);
} else {
good_blobs_it->add_after_then_move(blob);
}
}
}
// Sets the COUT_INVERSE flag appropriately on the outlines and their
// children recursively, reversing the outlines if needed so that
// everything has an anticlockwise top-level.
void C_BLOB::CheckInverseFlagAndDirection() {
C_OUTLINE_IT ol_it(&outlines);
for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
C_OUTLINE *outline = ol_it.data();
if (outline->turn_direction() < 0) {
outline->reverse();
reverse_outline_list(outline->child());
outline->set_flag(COUT_INVERSE, true);
} else {
outline->set_flag(COUT_INVERSE, false);
}
}
}
// Creates a heap-allocated fake blob for the given box. The blob contains a
// single fake outline with no steps, produced by C_OUTLINE::FakeOutline.
C_BLOB *C_BLOB::FakeBlob(const TBOX &box) {
  C_OUTLINE_LIST fake_outlines;
  C_OUTLINE::FakeOutline(box, &fake_outlines);
  auto *blob = new C_BLOB(&fake_outlines);
  return blob;
}
/**********************************************************************
* C_BLOB::bounding_box
*
* Return the bounding box of the blob.
**********************************************************************/
TBOX C_BLOB::bounding_box() const { // bounding box
  // The iterator needs a non-const list, but this is a read-only iteration.
  C_OUTLINE_IT outline_it(const_cast<C_OUTLINE_LIST *>(&outlines));
  TBOX enclosing; // grows to cover each top-level outline
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    enclosing += outline_it.data()->bounding_box();
    outline_it.forward();
  }
  return enclosing;
}
/**********************************************************************
* C_BLOB::area
*
* Return the area of the blob.
**********************************************************************/
int32_t C_BLOB::area() { // area
C_OUTLINE *outline; // current outline
C_OUTLINE_IT it = &outlines; // outlines of blob
int32_t total; // total area
total = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
outline = it.data();
total += outline->area();
}
return total;
}
/**********************************************************************
* C_BLOB::perimeter
*
* Return the perimeter of the top and 2nd level outlines.
**********************************************************************/
int32_t C_BLOB::perimeter() {
C_OUTLINE *outline; // current outline
C_OUTLINE_IT it = &outlines; // outlines of blob
int32_t total; // total perimeter
total = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
outline = it.data();
total += outline->perimeter();
}
return total;
}
/**********************************************************************
* C_BLOB::outer_area
*
* Return the area of the blob.
**********************************************************************/
int32_t C_BLOB::outer_area() { // area
C_OUTLINE *outline; // current outline
C_OUTLINE_IT it = &outlines; // outlines of blob
int32_t total; // total area
total = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
outline = it.data();
total += outline->outer_area();
}
return total;
}
/**********************************************************************
* C_BLOB::count_transitions
*
* Return the total x and y maxes and mins in the blob.
* Child outlines are not counted.
**********************************************************************/
int32_t C_BLOB::count_transitions( // area
int32_t threshold // on size
) {
C_OUTLINE *outline; // current outline
C_OUTLINE_IT it = &outlines; // outlines of blob
int32_t total; // total area
total = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
outline = it.data();
total += outline->count_transitions(threshold);
}
return total;
}
/**********************************************************************
* C_BLOB::move
*
* Move C_BLOB by vector
**********************************************************************/
void C_BLOB::move( // reposition blob
const ICOORD vec // by vector
) {
C_OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->move(vec); // move each outline
}
}
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
static void RotateOutlineList(const FCOORD &rotation, C_OUTLINE_LIST *outlines) {
C_OUTLINE_LIST new_outlines;
C_OUTLINE_IT src_it(outlines);
C_OUTLINE_IT dest_it(&new_outlines);
while (!src_it.empty()) {
C_OUTLINE *old_outline = src_it.extract();
src_it.forward();
auto *new_outline = new C_OUTLINE(old_outline, rotation);
if (!old_outline->child()->empty()) {
RotateOutlineList(rotation, old_outline->child());
C_OUTLINE_IT child_it(new_outline->child());
child_it.add_list_after(old_outline->child());
}
delete old_outline;
dest_it.add_to_end(new_outline);
}
src_it.add_list_after(&new_outlines);
}
/**********************************************************************
* C_BLOB::rotate
*
* Rotate C_BLOB by rotation.
* Warning! has to rebuild all the C_OUTLINEs.
**********************************************************************/
void C_BLOB::rotate(const FCOORD &rotation) {
RotateOutlineList(rotation, &outlines);
}
// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
// outline list and its children.
static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
if (pix != nullptr && pixGetDepth(pix) == 8) {
outline->ComputeEdgeOffsets(threshold, pix);
} else {
outline->ComputeBinaryOffsets();
}
if (!outline->child()->empty()) {
ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
}
}
}
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
// if the supplied pix is 8-bit or the binary edges if nullptr.
void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
}
// Estimates and returns the baseline position based on the shape of the
// outlines.
// We first find the minimum y-coord (y_mins) at each x-coord within the blob.
// If there is a run of some y or y+1 in y_mins that is longer than the total
// number of positions at bottom or bottom+1, subject to the additional
// condition that at least one side of the y/y+1 run is higher than y+1, so it
// is not a local maximum of y_mins, then y, not the bottom, makes a good candidate
// baseline position for this blob. Eg
// | ---|
// | |
// |- -----------| <= Good candidate baseline position.
// |- -|
// | -|
// |---| <= Bottom of blob
// Returns the estimated baseline y-coordinate for this blob, falling back to
// the bottom of the bounding box when no better candidate run is found.
int16_t C_BLOB::EstimateBaselinePosition() {
TBOX box = bounding_box();
int left = box.left();
int width = box.width();
int bottom = box.bottom();
// No outlines, or a perimeter that is large relative to the width: give up
// and use the box bottom.
if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
return bottom; // This is only for non-CJK blobs.
}
// Get the minimum y coordinate at each x-coordinate.
// y_mins has width + 1 entries (indexed by x - left), all starting at
// box.top() so any outline point at or below the top lowers the entry.
std::vector<int> y_mins;
y_mins.resize(width + 1, box.top());
C_OUTLINE_IT it(&outlines);
// Only the outlines directly on the blob's list are walked; child lists are
// not visited here.
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
ICOORD pos = outline->start_pos();
// Trace the outline step by step, recording the lowest y seen per column.
for (int s = 0; s < outline->pathlength(); ++s) {
if (pos.y() < y_mins[pos.x() - left]) {
y_mins[pos.x() - left] = pos.y();
}
pos += outline->step(s);
}
}
// Find the total extent of the bottom or bottom + 1.
int bottom_extent = 0;
for (int x = 0; x <= width; ++x) {
if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
++bottom_extent;
}
}
// Find the lowest run longer than the bottom extent that is not the bottom.
// best_min == box.top() doubles as the "nothing found" sentinel.
int best_min = box.top();
int prev_run = 0;
int prev_y = box.top();
int prev_prev_y = box.top();
// x advances by the length of the run just processed: prev_run is set to
// run at the end of each iteration.
for (int x = 0; x < width; x += prev_run) {
// Find the length of the current run.
int y_at_x = y_mins[x];
int run = 1;
while (x + run <= width && y_mins[x + run] == y_at_x) {
++run;
}
if (y_at_x > bottom + 1) {
// Possible contender.
int total_run = run;
// Find extent of current value or +1 to the right of x.
while (x + total_run <= width &&
(y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
++total_run;
}
// At least one end has to be higher so it is not a local max.
// Running off the right-hand end (x + total_run > width) also qualifies.
if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
// If the prev_run is at y + 1, then we can add that too. There cannot
// be a suitable run at y before that or we would have found it already.
if (prev_run > 0 && prev_y == y_at_x + 1) {
total_run += prev_run;
}
// Keep the lowest y whose combined run beats the bottom extent.
if (total_run > bottom_extent && y_at_x < best_min) {
best_min = y_at_x;
}
}
}
prev_run = run;
prev_prev_y = prev_y;
prev_y = y_at_x;
}
// No candidate accepted => fall back to the bottom of the box.
return best_min == box.top() ? bottom : best_min;
}
// Calls C_OUTLINE::render(left, top, pix) on every outline of the list and,
// recursively, on every non-empty child list.
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
  C_OUTLINE_IT outline_it(list);
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    C_OUTLINE *current = outline_it.data();
    current->render(left, top, pix);
    C_OUTLINE_LIST *children = current->child();
    if (!children->empty()) {
      render_outline_list(children, left, top, pix);
    }
    outline_it.forward();
  }
}
// Calls C_OUTLINE::render_outline(left, top, pix) on every outline of the
// list. Unlike render_outline_list, child lists are not visited.
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
  C_OUTLINE_IT outline_it(list);
  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    outline_it.data()->render_outline(left, top, pix);
    outline_it.forward();
  }
}
// Renders the blob (outlines and their children) into a newly created 1-bit
// image the size of the bounding box. The caller must pixDestroy the result.
Image C_BLOB::render() {
  TBOX bounds = bounding_box();
  Image rendered = pixCreate(bounds.width(), bounds.height(), 1);
  // Outlines are drawn relative to the top-left corner of the bounding box.
  render_outline_list(&outlines, bounds.left(), bounds.top(), rendered);
  return rendered;
}
// Renders only the outline (no fill) of the blob into a newly created 1-bit
// image the size of the bounding box. The caller must pixDestroy the result.
Image C_BLOB::render_outline() {
  TBOX bounds = bounding_box();
  Image rendered = pixCreate(bounds.width(), bounds.height(), 1);
  // Outlines are drawn relative to the top-left corner of the bounding box.
  render_outline_list_outline(&outlines, bounds.left(), bounds.top(), rendered);
  return rendered;
}
/**********************************************************************
* C_BLOB::plot
*
* Draw the C_BLOB in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void C_BLOB::plot(ScrollView *window, ScrollView::Color blob_colour,
                  ScrollView::Color child_colour) {
  // blob_colour is the main colour, child_colour is used for holes; the
  // drawing itself is delegated to the outline-list plotter.
  C_OUTLINE_LIST *own_outlines = &outlines;
  plot_outline_list(own_outlines, window, blob_colour, child_colour);
}
// Draws the blob in the given colour, and child_colour, normalized
// using the given denorm, making use of sub-pixel accurate information
// if available.
void C_BLOB::plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
ScrollView::Color child_colour, ScrollView *window) {
plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour, window);
}
#endif
} // namespace tesseract

View File

@ -0,0 +1,136 @@
/**********************************************************************
* File: stepblob.h (Formerly cblob.h)
* Description: Code for C_BLOB class.
* Author: Ray Smith
* Created: Tue Oct 08 10:41:13 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef STEPBLOB_H
#define STEPBLOB_H
#include "coutln.h" // for C_OUTLINE_LIST, C_OUTLINE
#include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
#include "points.h" // for FCOORD, ICOORD (ptr only)
#include "rect.h" // for TBOX
#include "scrollview.h" // for ScrollView, ScrollView::Color
#include <cstdint> // for int32_t, int16_t
struct Pix;
namespace tesseract {
class C_BLOB;
class DENORM;
ELISTIZEH(C_BLOB)
// A blob made of a list of C_OUTLINEs. C_BLOBs are themselves list elements
// (ELIST_LINK) so they can be stored in a C_BLOB_LIST.
class TESS_API C_BLOB : public ELIST_LINK {
public:
  C_BLOB() = default;
  explicit C_BLOB(C_OUTLINE_LIST *outline_list);
  // Simpler constructor to build a blob from a single outline that has
  // already been fully initialized.
  explicit C_BLOB(C_OUTLINE *outline);

  // Builds a set of one or more blobs from a list of outlines.
  // Input: one outline on outline_list contains all the others, but the
  // nesting and order are undefined.
  // If good_blob is true, the blob is added to good_blobs_it, unless
  // an illegal (generation-skipping) parent-child relationship is found.
  // If so, the parent blob goes to bad_blobs_it, and the immediate children
  // are promoted to the top level, recursively being sent to good_blobs_it.
  // If good_blob is false, all created blobs will go to the bad_blobs_it.
  // Output: outline_list is empty. One or more blobs are added to
  // good_blobs_it and/or bad_blobs_it.
  static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list,
                                         C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it);

  // Sets the COUT_INVERSE flag appropriately on the outlines and their
  // children recursively, reversing the outlines if needed so that
  // everything has an anticlockwise top-level.
  void CheckInverseFlagAndDirection();

  // Build and return a fake blob containing a single fake outline with no
  // steps.
  static C_BLOB *FakeBlob(const TBOX &box);

  // Returns a pointer to the blob's own outline list (not a copy).
  C_OUTLINE_LIST *out_list() {
    return &outlines;
  }

  TBOX bounding_box() const; // compute bounding box
  int32_t area();            // compute area
  int32_t perimeter();       // Total perimeter of outlines and 1st level children.
  int32_t outer_area();      // compute area
  int32_t count_transitions( // count maxima
      int32_t threshold);    // size threshold

  void move(const ICOORD vec);         // reposition blob by vector
  void rotate(const FCOORD &rotation); // Rotate by given vector.

  // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
  // if the supplied pix is 8-bit or the binary edges if nullptr.
  void ComputeEdgeOffsets(int threshold, Image pix);

  // Estimates and returns the baseline position based on the shape of the
  // outlines.
  int16_t EstimateBaselinePosition();

  // Returns a Pix rendering of the blob. pixDestroy after use.
  Image render();
  // Returns a Pix rendering of the outline of the blob. (no fill).
  // pixDestroy after use.
  Image render_outline();

#ifndef GRAPHICS_DISABLED
  void plot(                           // draw one
      ScrollView *window,              // window to draw in
      ScrollView::Color blob_colour,   // for outer bits
      ScrollView::Color child_colour); // for holes
  // Draws the blob in the given colour, and child_colour, normalized
  // using the given denorm, making use of sub-pixel accurate information
  // if available.
  void plot_normed(const DENORM &denorm, ScrollView::Color blob_colour,
                   ScrollView::Color child_colour, ScrollView *window);
#endif // !GRAPHICS_DISABLED

  // Copy assignment: replaces this blob's outlines with deep copies of the
  // outlines of source.
  C_BLOB &operator=(const C_BLOB &source) {
    // Self-assignment must be a no-op. Without this guard the list would be
    // cleared first and then "copied" from the now-empty list, losing every
    // outline of the blob.
    if (this == &source) {
      return *this;
    }
    if (!outlines.empty()) {
      outlines.clear();
    }
    outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy);
    return *this;
  }

  // Returns a newly allocated deep copy of src. The caller owns the result.
  static C_BLOB *deep_copy(const C_BLOB *src) {
    auto *blob = new C_BLOB;
    *blob = *src;
    return blob;
  }

  // qsort-style comparator: v1 and v2 point at C_BLOB pointers; orders blobs
  // by the x-middle of their bounding boxes (ascending).
  static int SortByXMiddle(const void *v1, const void *v2) {
    const C_BLOB *blob1 = *static_cast<const C_BLOB *const *>(v1);
    const C_BLOB *blob2 = *static_cast<const C_BLOB *const *>(v2);
    return blob1->bounding_box().x_middle() - blob2->bounding_box().x_middle();
  }

private:
  C_OUTLINE_LIST outlines; // master elements
};
} // namespace tesseract
#endif

View File

@ -0,0 +1,67 @@
/******************************************************************************
* File: tabletransfer.h
* Description: Infrastructure for the transfer of table detection results
* Author: Stefan Brechtken
*
* (C) Copyright 2021, Stefan Brechtken
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
****************************************************************************/
#ifndef TESSERACT_CCSTRUCT_TABLETRANSFER_H_
#define TESSERACT_CCSTRUCT_TABLETRANSFER_H_
#include <memory>
#include <vector>
#include "rect.h"
namespace tesseract {
/// Structure for data transfer from table detector
/// NOTE(review): the field meanings below are inferred from the names only —
/// confirm against the table detector that fills this struct.
struct TessTable {
tesseract::TBOX box; ///< presumably the bounding box of the whole table
std::vector<tesseract::TBOX> rows; ///< presumably one box per table row
std::vector<tesseract::TBOX> cols; ///< presumably one box per table column
};
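/** \brief You can use this small template function to ensure that one and
* only one object of type T exists. It implements the Singleton Pattern.
*
* T must be default-constructible.
* Passing a non-null new_instance replaces (and destroys) the current
* instance, so references obtained before the replacement become dangling
* and must be fetched again.
* Usage examples:
* A& a = uniqueInstance<A>();
* a.xyz();
* A& b = uniqueInstance<A>(std::make_unique<A>(42)); // replace instance
* b.foo(); // do not use `a` any more, it refers to the destroyed instance
* or
* uniqueInstance<A>().xyz();
*/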
// Returns a reference to the single shared T. The instance is
// default-constructed on first use; a non-null new_instance replaces it
// (the previous object is destroyed by the unique_ptr assignment).
template <typename T>
T &uniqueInstance(std::unique_ptr<T> new_instance = nullptr) {
  static std::unique_ptr<T> holder = std::make_unique<T>();
  if (new_instance != nullptr) {
    holder = std::move(new_instance);
  }
  return *holder;
}
/// Read-only access to the instance managed by \see uniqueInstance.
/// A non-null new_instance is forwarded and replaces the current instance.
template <typename T>
const T &constUniqueInstance(std::unique_ptr<T> new_instance = nullptr) {
  T &shared = uniqueInstance<T>(std::move(new_instance));
  return shared;
}
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_TABLETRANSFER_H_

View File

@ -0,0 +1,584 @@
/**********************************************************************
* File: werd.cpp (Formerly word.c)
* Description: Code for the WERD class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "werd.h"
#include "linlsq.h"
#include "helpers.h"
namespace tesseract {
#define FIRST_COLOUR ScrollView::RED ///< first rainbow colour
#define LAST_COLOUR ScrollView::AQUAMARINE ///< last rainbow colour
#define CHILD_COLOUR ScrollView::BROWN ///< colour of children
/**
* WERD::WERD
*
* Constructor to build a WERD from a list of C_BLOBs.
* blob_list The C_BLOBs (in word order) are not copied;
* we take its elements and put them in our lists.
* blank_count blanks in front of the word
* text correct text, outlives this WERD
*/
// Builds the word from blob_list (whose elements are moved into cblobs),
// then decides the W_INVERSE flag by majority vote over the blobs and moves
// inconsistent or disagreeing blobs onto rej_cblobs.
// A null text is stored as the empty string.
WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
: blanks(blank_count), flags(0), script_id_(0), correct(text ? text : "") {
C_BLOB_IT start_it = &cblobs;
C_BLOB_IT rej_cblob_it = &rej_cblobs;
C_OUTLINE_IT c_outline_it;
int16_t inverted_vote = 0;
int16_t non_inverted_vote = 0;
// Move blob_list's elements into cblobs.
start_it.add_list_after(blob_list);
/*
Set white on black flag for the WERD, moving any duff blobs onto the
rej_cblobs list.
First, walk the cblobs checking the inverse flag for each outline of each
cblob. If a cblob has inconsistent flag settings for its different
outlines, move the blob to the reject list. Otherwise, increment the
appropriate w-on-b or b-on-w vote for the word.
Now set the inversion flag for the WERD by maximum vote.
Walk the blobs again, moving any blob whose inversion flag does not agree
with the consensus onto the reject list.
*/
start_it.set_to_list(&cblobs);
if (start_it.empty()) {
return;
}
// Pass 1: reject blobs whose outlines disagree with each other, and count
// votes from the consistent ones.
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
bool reject_blob = false;
bool blob_inverted;
c_outline_it.set_to_list(start_it.data()->out_list());
// The first outline's flag is the reference for the rest of the blob.
blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
for (c_outline_it.mark_cycle_pt(); !c_outline_it.cycled_list() && !reject_blob;
c_outline_it.forward()) {
reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
}
if (reject_blob) {
rej_cblob_it.add_after_then_move(start_it.extract());
} else {
if (blob_inverted) {
inverted_vote++;
} else {
non_inverted_vote++;
}
}
}
// A tie leaves W_INVERSE unset (strict greater-than).
flags.set(W_INVERSE, (inverted_vote > non_inverted_vote));
start_it.set_to_list(&cblobs);
if (start_it.empty()) {
return;
}
// Pass 2: reject the remaining blobs that disagree with the word's flag.
// Only the first outline is checked, since the survivors of pass 1 are
// internally consistent.
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
c_outline_it.set_to_list(start_it.data()->out_list());
if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE]) {
rej_cblob_it.add_after_then_move(start_it.extract());
}
}
}
/**
* WERD::WERD
*
* Constructor to build a WERD from a list of C_BLOBs.
* The C_BLOBs are not copied so the source list is emptied.
*/
WERD::WERD(C_BLOB_LIST *blob_list, ///< In word order
           WERD *clone)            ///< Source of flags
    : blanks(clone->blanks),
      flags(clone->flags),
      script_id_(clone->script_id_),
      correct(clone->correct) {
  // One iterator stays at the head of the source list, the other is walked
  // to its last element, so together they delimit the whole list.
  C_BLOB_IT first_it(blob_list);
  C_BLOB_IT last_it(blob_list);
  while (!last_it.at_last()) {
    last_it.forward();
  }
  // Take over the delimited range as this word's blobs.
  cblobs.assign_to_sublist(&first_it, &last_it);
}
// Returns a newly allocated WERD that holds only the given blob and clones
// its flags etc. from this word. W_BOL and W_EOL are then set from the bol
// and eol arguments. The caller owns the returned word.
WERD *WERD::ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob) {
  C_BLOB_LIST single_blob_list;
  C_BLOB_IT list_it(&single_blob_list);
  list_it.add_after_then_move(blob);
  auto *result = new WERD(&single_blob_list, this);
  result->set_flag(W_BOL, bol);
  result->set_flag(W_EOL, eol);
  return result;
}
/**
* WERD::bounding_box
*
* Return the bounding box of the WERD.
* This is quite a mess to compute!
* ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the
* words on the row were re-sorted. The original words were built with reject
* blobs included. The FUZZY SPACE flags were set accordingly. If ALL the
* blobs in a word are rejected the BB for the word is nullptr, causing the sort
* to screw up, leading to the erroneous possibility of the first word in a
* row being marked as FUZZY space.
*/
TBOX WERD::bounding_box() const {
  // The full box includes rejected blobs both above and below the word.
  const bool include_upper = true;
  const bool include_lower = true;
  return restricted_bounding_box(include_upper, include_lower);
}
// Returns the box of the good blobs, extended by those rejected blobs
// (noise/diacritics) that pass the upper_dots / lower_dots filters.
TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
  TBOX result = true_bounding_box();
  // The limits are those of the good blobs only; they are not updated while
  // rejected blobs are merged in.
  int good_bottom = result.bottom();
  int good_top = result.top();
  // Read-only walk over the rejected blobs (the iterator needs non-const).
  C_BLOB_IT rej_it(const_cast<C_BLOB_LIST *>(&rej_cblobs));
  rej_it.mark_cycle_pt();
  while (!rej_it.cycled_list()) {
    TBOX candidate = rej_it.data()->bounding_box();
    const bool upper_ok = upper_dots || candidate.bottom() <= good_top;
    const bool lower_ok = lower_dots || candidate.top() >= good_bottom;
    if (upper_ok && lower_ok) {
      result += candidate;
    }
    rej_it.forward();
  }
  return result;
}
// Returns the union of the bounding boxes of the blobs on cblobs; rejected
// blobs are not included.
TBOX WERD::true_bounding_box() const {
  TBOX result;
  // Read-only walk over the good blobs (the iterator needs non-const).
  C_BLOB_IT good_it(const_cast<C_BLOB_LIST *>(&cblobs));
  good_it.mark_cycle_pt();
  while (!good_it.cycled_list()) {
    result += good_it.data()->bounding_box();
    good_it.forward();
  }
  return result;
}
/**
* WERD::move
*
* Reposition WERD by vector
* NOTE!! REJECT CBLOBS ARE NOT MOVED
*/
void WERD::move(const ICOORD vec) {
C_BLOB_IT cblob_it(&cblobs); // cblob iterator
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
cblob_it.data()->move(vec);
}
}
/**
* WERD::join_on
*
* Join other word onto this one. The other word's blob lists are emptied; the other word itself is not deleted here.
*/
void WERD::join_on(WERD *other) {
  // Moves every element of `from` to the end of `to`, leaving `from` empty.
  const auto transfer_all = [](C_BLOB_LIST *from, C_BLOB_LIST *to) {
    C_BLOB_IT to_it(to);
    C_BLOB_IT from_it(from);
    while (!from_it.empty()) {
      to_it.add_to_end(from_it.extract());
      from_it.forward();
    }
  };
  // Good blobs first, then the rejected ones. `other` itself is not deleted.
  transfer_all(&other->cblobs, &cblobs);
  transfer_all(&other->rej_cblobs, &rej_cblobs);
}
/**
* WERD::copy_on
*
* Copy blobs from other word onto this one.
*/
void WERD::copy_on(WERD *other) {
  // If the other word starts to the left of this one, its copies are placed
  // in front of our blobs, otherwise after the last one.
  const bool reversed = other->bounding_box().left() < bounding_box().left();

  // Deep-copies `src` and splices the copies into `dest` on the chosen side.
  const auto splice_copies = [reversed](C_BLOB_LIST *dest, C_BLOB_LIST *src) {
    C_BLOB_IT dest_it(dest);
    C_BLOB_LIST copies;
    copies.deep_copy(src, &C_BLOB::deep_copy);
    if (reversed) {
      dest_it.add_list_before(&copies);
    } else {
      dest_it.move_to_last();
      dest_it.add_list_after(&copies);
    }
  };

  splice_copies(&cblobs, &other->cblobs);
  // The rejected blobs are only handled when there are any to copy.
  if (!other->rej_cblobs.empty()) {
    splice_copies(&rej_cblobs, &other->rej_cblobs);
  }
}
/**
* WERD::print
*
* Display members
*/
void WERD::print() const {
tprintf("Blanks= %d\n", blanks);
bounding_box().print();
tprintf("Flags = %lu = 0%lo\n", flags.to_ulong(), flags.to_ulong());
tprintf(" W_SEGMENTED = %s\n", flags[W_SEGMENTED] ? "TRUE" : "FALSE");
tprintf(" W_ITALIC = %s\n", flags[W_ITALIC] ? "TRUE" : "FALSE");
tprintf(" W_BOL = %s\n", flags[W_BOL] ? "TRUE" : "FALSE");
tprintf(" W_EOL = %s\n", flags[W_EOL] ? "TRUE" : "FALSE");
tprintf(" W_NORMALIZED = %s\n", flags[W_NORMALIZED] ? "TRUE" : "FALSE");
tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n", flags[W_SCRIPT_HAS_XHEIGHT] ? "TRUE" : "FALSE");
tprintf(" W_SCRIPT_IS_LATIN = %s\n", flags[W_SCRIPT_IS_LATIN] ? "TRUE" : "FALSE");
tprintf(" W_DONT_CHOP = %s\n", flags[W_DONT_CHOP] ? "TRUE" : "FALSE");
tprintf(" W_REP_CHAR = %s\n", flags[W_REP_CHAR] ? "TRUE" : "FALSE");
tprintf(" W_FUZZY_SP = %s\n", flags[W_FUZZY_SP] ? "TRUE" : "FALSE");
tprintf(" W_FUZZY_NON = %s\n", flags[W_FUZZY_NON] ? "TRUE" : "FALSE");
tprintf("Correct= %s\n", correct.c_str());
tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
tprintf("Script = %d\n", script_id_);
}
/**
* WERD::plot
*
* Draw the WERD in the given colour.
*/
#ifndef GRAPHICS_DISABLED
void WERD::plot(ScrollView *window, ScrollView::Color colour) {
  // Every good blob is drawn with the same colour for blob and holes.
  C_BLOB_IT blob_it(&cblobs);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    blob_it.data()->plot(window, colour, colour);
    blob_it.forward();
  }
  // Rejected blobs are drawn afterwards by their own routine.
  plot_rej_blobs(window);
}
// Returns the colour after `colour` in the rainbow, wrapping to FIRST_COLOUR
// once the successor reaches LAST_COLOUR or falls below FIRST_COLOUR.
ScrollView::Color WERD::NextColor(ScrollView::Color colour) {
  const auto successor = static_cast<ScrollView::Color>(colour + 1);
  const bool in_rainbow = successor >= FIRST_COLOUR && successor < LAST_COLOUR;
  if (in_rainbow) {
    return successor;
  }
  return FIRST_COLOUR;
}
/**
* WERD::plot
*
* Draw the WERD in rainbow colours in window.
*/
void WERD::plot(ScrollView *window) {
  // Each good blob gets the next rainbow colour; holes use CHILD_COLOUR.
  ScrollView::Color current_colour = FIRST_COLOUR;
  C_BLOB_IT blob_it(&cblobs);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    blob_it.data()->plot(window, current_colour, CHILD_COLOUR);
    current_colour = NextColor(current_colour);
    blob_it.forward();
  }
  // Rejected blobs are drawn afterwards by their own routine.
  plot_rej_blobs(window);
}
/**
* WERD::plot_rej_blobs
*
* Draw the WERD rejected blobs in window - ALWAYS GREY
*/
void WERD::plot_rej_blobs(ScrollView *window) {
  // Rejected blobs are always grey, for both the blob and its holes.
  C_BLOB_IT rej_it(&rej_cblobs);
  rej_it.mark_cycle_pt();
  while (!rej_it.cycled_list()) {
    C_BLOB *rejected = rej_it.data();
    rejected->plot(window, ScrollView::GREY, ScrollView::GREY);
    rej_it.forward();
  }
}
#endif // !GRAPHICS_DISABLED
/**
* WERD::shallow_copy()
*
* Make a shallow copy of a word
*/
WERD *WERD::shallow_copy() {
  // Copies blanks, flags and the correct text into a newly allocated word.
  // The blob lists are not copied. The caller owns the returned word.
  // NOTE(review): script_id_ is not copied here, unlike in operator= and the
  // cloning constructor — confirm that this is intentional.
  auto *copy = new WERD;
  copy->correct = correct;
  copy->flags = flags;
  copy->blanks = blanks;
  return copy;
}
/**
* WERD::operator=
*
* Assign a word, DEEP copying the blob list
*/
WERD &WERD::operator=(const WERD &source) {
  // Self-assignment must be a no-op. Without this guard the blob lists would
  // be cleared first and then deep-copied from the now-empty lists, so the
  // word would lose all of its blobs.
  if (this == &source) {
    return *this;
  }
  this->ELIST2_LINK::operator=(source);
  blanks = source.blanks;
  flags = source.flags;
  script_id_ = source.script_id_;
  correct = source.correct;
  // Replace both blob lists with deep copies of the source's lists.
  cblobs.clear();
  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
  rej_cblobs.clear();
  rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
  return *this;
}
/**
* word_comparator()
*
* word comparator used to sort a word list so that words are in increasing
* order of left edge.
*/
int word_comparator(const void *word1p, const void *word2p) {
const WERD *word1 = *reinterpret_cast<const WERD *const *>(word1p);
const WERD *word2 = *reinterpret_cast<const WERD *const *>(word2p);
return word1->bounding_box().left() - word2->bounding_box().left();
}
/**
* WERD::ConstructWerdWithNewBlobs()
*
* This method returns a new werd constructed using the blobs in the input
* all_blobs list, which correspond to the blobs in this werd object. The
* blobs used to construct the new word are consumed and removed from the
* input all_blobs list.
* Returns nullptr if the word couldn't be constructed.
* Returns original blobs for which no matches were found in the output list
* orphan_blobs (appends).
*/
// Builds a new word from the blobs of all_blobs that match this word's blobs.
// Matching blobs are removed from all_blobs. Unmatched blobs of this word
// are appended to orphan_blobs when it is non-null. Returns nullptr when no
// blob matched.
WERD *WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs) {
C_BLOB_LIST current_blob_list;
C_BLOB_IT werd_blobs_it(&current_blob_list);
// Add the word's c_blobs.
// (This moves them off this word's own list onto the local list.)
werd_blobs_it.add_list_after(cblob_list());
// New blob list. These contain the blobs which will form the new word.
C_BLOB_LIST new_werd_blobs;
C_BLOB_IT new_blobs_it(&new_werd_blobs);
// not_found_blobs contains the list of current word's blobs for which a
// corresponding blob wasn't found in the input all_blobs list.
C_BLOB_LIST not_found_blobs;
C_BLOB_IT not_found_it(&not_found_blobs);
not_found_it.move_to_last();
werd_blobs_it.move_to_first();
for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list(); werd_blobs_it.forward()) {
// Each original blob is extracted here and then either deleted (match
// found) or moved to not_found_blobs below.
C_BLOB *werd_blob = werd_blobs_it.extract();
TBOX werd_blob_box = werd_blob->bounding_box();
bool found = false;
// Now find the corresponding blob for this blob in the all_blobs
// list. For now, follow the inefficient method of pairwise
// comparisons. Ideally, one can pre-bucket the blobs by row.
C_BLOB_IT all_blobs_it(all_blobs);
// There is no break in this loop: every blob of all_blobs that matches is
// taken, not just the first one.
for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) {
C_BLOB *a_blob = all_blobs_it.data();
// Compute the overlap of the two blobs. If major, a_blob should
// be added to the new blobs list.
TBOX a_blob_box = a_blob->bounding_box();
// A null box is only reported; processing continues regardless.
if (a_blob_box.null_box()) {
tprintf("Bounding box couldn't be ascertained\n");
}
if (werd_blob_box.contains(a_blob_box) || werd_blob_box.major_overlap(a_blob_box)) {
// Old blobs are from minimal splits, therefore are expected to be
// bigger. The new small blobs should cover a significant portion.
// This is it.
all_blobs_it.extract();
new_blobs_it.add_after_then_move(a_blob);
found = true;
}
}
if (!found) {
not_found_it.add_after_then_move(werd_blob);
} else {
delete werd_blob;
}
}
// Iterate over all not found blobs. Some of them may be due to
// under-segmentation (which is OK, since the corresponding blob is already
// in the list in that case.
not_found_it.move_to_first();
for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) {
C_BLOB *not_found = not_found_it.data();
TBOX not_found_box = not_found->bounding_box();
C_BLOB_IT existing_blobs_it(new_blobs_it);
for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
existing_blobs_it.forward()) {
C_BLOB *a_blob = existing_blobs_it.data();
TBOX a_blob_box = a_blob->bounding_box();
// Major overlap in either direction plus more than 80% y-overlap means the
// unmatched blob is already represented in the new word.
if ((not_found_box.major_overlap(a_blob_box) || a_blob_box.major_overlap(not_found_box)) &&
not_found_box.y_overlap_fraction(a_blob_box) > 0.8) {
// Already taken care of.
delete not_found_it.extract();
break;
}
}
}
// Hand the remaining unmatched blobs to the caller, appended at the end.
if (orphan_blobs) {
C_BLOB_IT orphan_blobs_it(orphan_blobs);
orphan_blobs_it.move_to_last();
orphan_blobs_it.add_list_after(&not_found_blobs);
}
// New blobs are ready. Create a new werd object with these.
WERD *new_werd = nullptr;
if (!new_werd_blobs.empty()) {
new_werd = new WERD(&new_werd_blobs, this);
} else {
// Add the blobs back to this word so that it can be reused.
// NOTE(review): if orphan_blobs was non-null, not_found_blobs has already
// been handed over above, so presumably nothing is added back here — confirm.
C_BLOB_IT this_list_it(cblob_list());
this_list_it.add_list_after(&not_found_blobs);
}
return new_werd;
}
// Removes noise from the word by moving small outlines to the rej_cblobs
// list, based on the size_threshold.
void WERD::CleanNoise(float size_threshold) {
C_BLOB_IT blob_it(&cblobs);
C_BLOB_IT rej_it(&rej_cblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
C_OUTLINE_IT ol_it(blob->out_list());
for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
C_OUTLINE *outline = ol_it.data();
TBOX ol_box = outline->bounding_box();
int ol_size = ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height();
if (ol_size < size_threshold) {
// This outline is too small. Move it to a separate blob in the
// reject blobs list.
auto *rej_blob = new C_BLOB(ol_it.extract());
rej_it.add_after_then_move(rej_blob);
}
}
if (blob->out_list()->empty()) {
delete blob_it.extract();
}
}
}
// Empties rej_cblobs: one outline is extracted from each rejected blob and
// appended to the given vector, which then owns the pointer; the blob itself
// is deleted.
// NOTE(review): only a single outline is taken per blob before the blob is
// deleted — confirm that rejected blobs never hold more than one.
void WERD::GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines) {
  C_BLOB_IT reject_it(&rej_cblobs);
  reject_it.mark_cycle_pt();
  while (!reject_it.empty()) {
    C_BLOB *rejected = reject_it.extract();
    C_OUTLINE_IT outline_it(rejected->out_list());
    C_OUTLINE *noise = outline_it.extract();
    outlines->push_back(noise);
    delete rejected;
    reject_it.forward();
  }
}
// Adds the selected outlines to the indicated real blobs, and puts the rest
// back in rej_cblobs where they came from. Where the target_blobs entry is
// nullptr, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// vector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
// wanted, target_blobs and outlines are parallel vectors indexed by outline.
// make_next_word_fuzzy may be nullptr, in which case it is not written.
bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
const std::vector<C_BLOB *> &target_blobs,
const std::vector<C_OUTLINE *> &outlines,
bool *make_next_word_fuzzy) {
bool outline_added_to_start = false;
if (make_next_word_fuzzy != nullptr) {
*make_next_word_fuzzy = false;
}
C_BLOB_IT rej_it(&rej_cblobs);
for (unsigned i = 0; i < outlines.size(); ++i) {
C_OUTLINE *outline = outlines[i];
if (outline == nullptr) {
continue; // Already used it.
}
if (wanted[i]) {
C_BLOB *target_blob = target_blobs[i];
TBOX noise_box = outline->bounding_box();
if (target_blob == nullptr) {
// No existing target: wrap the outline in a new blob and insert it into
// cblobs by left edge.
target_blob = new C_BLOB(outline);
// Need to find the insertion point.
C_BLOB_IT blob_it(&cblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
TBOX blob_box = blob->bounding_box();
// Insert before the first blob that starts to the right of the outline.
if (blob_box.left() > noise_box.left()) {
if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) {
// We might want to join this word to its predecessor.
outline_added_to_start = true;
}
blob_it.add_before_stay_put(target_blob);
break;
}
}
// The loop ran to completion without inserting: append at the end.
if (blob_it.cycled_list()) {
blob_it.add_to_end(target_blob);
if (make_next_word_fuzzy != nullptr) {
*make_next_word_fuzzy = true;
}
}
// Add all consecutive wanted, but null-blob outlines to same blob.
// Note that this advances the outer loop index i past the consumed entries.
C_OUTLINE_IT ol_it(target_blob->out_list());
while (i + 1 < outlines.size() && wanted[i + 1] && target_blobs[i + 1] == nullptr) {
++i;
ol_it.add_to_end(outlines[i]);
}
} else {
// Insert outline into this blob.
C_OUTLINE_IT ol_it(target_blob->out_list());
ol_it.add_to_end(outline);
}
} else {
// Put back on noise list.
rej_it.add_to_end(new C_BLOB(outline));
}
}
return outline_added_to_start;
}
} // namespace tesseract

View File

@ -0,0 +1,212 @@
/**********************************************************************
* File: werd.h
* Description: Code for the WERD class.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef WERD_H
#define WERD_H
#include "elst2.h"
#include "params.h"
#include "stepblob.h"
#include <bitset>
namespace tesseract {
// Per-word flag identifiers. Each enumerator is a bit index into the
// std::bitset held by WERD (read with WERD::flag, written with
// WERD::set_flag), so the numeric values must stay contiguous from zero.
enum WERD_FLAGS {
  W_SEGMENTED = 0,          ///< word is correctly segmented
  W_ITALIC = 1,             ///< text is italic
  W_BOLD = 2,               ///< text is bold
  W_BOL = 3,                ///< word starts a line
  W_EOL = 4,                ///< word ends a line
  W_NORMALIZED = 5,         ///< normalized flag; exact meaning not documented here
  W_SCRIPT_HAS_XHEIGHT = 6, ///< the x-height concept makes sense for the script
  W_SCRIPT_IS_LATIN = 7,    ///< Latin is special-cased for y. splitting
  W_DONT_CHOP = 8,          ///< fixed pitch chopped
  W_REP_CHAR = 9,           ///< repeated character
  W_FUZZY_SP = 10,          ///< fuzzy space
  W_FUZZY_NON = 11,         ///< fuzzy nonspace
  W_INVERSE = 12            ///< white text on black
};
// Display flag bit-number allocations. Each enumerator is a bit index into
// the display-flag std::bitset held by WERD (read with WERD::display_flag,
// written with WERD::set_display_flag).
enum DISPLAY_FLAGS {
  DF_BOX = 0,          ///< bounding box
  DF_TEXT = 1,         ///< correct ascii
  DF_POLYGONAL = 2,    ///< polygonal approximation
  DF_EDGE_STEP = 3,    ///< edge steps
  DF_BN_POLYGONAL = 4, ///< BL-normalised polygonal approximation
  DF_BLAMER = 5        ///< blamer information
};
class ROW; // forward decl
// A word: an ELIST2 list element that holds the word's good blobs (cblobs),
// its rejected blobs (rej_cblobs), the number of blanks before it, the word
// and display flag bitsets, a script id and the correct text.
class TESS_API WERD : public ELIST2_LINK {
public:
  WERD() = default;
  // WERD constructed with:
  //   blob_list - blobs of the word (we take this list's contents)
  //   blanks - number of blanks before the word
  //   text - correct text (outlives WERD)
  WERD(C_BLOB_LIST *blob_list, uint8_t blanks, const char *text);

  // WERD constructed from:
  //   blob_list - blobs in the word
  //   clone - werd to clone flags, etc from.
  WERD(C_BLOB_LIST *blob_list, WERD *clone);

  // Construct a WERD from a single_blob and clone the flags from this.
  // W_BOL and W_EOL flags are set according to the given values.
  WERD *ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob);

  ~WERD() = default;

  // assignment
  WERD &operator=(const WERD &source);

  // This method returns a new werd constructed using the blobs in the input
  // all_blobs list, which correspond to the blobs in this werd object. The
  // blobs used to construct the new word are consumed and removed from the
  // input all_blobs list.
  // Returns nullptr if the word couldn't be constructed.
  // Returns original blobs for which no matches were found in the output list
  // orphan_blobs (appends).
  WERD *ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs);

  // Accessors for reject / DUFF blobs in various formats
  C_BLOB_LIST *rej_cblob_list() { // compact format
    return &rej_cblobs;
  }

  // Accessors for good blobs in various formats.
  C_BLOB_LIST *cblob_list() { // get compact blobs
    return &cblobs;
  }

  // Returns the number of blanks stored for this word.
  uint8_t space() const { // access function
    return blanks;
  }
  // Overwrites the number of blanks stored for this word.
  void set_blanks(uint8_t new_blanks) {
    blanks = new_blanks;
  }

  // Returns the stored script id (kept internally as int16_t).
  int script_id() const {
    return script_id_;
  }
  // Stores the script id. The member is int16_t, so the value is narrowed;
  // the cast makes that existing conversion explicit.
  void set_script_id(int id) {
    script_id_ = static_cast<int16_t>(id);
  }

  // Returns the (default) bounding box including all the dots.
  TBOX bounding_box() const; // compute bounding box
  // Returns the bounding box including the desired combination of upper and
  // lower noise/diacritic elements.
  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
  // Returns the bounding box of only the good blobs.
  TBOX true_bounding_box() const;

  // Returns the correct text as a C string owned by this WERD.
  const char *text() const {
    return correct.c_str();
  }
  // Sets the correct text. A null new_text clears the text: assigning a null
  // const char * to std::string is undefined behaviour, so it is mapped to
  // the empty string instead.
  void set_text(const char *new_text) {
    correct = (new_text != nullptr) ? new_text : "";
  }

  // Returns the state of the word flag at bit index mask.
  bool flag(WERD_FLAGS mask) const {
    return flags[mask];
  }
  // Sets the word flag at bit index mask to value.
  void set_flag(WERD_FLAGS mask, bool value) {
    flags.set(mask, value);
  }

  // Returns the state of the display flag at bit index flag.
  bool display_flag(uint8_t flag) const {
    return disp_flags[flag];
  }
  // Sets the display flag at bit index flag to value.
  void set_display_flag(uint8_t flag, bool value) {
    disp_flags.set(flag, value);
  }

  WERD *shallow_copy(); // shallow copy word

  // reposition word by vector
  void move(const ICOORD vec);

  // join other's blobs onto this werd, emptying out other.
  void join_on(WERD *other);

  // copy other's blobs onto this word, leaving other intact.
  void copy_on(WERD *other);

  // tprintf word metadata (but not blob innards)
  void print() const;

#ifndef GRAPHICS_DISABLED
  // plot word on window in a uniform colour
  void plot(ScrollView *window, ScrollView::Color colour);

  // Get the next color in the (looping) rainbow.
  static ScrollView::Color NextColor(ScrollView::Color colour);

  // plot word on window in a rainbow of colours
  void plot(ScrollView *window);

  // plot rejected blobs in a rainbow of colours
  void plot_rej_blobs(ScrollView *window);
#endif // !GRAPHICS_DISABLED

  // Removes noise from the word by moving small outlines to the rej_cblobs
  // list, based on the size_threshold.
  void CleanNoise(float size_threshold);

  // Extracts all the noise outlines and stuffs the pointers into the given
  // vector of outlines. Afterwards, the outlines vector owns the pointers.
  void GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines);
  // Adds the selected outlines to the indicated real blobs, and puts the rest
  // back in rej_cblobs where they came from. Where the target_blobs entry is
  // nullptr, a run of wanted outlines is put into a single new blob.
  // Ownership of the outlines is transferred back to the word. (Hence
  // vector and not PointerVector.)
  // Returns true if any new blob was added to the start of the word, which
  // suggests that it might need joining to the word before it, and likewise
  // sets make_next_word_fuzzy true if any new blob was added to the end.
  bool AddSelectedOutlines(const std::vector<bool> &wanted,
                           const std::vector<C_BLOB *> &target_blobs,
                           const std::vector<C_OUTLINE *> &outlines, bool *make_next_word_fuzzy);

private:
  uint8_t blanks = 0;         // no of blanks
  std::bitset<16> flags;      // flags about word
  std::bitset<16> disp_flags; // display flags
  int16_t script_id_ = 0;     // From unicharset.
  std::string correct;        // correct text
  C_BLOB_LIST cblobs;         // compacted blobs
  C_BLOB_LIST rej_cblobs;     // DUFF blobs
};
// Expands the ELIST2IZEH macro for WERD.
// NOTE(review): presumably this declares the WERD list and iterator classes
// (macro defined outside this file, likely elst2.h) — confirm.
ELIST2IZEH(WERD)
} // namespace tesseract
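#include "ocrrow.h" // deliberately included here, after WERD, not at the top (NOTE(review): presumably ocrrow.h needs the declarations above — confirm)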
namespace tesseract {
// Comparison callback with the qsort(3) signature that orders words by
// increasing left edge.
// NOTE(review): the pointee type behind word1p/word2p is fixed by the
// implementation, which is not visible here — confirm before calling
// directly.
int word_comparator(const void *word1p, const void *word2p);
} // namespace tesseract
#endif