Description: 由于仓库中的Tesseract不是最新版本导致产生了一个bug,因此将Tesseract源码集成到项目中 Log: no Change-Id: I088de95d6c6ab670406daa8d47ed2ed46929c2c0
276 lines
8.5 KiB
C++
276 lines
8.5 KiB
C++
/******************************************************************************
|
|
*
|
|
* File: seam.cpp (Formerly seam.c)
|
|
* Author: Mark Seaman, OCR Technology
|
|
*
|
|
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
*****************************************************************************/
|
|
/*----------------------------------------------------------------------
|
|
I n c l u d e s
|
|
----------------------------------------------------------------------*/
|
|
#include "seam.h"
|
|
|
|
#include "blobs.h"
|
|
#include "tprintf.h"
|
|
|
|
namespace tesseract {
|
|
|
|
/*----------------------------------------------------------------------
|
|
Public Function Code
|
|
----------------------------------------------------------------------*/
|
|
|
|
// Returns the bounding box of all the points in the seam.
|
|
TBOX SEAM::bounding_box() const {
|
|
TBOX box(location_.x, location_.y, location_.x, location_.y);
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
box += splits_[s].bounding_box();
|
|
}
|
|
return box;
|
|
}
|
|
|
|
// Returns true if the splits in *this SEAM appear OK in the sense that they
|
|
// do not cross any outlines and do not chop off any ridiculously small
|
|
// pieces.
|
|
bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
|
|
// TODO(rays) Try testing all the splits. Duplicating original code for now,
|
|
// which tested only the first.
|
|
return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
|
|
}
|
|
|
|
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
|
|
// seam, which is about to be inserted at insert_index. Returns false if
|
|
// any of the computations fails, as this indicates an invalid chop.
|
|
// widthn_/widthp_ are only changed if modify is true.
|
|
bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
|
|
const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
|
|
for (int s = 0; s < insert_index; ++s) {
|
|
if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
|
|
return false;
|
|
}
|
|
}
|
|
if (!FindBlobWidth(blobs, insert_index, modify)) {
|
|
return false;
|
|
}
|
|
for (unsigned s = insert_index; s < seams.size(); ++s) {
|
|
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Computes the widthp_/widthn_ range. Returns false if not all the splits
|
|
// are accounted for. widthn_/widthp_ are only changed if modify is true.
|
|
bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
|
|
int num_found = 0;
|
|
if (modify) {
|
|
widthp_ = 0;
|
|
widthn_ = 0;
|
|
}
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
const SPLIT &split = splits_[s];
|
|
bool found_split = split.ContainedByBlob(*blobs[index]);
|
|
// Look right.
|
|
for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
|
|
found_split = split.ContainedByBlob(*blobs[b]);
|
|
if (found_split && b - index > widthp_ && modify) {
|
|
widthp_ = b - index;
|
|
}
|
|
}
|
|
// Look left.
|
|
for (int b = index - 1; !found_split && b >= 0; --b) {
|
|
found_split = split.ContainedByBlob(*blobs[b]);
|
|
if (found_split && index - b > widthn_ && modify) {
|
|
widthn_ = index - b;
|
|
}
|
|
}
|
|
if (found_split) {
|
|
++num_found;
|
|
}
|
|
}
|
|
return num_found == num_splits_;
|
|
}
|
|
|
|
// Splits this blob into two blobs by applying the splits included in
|
|
// *this SEAM
|
|
void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].SplitOutlineList(blob->outlines);
|
|
}
|
|
blob->ComputeBoundingBoxes();
|
|
|
|
divide_blobs(blob, other_blob, italic_blob, location_);
|
|
|
|
blob->EliminateDuplicateOutlines();
|
|
other_blob->EliminateDuplicateOutlines();
|
|
|
|
blob->CorrectBlobOrder(other_blob);
|
|
}
|
|
|
|
// Undoes ApplySeam by removing the seam between these two blobs.
|
|
// Produces one blob as a result, and deletes other_blob.
|
|
void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
|
|
if (blob->outlines == nullptr) {
|
|
blob->outlines = other_blob->outlines;
|
|
other_blob->outlines = nullptr;
|
|
}
|
|
|
|
TESSLINE *outline = blob->outlines;
|
|
while (outline->next) {
|
|
outline = outline->next;
|
|
}
|
|
outline->next = other_blob->outlines;
|
|
other_blob->outlines = nullptr;
|
|
delete other_blob;
|
|
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].UnsplitOutlineList(blob);
|
|
}
|
|
blob->ComputeBoundingBoxes();
|
|
blob->EliminateDuplicateOutlines();
|
|
}
|
|
|
|
// Prints everything in *this SEAM.
|
|
void SEAM::Print(const char *label) const {
|
|
tprintf("%s", label);
|
|
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, widthp_, widthn_);
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].Print();
|
|
if (s + 1 < num_splits_) {
|
|
tprintf(", ");
|
|
}
|
|
}
|
|
tprintf("\n");
|
|
}
|
|
|
|
// Prints a collection of SEAMs.
|
|
/* static */
|
|
void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
|
|
if (!seams.empty()) {
|
|
tprintf("%s\n", label);
|
|
for (unsigned x = 0; x < seams.size(); ++x) {
|
|
tprintf("%2u: ", x);
|
|
seams[x]->Print("");
|
|
}
|
|
tprintf("\n");
|
|
}
|
|
}
|
|
|
|
#ifndef GRAPHICS_DISABLED
|
|
// Draws the seam in the given window.
|
|
void SEAM::Mark(ScrollView *window) const {
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].Mark(window);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// Break up the blobs in this chain so that they are all independent.
|
|
// This operation should undo the affect of join_pieces.
|
|
/* static */
|
|
void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
|
|
int first, int last) {
|
|
for (int x = first; x < last; ++x) {
|
|
seams[x]->Reveal();
|
|
}
|
|
|
|
TESSLINE *outline = blobs[first]->outlines;
|
|
int next_blob = first + 1;
|
|
|
|
while (outline != nullptr && next_blob <= last) {
|
|
if (outline->next == blobs[next_blob]->outlines) {
|
|
outline->next = nullptr;
|
|
outline = blobs[next_blob]->outlines;
|
|
++next_blob;
|
|
} else {
|
|
outline = outline->next;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Join a group of base level pieces into a single blob that can then
|
|
// be classified.
|
|
/* static */
|
|
void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
|
|
int first, int last) {
|
|
TESSLINE *outline = blobs[first]->outlines;
|
|
if (!outline) {
|
|
return;
|
|
}
|
|
|
|
for (int x = first; x < last; ++x) {
|
|
SEAM *seam = seams[x];
|
|
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
|
|
seam->Hide();
|
|
}
|
|
while (outline->next) {
|
|
outline = outline->next;
|
|
}
|
|
outline->next = blobs[x + 1]->outlines;
|
|
}
|
|
}
|
|
|
|
// Hides the seam so the outlines appear not to be cut by it.
|
|
void SEAM::Hide() const {
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].Hide();
|
|
}
|
|
}
|
|
|
|
// Undoes hide, so the outlines are cut by the seam.
|
|
void SEAM::Reveal() const {
|
|
for (int s = 0; s < num_splits_; ++s) {
|
|
splits_[s].Reveal();
|
|
}
|
|
}
|
|
|
|
// Computes and returns, but does not set, the full priority of *this SEAM.
|
|
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
|
|
double center_knob, double width_change_knob) const {
|
|
if (num_splits_ == 0) {
|
|
return 0.0f;
|
|
}
|
|
for (int s = 1; s < num_splits_; ++s) {
|
|
splits_[s].SplitOutline();
|
|
}
|
|
float full_priority =
|
|
priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob,
|
|
width_change_knob);
|
|
for (int s = num_splits_ - 1; s >= 1; --s) {
|
|
splits_[s].UnsplitOutlines();
|
|
}
|
|
return full_priority;
|
|
}
|
|
|
|
/**
|
|
* @name start_seam_list
|
|
*
|
|
* Initialize a list of seams that match the original number of blobs
|
|
* present in the starting segmentation. Each of the seams created
|
|
* by this routine have location information only.
|
|
*/
|
|
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
|
|
seam_array->clear();
|
|
TPOINT location;
|
|
|
|
for (int b = 1; b < word->NumBlobs(); ++b) {
|
|
TBOX bbox = word->blobs[b - 1]->bounding_box();
|
|
TBOX nbox = word->blobs[b]->bounding_box();
|
|
location.x = (bbox.right() + nbox.left()) / 2;
|
|
location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
|
|
seam_array->push_back(new SEAM(0.0f, location));
|
|
}
|
|
}
|
|
|
|
} // namespace tesseract
|