diff options
Diffstat (limited to 'tesseract/unittest/textlineprojection_test.cc')
-rw-r--r-- | tesseract/unittest/textlineprojection_test.cc | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/tesseract/unittest/textlineprojection_test.cc b/tesseract/unittest/textlineprojection_test.cc new file mode 100644 index 00000000..f8423615 --- /dev/null +++ b/tesseract/unittest/textlineprojection_test.cc @@ -0,0 +1,262 @@ +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <allheaders.h> +#include <string> // for std::string + +#include "absl/strings/str_format.h" // for absl::StrFormat +#include "include_gunit.h" + +#include <tesseract/baseapi.h> +#include "colfind.h" +#include "log.h" // for LOG +#include "mutableiterator.h" +#include <tesseract/osdetect.h> +#include "pageres.h" +#include "tesseractclass.h" +#include "textlineprojection.h" + +namespace tesseract { + +// Minimum score for a STRONG_CHAIN textline. +// NOTE: Keep in sync with textlineprojection.cc. +const int kMinStrongTextValue = 6; + +// The fixture for testing Tesseract. +class TextlineProjectionTest : public testing::Test { + protected: + std::string OutputNameToPath(const std::string& name) { + file::MakeTmpdir(); + return file::JoinPath(FLAGS_test_tmpdir, name); + } + + TextlineProjectionTest() { + src_pix_ = nullptr; + bin_pix_ = nullptr; + tesseract_ = nullptr; + finder_ = nullptr; + denorm_ = nullptr; + projection_ = nullptr; + } + virtual ~TextlineProjectionTest() { + pixDestroy(&src_pix_); + pixDestroy(&bin_pix_); + delete finder_; + delete tesseract_; + } + + void SetImage(const char* filename) { + pixDestroy(&src_pix_); + src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str()); + api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY); + api_.SetPageSegMode(tesseract::PSM_AUTO_OSD); + api_.SetImage(src_pix_); + } + + // Ugly hacked-together function sets up projection_ and denorm_ by setting + // up for auto pagelayout, setting up a ColumnFinder, running it, and + // using accessors to get at the internal denorm and projection. + // If the coordinates have been rotated, the denorm should match + // correctly and transform coordinates back to the projection. + // We throw away all the blocks, blobs etc, and test the projection with + // the resultiterator from a separate BaseAPI run. + void SetupProjection() { + tesseract::TessdataManager mgr; + Tesseract* osd_tess = new Tesseract; + OSResults osr; + EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, nullptr, "osd", + tesseract::OEM_TESSERACT_ONLY, nullptr, 0, + nullptr, nullptr, false, &mgr), + 0); + tesseract_ = new Tesseract; + EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, nullptr, "eng", + tesseract::OEM_TESSERACT_ONLY, nullptr, 0, + nullptr, nullptr, false, &mgr), + 0); + bin_pix_ = api_.GetThresholdedImage(); + *tesseract_->mutable_pix_binary() = pixClone(bin_pix_); + osd_tess->set_source_resolution(api_.tesseract()->source_resolution()); + tesseract_->set_source_resolution(api_.tesseract()->source_resolution()); + int width = pixGetWidth(bin_pix_); + int height = pixGetHeight(bin_pix_); + // First make a single block covering the whole image. + BLOCK* block = new BLOCK("", true, 0, 0, 0, 0, width, height); + block->set_right_to_left(false); + BLOCK_LIST src_blocks; + BLOCK_IT block_it(&src_blocks); + block_it.add_to_end(block); + Pix* photomask_pix = nullptr; + // The blocks made by the ColumnFinder. Moved to blocks before return. + BLOCK_LIST found_blocks; + TO_BLOCK_LIST temp_blocks; + finder_ = tesseract_->SetupPageSegAndDetectOrientation( + tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess, &osr, &temp_blocks, + &photomask_pix, nullptr); + TO_BLOCK_IT to_block_it(&temp_blocks); + TO_BLOCK* to_block = to_block_it.data(); + denorm_ = finder_->denorm(); + TO_BLOCK_LIST to_blocks; + BLOBNBOX_LIST diacritic_blobs; + EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, + photomask_pix, nullptr, nullptr, nullptr, + &found_blocks, &diacritic_blobs, &to_blocks), + 0); + projection_ = finder_->projection(); + pixDestroy(&photomask_pix); + delete osd_tess; + } + + // Helper evaluates the given box, expects the result to be greater_than + // or !greater_than the target_value and provides diagnostics if not. + void EvaluateBox(const TBOX& box, bool greater_or_equal, int target_value, + const char* text, const char* message) { + int value = projection_->EvaluateBox(box, denorm_, false); + if (greater_or_equal != (value > target_value)) { + LOG(INFO) << absl::StrFormat( + "EvaluateBox too %s:%d vs %d for %s word '%s' at:", + greater_or_equal ? "low" : "high", value, target_value, message, + text); + box.print(); + value = projection_->EvaluateBox(box, denorm_, true); + } else { + LOG(INFO) << absl::StrFormat("EvaluateBox OK(%d) for %s word '%s'", + value, message, text); + } + if (greater_or_equal) { + EXPECT_GE(value, target_value); + } else { + EXPECT_LT(value, target_value); + } + } + + // Helper evaluates the DistanceOfBoxFromBox function by expecting that + // box should be nearer to true_box than false_box. + void EvaluateDistance(const TBOX& box, const TBOX& true_box, + const TBOX& false_box, const char* text, + const char* message) { + int true_dist = + projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false); + int false_dist = + projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false); + if (false_dist <= true_dist) { + LOG(INFO) << absl::StrFormat( + "Distance wrong:%d vs %d for %s word '%s' at:", + false_dist, true_dist, message, text); + true_box.print(); + projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true); + projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true); + } else { + LOG(INFO) << absl::StrFormat("Distance OK(%d vs %d) for %s word '%s'", + false_dist, true_dist, message, text); + } + } + + // Tests the projection on the word boxes of the given image. + // line_height is the cap + descender size of the text. + void VerifyBoxes(const char* imagefile, int line_height) { + SetImage(imagefile); + api_.Recognize(nullptr); + SetupProjection(); + MutableIterator* it = api_.GetMutableIterator(); + do { + char* text = it->GetUTF8Text(tesseract::RIL_WORD); + const PAGE_RES_IT* pr_it = it->PageResIt(); + WERD_RES* word = pr_it->word(); + // The word_box refers to the internal, possibly rotated, coords. + TBOX word_box = word->word->bounding_box(); + bool small_word = word_box.height() * 1.5 < line_height; + bool tall_word = word_box.height() * 1.125 > line_height; + // We pad small and tall words differently because ascenders and + // descenders affect the position and size of the upper/lower boxes. + int padding; + if (small_word) { + padding = word_box.height(); + } else if (tall_word) { + padding = word_box.height() / 3; + } else { + padding = word_box.height() / 2; + } + // Test that the word box gets a good score. + EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word"); + + // Now test a displaced box, both above and below the word. + TBOX upper_box(word_box); + upper_box.set_bottom(word_box.top()); + upper_box.set_top(word_box.top() + padding); + EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word"); + EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical"); + TBOX lower_box = word_box; + lower_box.set_top(word_box.bottom()); + lower_box.set_bottom(word_box.bottom() - padding); + if (tall_word) lower_box.move(ICOORD(0, padding / 2)); + EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word"); + EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical"); + + // Since some words have no text below and some words have no text above + // check that at least one of the boxes satisfies BoxOutOfTextline. + bool upper_or_lower_out_of_textline = + projection_->BoxOutOfHTextline(upper_box, denorm_, false) || + projection_->BoxOutOfHTextline(lower_box, denorm_, false); + if (!upper_or_lower_out_of_textline) { + projection_->BoxOutOfHTextline(upper_box, denorm_, true); + projection_->BoxOutOfHTextline(lower_box, denorm_, true); + } + EXPECT_TRUE(upper_or_lower_out_of_textline); + + // Now test DistanceOfBoxFromBox by faking a challenger word, and asking + // that each pad box be nearer to its true textline than the + // challenger. Due to the tight spacing of latin text, getting + // the right position and size of these test boxes is quite fiddly. + padding = line_height / 4; + upper_box.set_top(upper_box.bottom() + padding); + TBOX target_box(word_box); + if (!small_word) { + upper_box.move(ICOORD(0, -padding * 3 / 2)); + } + target_box.set_top(upper_box.bottom()); + TBOX upper_challenger(upper_box); + upper_challenger.set_bottom(upper_box.top()); + upper_challenger.set_top(upper_box.top() + word_box.height()); + EvaluateDistance(upper_box, target_box, upper_challenger, text, + "Upper Word"); + if (tall_word) lower_box.move(ICOORD(0, padding / 2)); + lower_box.set_bottom(lower_box.top() - padding); + target_box = word_box; + target_box.set_bottom(lower_box.top()); + TBOX lower_challenger(lower_box); + lower_challenger.set_top(lower_box.bottom()); + lower_challenger.set_bottom(lower_box.bottom() - word_box.height()); + EvaluateDistance(lower_box, target_box, lower_challenger, text, + "Lower Word"); + + delete[] text; + } while (it->Next(tesseract::RIL_WORD)); + delete it; + } + + Pix* src_pix_; + Pix* bin_pix_; + BLOCK_LIST blocks_; + std::string ocr_text_; + tesseract::TessBaseAPI api_; + Tesseract* tesseract_; + ColumnFinder* finder_; + const DENORM* denorm_; + const TextlineProjection* projection_; +}; + +// Tests all word boxes on an unrotated image. +TEST_F(TextlineProjectionTest, Unrotated) { VerifyBoxes("phototest.tif", 31); } + +// Tests character-level applyboxes on italic Times New Roman. +TEST_F(TextlineProjectionTest, Rotated) { VerifyBoxes("phototestrot.tif", 31); } + +} // namespace |