summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'tesseract/src/textord/devanagari_processing.h')
-rw-r--r-- tesseract/src/textord/devanagari_processing.h 210
1 files changed, 210 insertions, 0 deletions
diff --git a/tesseract/src/textord/devanagari_processing.h b/tesseract/src/textord/devanagari_processing.h
new file mode 100644
index 00000000..cd0bfeb6
--- /dev/null
+++ b/tesseract/src/textord/devanagari_processing.h
@@ -0,0 +1,210 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: shobhitsaxena@google.com (Shobhit Saxena)
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+
+#include "allheaders.h"
+#include "ocrblock.h"
+#include "params.h"
+
+struct Pix;
+struct Box;
+struct Boxa;
+
+namespace tesseract {
+
+// Declarations of the two parameters that control debug output for the split
+// shiro-rekha process: an integer debug level and a flag requesting a debug
+// image.
+// NOTE(review): INT_VAR_H / BOOL_VAR_H are presumably provided by params.h and
+// the matching definitions presumably live in the implementation file --
+// confirm before relying on the default values written here.
+extern
+INT_VAR_H(devanagari_split_debuglevel, 0,
+ "Debug level for split shiro-rekha process.");
+
+extern
+BOOL_VAR_H(devanagari_split_debugimage, 0,
+ "Whether to create a debug image for split shiro-rekha process.");
+
+class TBOX;
+class DebugPixa;
+
+// Holds an array of integer counts (hist_) and its length (length_). The
+// buffer is released with delete[] by Clear() and by the destructor.
+// NOTE(review): no copy operations are declared, so a copy of a populated
+// histogram would share hist_ with its source and both destructors would
+// release it -- avoid copying populated instances.
+class PixelHistogram {
+ public:
+  // Creates an empty histogram: no buffer and zero length.
+  PixelHistogram() : hist_(nullptr), length_(0) {}
+
+  ~PixelHistogram() {
+    Clear();
+  }
+
+  // Releases the count buffer and returns the object to the empty state.
+  // Safe to call repeatedly.
+  void Clear() {
+    delete[] hist_;
+    // Reset the pointer so that a later Clear() -- including the one issued by
+    // the destructor -- does not delete[] the same buffer a second time.
+    hist_ = nullptr;
+    length_ = 0;
+  }
+
+  // Returns the count buffer (nullptr while empty). The buffer remains owned
+  // by this object.
+  int* hist() const { return hist_; }
+
+  // Returns the stored length (0 while empty).
+  int length() const {
+    return length_;
+  }
+
+  // Methods to construct histograms from images. These clear any existing data.
+  void ConstructVerticalCountHist(Pix* pix);
+  void ConstructHorizontalCountHist(Pix* pix);
+
+  // This method returns the global-maxima for the histogram. The frequency of
+  // the global maxima is returned in count, if specified.
+  int GetHistogramMaximum(int* count) const;
+
+ private:
+  int* hist_;   // Count buffer released with delete[]; nullptr while empty.
+  int length_;  // Length associated with hist_; 0 while empty.
+};
+
+// Performs shiro-rekha splitting on an input image, with independently
+// configurable strategies for the page-segmentation and OCR phases.
+class ShiroRekhaSplitter {
+ public:
+  // How aggressively blobs are split during a given phase.
+  enum SplitStrategy {
+    NO_SPLIT = 0,   // The phase performs no splitting.
+    MINIMAL_SPLIT,  // Blobs are split minimally.
+    MAXIMAL_SPLIT   // Blobs are split maximally.
+  };
+
+  ShiroRekhaSplitter();
+  virtual ~ShiroRekhaSplitter();
+
+  // Entry point: runs splitting with the current settings and reports whether
+  // a split was actually performed. pageseg_split_strategy_ is applied when
+  // split_for_pageseg is true; ocr_split_strategy_ is applied otherwise.
+  bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
+
+  // Releases the memory held by this object.
+  void Clear();
+
+  // Rebuilds the words in the segmentation block list from the blobs found in
+  // new_blobs. The segmentation block list must have been set beforehand.
+  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
+
+  // True when the pageseg and ocr phases use different split strategies.
+  bool HasDifferentSplitStrategies() const { return ocr_split_strategy_ != pageseg_split_strategy_; }
+
+  // Only the pointer is stored (no copy of the list is made), so the list
+  // object must still be alive when Split is called. The list serves as the
+  // golden segmentation while splitting.
+  void set_segmentation_block_list(BLOCK_LIST* block_list) {
+    segmentation_block_list_ = block_list;
+  }
+
+  // Marker value for an xheight that has not been specified.
+  static const int kUnspecifiedXheight = -1;
+
+  // Plain setters for the global xheight estimate and the close-operation flag.
+  void set_global_xheight(int xheight) { global_xheight_ = xheight; }
+  void set_perform_close(bool perform) { perform_close_ = perform; }
+
+  // Image produced by shiro-rekha splitting. It stays owned by this class, so
+  // callers that need it beyond the lifetime of the ShiroRekhaSplitter object
+  // may want to clone it.
+  Pix* splitted_image() { return splitted_image_; }
+
+  // Sets the input image; this class owns a clone of it.
+  void set_orig_pix(Pix* pix);
+
+  // Input image previously provided to the object. It is owned by this class;
+  // callers may want to clone the returned pix before working with it.
+  Pix* orig_pix() { return orig_pix_; }
+
+  // Accessors for the strategy used in the OCR phase.
+  SplitStrategy ocr_split_strategy() const { return ocr_split_strategy_; }
+  void set_ocr_split_strategy(SplitStrategy strategy) { ocr_split_strategy_ = strategy; }
+
+  // Accessors for the strategy used in the page-segmentation phase.
+  SplitStrategy pageseg_split_strategy() const { return pageseg_split_strategy_; }
+  void set_pageseg_split_strategy(SplitStrategy strategy) { pageseg_split_strategy_ = strategy; }
+
+  // Block list pointer stored by set_segmentation_block_list().
+  BLOCK_LIST* segmentation_block_list() { return segmentation_block_list_; }
+
+  // Computes the mode-height of the blobs in pix, pruning very small blobs
+  // from the calculation. May serve as a global xheight estimate for images
+  // whose text is all of the same point size.
+  static int GetModeHeight(Pix* pix);
+
+ private:
+  // Applies a close operation to the input image; the xheight estimate
+  // determines the size of the sel that is used.
+  static void PerformClose(Pix* pix, int xheight_estimate);
+
+  // Resolves the cc bbox to a particular row and returns that row's xheight.
+  // Uses the block list (presumably segmentation_block_list_) if available,
+  // otherwise just returns the global_xheight_ estimate currently set.
+  int GetXheightForCC(Box* cc_bbox);
+
+  // Produces the list of regions (boxes) that should be cleared in the
+  // original image in order to perform shiro-rekha splitting. pix is assumed
+  // to carry at most one word. xheight may be the global estimate, the row
+  // estimate, or unspecified; when unspecified, over-splitting may occur
+  // because a conservative stroke-width estimate with an associated multiplier
+  // is used in its place. A specified xheight is advisable when splitting for
+  // classification/training.
+  void SplitWordShiroRekha(SplitStrategy split_strategy, Pix* pix, int xheight,
+                           int word_left, int word_top,
+                           Boxa* regions_to_clear);
+
+  // Creates a new box object for tbox, expressed in the coordinate system of
+  // the original image.
+  Box* GetBoxForTBOX(const TBOX& tbox) const;
+
+  // Reports the y-extents of the shiro-rekha computed from the input word
+  // image.
+  static void GetShiroRekhaYExtents(Pix* word_pix, int* shirorekha_top,
+                                    int* shirorekha_bottom,
+                                    int* shirorekha_ylevel);
+
+  // Clone of the input image that was passed in.
+  Pix* orig_pix_;
+  // Result of the last splitting round; owned by this class.
+  Pix* splitted_image_;
+  SplitStrategy pageseg_split_strategy_;
+  SplitStrategy ocr_split_strategy_;
+  Pix* debug_image_;
+  // Golden segmentation used while splitting.
+  BLOCK_LIST* segmentation_block_list_;
+  int global_xheight_;
+  // Whether a morphological close operation should be performed before CCs
+  // are run through splitting.
+  bool perform_close_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_