1 files changed, 188 insertions, 0 deletions
diff --git a/tesseract/include/tesseract/thresholder.h b/tesseract/include/tesseract/thresholder.h
new file mode 100644
index 00000000..4c6ab998
--- /dev/null
+++ b/tesseract/include/tesseract/thresholder.h
@@ -0,0 +1,188 @@
+///////////////////////////////////////////////////////////////////////
+// File:        thresholder.h
+// Description: Base API for thresholding images in tesseract.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
+#define TESSERACT_CCMAIN_THRESHOLDER_H_
+
+#include "export.h"
+#include "publictypes.h"
+
+struct Pix;
+
+namespace tesseract {
+
+/// Base class for all tesseract image thresholding classes.
+/// Subclasses can add new thresholding methods by
+/// overriding ThresholdToPix.
+/// Each instance deals with a single image, but the design is intended to
+/// be useful for multiple calls to SetRectangle and ThresholdTo* if
+/// desired.
+class TESS_API ImageThresholder {
+ public:
+  ImageThresholder();
+  virtual ~ImageThresholder();
+
+  /// Destroy the Pix if there is one, freeing memory.
+  virtual void Clear();
+
+  /// Return true if no image has been set.
+  bool IsEmpty() const;
+
+  /// SetImage makes a copy of all the image data, so it may be deleted
+  /// immediately after this call.
+  /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
+  /// Palette color images will not work properly and must be converted to
+  /// 24 bit.
+  /// Binary images of 1 bit per pixel may also be given but they must be
+  /// byte packed with the MSB of the first byte being the first pixel, and a
+  /// one pixel is WHITE. For binary images set bytes_per_pixel=0.
+  void SetImage(const unsigned char* imagedata, int width, int height,
+                int bytes_per_pixel, int bytes_per_line);
+
+  /// Store the coordinates of the rectangle to process for later use.
+  /// Doesn't actually do any thresholding.
+  void SetRectangle(int left, int top, int width, int height);
+
+  /// Get enough parameters to be able to rebuild bounding boxes in the
+  /// original image (not just within the rectangle).
+  /// Left and top are enough with top-down coordinates, but
+  /// the height of the rectangle and the image are needed for bottom-up.
+  virtual void GetImageSizes(int* left, int* top, int* width, int* height,
+                             int* imagewidth, int* imageheight);
+
+  /// Return true if the source image is color.
+  bool IsColor() const {
+    return pix_channels_ >= 3;  // At least three 8-bit channels.
+  }
+
+  /// Return true if the source image is binary.
+  bool IsBinary() const {
+    return pix_channels_ == 0;  // Binary sources have zero 8-bit channels.
+  }
+
+  int GetScaleFactor() const {
+    return scale_;  // Scale factor from original image.
+  }
+
+  /// Set the resolution of the source image in pixels per inch.
+  /// This should be called right after SetImage(), and will let us return
+  /// appropriate font sizes for the text.
+  void SetSourceYResolution(int ppi) {
+    yres_ = ppi;
+    estimated_res_ = ppi;  // The estimate defaults to the source resolution.
+  }
+  int GetSourceYResolution() const {
+    return yres_;  // y pixels/inch in source image.
+  }
+  int GetScaledYResolution() const {
+    return scale_ * yres_;  // Source y resolution times the scale factor.
+  }
+  /// Set the resolution of the source image in pixels per inch, as estimated
+  /// by the thresholder from the text size found during thresholding.
+  /// This value will be used to set internal size thresholds during recognition
+  /// and will not influence the output "point size." The default value is
+  /// the same as the source resolution. (yres_)
+  void SetEstimatedResolution(int ppi) {
+    estimated_res_ = ppi;
+  }
+  /// Returns the estimated resolution, including any active scaling.
+  /// This value will be used to set internal size thresholds during recognition.
+  int GetScaledEstimatedResolution() const {
+    return scale_ * estimated_res_;
+  }
+
+  /// Pix vs raw, which to use? Pix is the preferred input for efficiency,
+  /// since raw buffers are copied.
+  /// SetImage for Pix clones its input, so the source pix may be pixDestroyed
+  /// immediately after, but may not go away until after the Thresholder has
+  /// finished with it.
+  void SetImage(const Pix* pix);
+
+  /// Threshold the source image as efficiently as possible to the output Pix.
+  /// Creates a Pix and sets *pix to point to it.
+  /// Caller must use pixDestroy to free the created Pix.
+  /// Returns false on error.
+  virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
+
+  /// Gets a pix that contains an 8 bit threshold value at each pixel. The
+  /// returned pix may be an integer reduction of the binary image such that
+  /// the scale factor may be inferred from the ratio of the sizes, even down
+  /// to the extreme of a 1x1 pixel thresholds image.
+  /// Ideally the 8 bit threshold should be the exact threshold used to generate
+  /// the binary image in ThresholdToPix, but this is not a hard constraint.
+  /// Returns nullptr if the input is binary. pixDestroy after use.
+  virtual Pix* GetPixRectThresholds();
+
+  /// Get a clone/copy of the source image rectangle.
+  /// The returned Pix must be pixDestroyed.
+  /// This function will be used in the future by the page layout analysis, and
+  /// the layout analysis that uses it will only be available with Leptonica,
+  /// so there is no raw equivalent.
+  Pix* GetPixRect();
+
+  /// Get a clone/copy of the source image rectangle, reduced to greyscale,
+  /// and at the same resolution as the output binary.
+  /// The returned Pix must be pixDestroyed.
+  /// Provided to the classifier to extract features from the greyscale image.
+  virtual Pix* GetPixRectGrey();
+
+ protected:
+  // ----------------------------------------------------------------------
+  // Utility functions that may be useful components for other thresholders.
+
+  /// Common initialization shared between SetImage methods.
+  virtual void Init();
+
+  /// Return true if we are processing the full image.
+  bool IsFullImage() const {
+    return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ &&
+           rect_height_ == image_height_;
+  }
+
+  /// Otsu thresholds the rectangle, taking the rectangle from *this.
+  void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
+
+  /// Threshold the rectangle, taking everything except the src_pix
+  /// from the class, using thresholds/hi_values to the output pix.
+  /// NOTE that num_channels is the size of the thresholds and hi_values
+  /// arrays and also the bytes per pixel in src_pix.
+  void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds,
+                          const int* hi_values, Pix** pix) const;
+
+ protected:
+  /// Clone or other copy of the source Pix.
+  /// The pix will always be pixDestroy()ed on destruction of the class.
+  Pix* pix_;
+
+  int image_width_;   ///< Width of source pix_.
+  int image_height_;  ///< Height of source pix_.
+  int pix_channels_;  ///< Number of 8-bit channels in pix_.
+  int pix_wpl_;       ///< Words per line of pix_.
+  // Scale and resolution of the source image.
+  int scale_;          ///< Scale factor from original image.
+  int yres_;           ///< y pixels/inch in source image.
+  int estimated_res_;  ///< Resolution estimate from text size.
+  int rect_left_;      ///< Left edge of the image rectangle to be processed.
+  int rect_top_;       ///< Top edge of the image rectangle to be processed.
+  int rect_width_;     ///< Width of the image rectangle to be processed.
+  int rect_height_;    ///< Height of the image rectangle to be processed.
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_CCMAIN_THRESHOLDER_H_