diff options
Diffstat (limited to 'tesseract/include/tesseract/thresholder.h')
-rw-r--r-- | tesseract/include/tesseract/thresholder.h | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/tesseract/include/tesseract/thresholder.h b/tesseract/include/tesseract/thresholder.h new file mode 100644 index 00000000..4c6ab998 --- /dev/null +++ b/tesseract/include/tesseract/thresholder.h @@ -0,0 +1,188 @@ +/////////////////////////////////////////////////////////////////////// +// File: thresholder.h +// Description: Base API for thresholding images in tesseract. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_ +#define TESSERACT_CCMAIN_THRESHOLDER_H_ + +#include "export.h" +#include "publictypes.h" + +struct Pix; + +namespace tesseract { + +/// Base class for all tesseract image thresholding classes. +/// Specific classes can add new thresholding methods by +/// overriding ThresholdToPix. +/// Each instance deals with a single image, but the design is intended to +/// be useful for multiple calls to SetRectangle and ThresholdTo* if +/// desired. +class TESS_API ImageThresholder { + public: + ImageThresholder(); + virtual ~ImageThresholder(); + + /// Destroy the Pix if there is one, freeing memory. + virtual void Clear(); + + /// Return true if no image has been set. + bool IsEmpty() const; + + /// SetImage makes a copy of all the image data, so it may be deleted + /// immediately after this call. + /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + /// Palette color images will not work properly and must be converted to + /// 24 bit. + /// Binary images of 1 bit per pixel may also be given but they must be + /// byte packed with the MSB of the first byte being the first pixel, and a + /// one pixel is WHITE. For binary images set bytes_per_pixel=0. + void SetImage(const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line); + + /// Store the coordinates of the rectangle to process for later use. + /// Doesn't actually do any thresholding. + void SetRectangle(int left, int top, int width, int height); + + /// Get enough parameters to be able to rebuild bounding boxes in the + /// original image (not just within the rectangle). + /// Left and top are enough with top-down coordinates, but + /// the height of the rectangle and the image are needed for bottom-up. + virtual void GetImageSizes(int* left, int* top, int* width, int* height, + int* imagewidth, int* imageheight); + + /// Return true if the source image is color. + bool IsColor() const { + return pix_channels_ >= 3; + } + + /// Returns true if the source image is binary. + bool IsBinary() const { + return pix_channels_ == 0; + } + + int GetScaleFactor() const { + return scale_; + } + + // Set the resolution of the source image in pixels per inch. + // This should be called right after SetImage(), and will let us return + // appropriate font sizes for the text. + void SetSourceYResolution(int ppi) { + yres_ = ppi; + estimated_res_ = ppi; + } + int GetSourceYResolution() const { + return yres_; + } + int GetScaledYResolution() const { + return scale_ * yres_; + } + // Set the resolution of the source image in pixels per inch, as estimated + // by the thresholder from the text size found during thresholding. + // This value will be used to set internal size thresholds during recognition + // and will not influence the output "point size." The default value is + // the same as the source resolution. (yres_) + void SetEstimatedResolution(int ppi) { + estimated_res_ = ppi; + } + // Returns the estimated resolution, including any active scaling. + // This value will be used to set internal size thresholds during recognition. + int GetScaledEstimatedResolution() const { + return scale_ * estimated_res_; + } + + /// Pix vs raw, which to use? Pix is the preferred input for efficiency, + /// since raw buffers are copied. + /// SetImage for Pix clones its input, so the source pix may be pixDestroyed + /// immediately after, but may not go away until after the Thresholder has + /// finished with it. + void SetImage(const Pix* pix); + + /// Threshold the source image as efficiently as possible to the output Pix. + /// Creates a Pix and sets pix to point to the resulting pointer. + /// Caller must use pixDestroy to free the created Pix. + /// Returns false on error. + virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix); + + // Gets a pix that contains an 8 bit threshold value at each pixel. The + // returned pix may be an integer reduction of the binary image such that + // the scale factor may be inferred from the ratio of the sizes, even down + // to the extreme of a 1x1 pixel thresholds image. + // Ideally the 8 bit threshold should be the exact threshold used to generate + // the binary image in ThresholdToPix, but this is not a hard constraint. + // Returns nullptr if the input is binary. PixDestroy after use. + virtual Pix* GetPixRectThresholds(); + + /// Get a clone/copy of the source image rectangle. + /// The returned Pix must be pixDestroyed. + /// This function will be used in the future by the page layout analysis, and + /// the layout analysis that uses it will only be available with Leptonica, + /// so there is no raw equivalent. + Pix* GetPixRect(); + + // Get a clone/copy of the source image rectangle, reduced to greyscale, + // and at the same resolution as the output binary. + // The returned Pix must be pixDestroyed. + // Provided to the classifier to extract features from the greyscale image. + virtual Pix* GetPixRectGrey(); + + protected: + // ---------------------------------------------------------------------- + // Utility functions that may be useful components for other thresholders. + + /// Common initialization shared between SetImage methods. + virtual void Init(); + + /// Return true if we are processing the full image. + bool IsFullImage() const { + return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ && + rect_height_ == image_height_; + } + + // Otsu thresholds the rectangle, taking the rectangle from *this. + void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const; + + /// Threshold the rectangle, taking everything except the src_pix + /// from the class, using thresholds/hi_values to the output pix. + /// NOTE that num_channels is the size of the thresholds and hi_values + // arrays and also the bytes per pixel in src_pix. + void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds, + const int* hi_values, Pix** pix) const; + + protected: + /// Clone or other copy of the source Pix. + /// The pix will always be PixDestroy()ed on destruction of the class. + Pix* pix_; + + int image_width_; ///< Width of source pix_. + int image_height_; ///< Height of source pix_. + int pix_channels_; ///< Number of 8-bit channels in pix_. + int pix_wpl_; ///< Words per line of pix_. + // Limits of image rectangle to be processed. + int scale_; ///< Scale factor from original image. + int yres_; ///< y pixels/inch in source image. + int estimated_res_; ///< Resolution estimate from text size. + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_CCMAIN_THRESHOLDER_H_ |