diff options
Diffstat (limited to 'tesseract/src/training/pango/stringrenderer.h')
-rw-r--r-- | tesseract/src/training/pango/stringrenderer.h | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/tesseract/src/training/pango/stringrenderer.h b/tesseract/src/training/pango/stringrenderer.h new file mode 100644 index 00000000..49f1c3fd --- /dev/null +++ b/tesseract/src/training/pango/stringrenderer.h @@ -0,0 +1,227 @@ +/********************************************************************** + * File: stringrenderer.h + * Description: Class for rendering UTF-8 text to an image, and retrieving + * bounding boxes around each grapheme cluster. + * + * Instances are created using a font description string + * (eg. "Arial Italic 12"; see pango_font_info.h for the format) + * and the page dimensions. Other renderer properties such as + * spacing, ligaturization, as well a preprocessing behavior such + * as removal of unrenderable words and a special n-gram mode may + * be set using respective set_* methods. + * + * Author: Ranjith Unnikrishnan + * + * (C) Copyright 2013, Google Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + **********************************************************************/ + +#ifndef TESSERACT_TRAINING_STRINGRENDERER_H_ +#define TESSERACT_TRAINING_STRINGRENDERER_H_ + +#include "export.h" + +#include "pango_font_info.h" +#include "pango/pango-layout.h" +#include "pango/pangocairo.h" + +#include <string> +#include <unordered_map> +#include <vector> + +struct Boxa; +struct Pix; + +#ifdef _MSC_VER +# define strdup(s) _strdup(s) +#endif + +namespace tesseract { + +class BoxChar; + +class TESS_PANGO_TRAINING_API StringRenderer { + public: + StringRenderer(const std::string& font_desc, int page_width, int page_height); + ~StringRenderer(); + + // Renders the text with the chosen font and returns the byte offset up to + // which the text could be rendered so as to fit the specified page + // dimensions. + int RenderToImage(const char* text, int text_length, Pix** pix); + int RenderToGrayscaleImage(const char* text, int text_length, Pix** pix); + int RenderToBinaryImage(const char* text, int text_length, int threshold, + Pix** pix); + // Renders a line of text with all available fonts that were able to render + // at least min_coverage fraction of the input text. Use 1.0 to require that + // a font be able to render all the text. + int RenderAllFontsToImage(double min_coverage, const char* text, + int text_length, std::string* font_used, Pix** pix); + + bool set_font(const std::string& desc); + // Char spacing is in PIXELS!!!!. + void set_char_spacing(int char_spacing) { char_spacing_ = char_spacing; } + void set_leading(int leading) { + leading_ = leading; + } + void set_resolution(const int resolution); + void set_vertical_text(bool vertical_text) { + vertical_text_ = vertical_text; + } + void set_gravity_hint_strong(bool gravity_hint_strong) { + gravity_hint_strong_ = gravity_hint_strong; + } + void set_render_fullwidth_latin(bool render_fullwidth_latin) { + render_fullwidth_latin_ = render_fullwidth_latin; + } + // Sets the probability (value in [0, 1]) of starting to render a word with an + // underline. This implementation consider words to be space-delimited + // sequences of characters. + void set_underline_start_prob(const double frac); + // Set the probability (value in [0, 1]) of continuing a started underline to + // the next word. + void set_underline_continuation_prob(const double frac); + void set_underline_style(const PangoUnderline style) { + underline_style_ = style; + } + void set_features(const char* features) { + free(features_); + features_ = strdup(features); + } + void set_page(int page) { + page_ = page; + } + void set_box_padding(int val) { + box_padding_ = val; + } + void set_drop_uncovered_chars(bool val) { + drop_uncovered_chars_ = val; + } + void set_strip_unrenderable_words(bool val) { + strip_unrenderable_words_ = val; + } + void set_output_word_boxes(bool val) { + output_word_boxes_ = val; + } + // Before rendering the string, replace latin characters with their optional + // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those + // unicodes. + void set_add_ligatures(bool add_ligatures) { + add_ligatures_ = add_ligatures; + } + // Set the rgb value of the text ink. Values range in [0, 1.0] + void set_pen_color(double r, double g, double b) { + pen_color_[0] = r; + pen_color_[1] = g; + pen_color_[2] = b; + } + void set_h_margin(const int h_margin) { + h_margin_ = h_margin; + } + void set_v_margin(const int v_margin) { + v_margin_ = v_margin; + } + const PangoFontInfo& font() const { + return font_; + } + int h_margin() const { return h_margin_; } + int v_margin() const { return v_margin_; } + + // Get the boxchars of all clusters rendered thus far (or since the last call + // to ClearBoxes()). + const std::vector<BoxChar*>& GetBoxes() const; + // Get the rendered page bounding boxes of all pages created thus far (or + // since last call to ClearBoxes()). + Boxa* GetPageBoxes() const; + + // Rotate the boxes on the most recent page by the given rotation. + void RotatePageBoxes(float rotation); + // Delete all boxes. + void ClearBoxes(); + // Returns the boxes in a boxfile string. + std::string GetBoxesStr(); + // Writes the boxes to a boxfile. + void WriteAllBoxes(const std::string& filename); + // Removes space-delimited words from the string that are not renderable by + // the current font and returns the count of such words. + int StripUnrenderableWords(std::string* utf8_text) const; + + // Insert a Word Joiner symbol (U+2060) between adjacent characters, excluding + // spaces and combining types, in each word before rendering to ensure words + // are not broken across lines. The output boxchars will not contain the + // joiner. + static std::string InsertWordJoiners(const std::string& text); + + // Helper functions to convert fullwidth Latin and halfwidth Basic Latin. + static std::string ConvertBasicLatinToFullwidthLatin(const std::string& text); + static std::string ConvertFullwidthLatinToBasicLatin(const std::string& text); + + protected: + // Init and free local renderer objects. + void InitPangoCairo(); + void FreePangoCairo(); + // Set rendering properties. + void SetLayoutProperties(); + void SetWordUnderlineAttributes(const std::string& page_text); + // Compute bounding boxes around grapheme clusters. + void ComputeClusterBoxes(); + void CorrectBoxPositionsToLayout(std::vector<BoxChar*>* boxchars); + bool GetClusterStrings(std::vector<std::string>* cluster_text); + int FindFirstPageBreakOffset(const char* text, int text_length); + + PangoFontInfo font_; + // Page properties + int page_width_, page_height_, h_margin_, v_margin_; + // Text rendering properties + double pen_color_[3]; + int char_spacing_; + int leading_, resolution_; + bool vertical_text_; + bool gravity_hint_strong_; + bool render_fullwidth_latin_; + double underline_start_prob_; + double underline_continuation_prob_; + PangoUnderline underline_style_; + char* features_; + // Text filtering options + bool drop_uncovered_chars_; + bool strip_unrenderable_words_; + bool add_ligatures_; + bool output_word_boxes_; + // Pango and cairo specific objects + cairo_surface_t* surface_; + cairo_t* cr_; + PangoLayout* layout_; + // Internal state of current page number, updated on successive calls to + // RenderToImage() + int start_box_; + int page_; + // Boxes and associated text for all pages rendered with RenderToImage() since + // the last call to ClearBoxes(). + std::vector<BoxChar*> boxchars_; + int box_padding_; + // Bounding boxes for pages since the last call to ClearBoxes(). + Boxa* page_boxes_; + + // Objects cached for subsequent calls to RenderAllFontsToImage() + std::unordered_map<char32, int64_t> char_map_; // Time-saving char histogram. + int total_chars_; // Number in the string to be rendered. + unsigned int font_index_; // Index of next font to use in font list. + int last_offset_; // Offset returned from last successful rendering + + private: + StringRenderer(const StringRenderer&); + void operator=(const StringRenderer&); +}; +} // namespace tesseract + +#endif // THIRD_PARTY_TESSERACT_TRAINING_STRINGRENDERER_H_ |