ccmain/baseapi.h

Go to the documentation of this file.
00001 
00002 // File:        baseapi.h
00003 // Description: Simple API for calling tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 06 15:35:01 PDT 2006
00006 //
00007 // (C) Copyright 2006, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef THIRD_PARTY_TESSERACT_CCMAIN_BASEAPI_H__
00021 #define THIRD_PARTY_TESSERACT_CCMAIN_BASEAPI_H__
00022 
00023 #include <string>
00024 
00025 #include "host.h"
00026 #include "ocrclass.h"
00027 
00028 class PAGE_RES;
00029 class BLOCK_LIST;
00030 
00031 // Base class for all tesseract APIs.
00032 // Specific classes can add the ability to work on different inputs or
00033 // produce different outputs.
00034 
00035 class TessBaseAPI {
00036  public:
00037   // Start tesseract.
00038   // The datapath must be the name of the data directory or some other file
00039   // in which the data directory resides (for instance argv[0]).
00040   // The configfile is the name of a file in the tessconfigs directory
00041   // (e.g. batch) or NULL to run on defaults.
00042   // The outputbase may also be NULL, and is the basename of various output
00043   // files. If output to any of these files is enabled, a name must be given.
00044   // If numeric_mode is true, only possible digits and roman numerals are
00045   // returned. Returns 0 if successful. Crashes if not (see TODO below).
00046   // The argc and argv may be 0 and NULL respectively. They are used for
00047   // providing config files for debug/display purposes.
00048   // TODO(rays) get the facts straight. Is it OK to call
00049   // it more than once? Make it properly check for errors and return them.
00050   static int Init(const char* datapath, const char* outputbase,
00051                   const char* configfile, bool numeric_mode,
00052                   int argc, char* argv[]);
00053 
00054   // Recognize a rectangle from an image and return the result as a string.
00055   // May be called many times for a single Init.
00056   // Currently has no error checking.
00057   // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
00058   // Palette color images will not work properly and must be converted to
00059   // 24 bit.
00060   // Binary images of 1 bit per pixel may also be given, but they must be
00061   // byte packed with the MSB of the first byte being the first pixel, and a
00062   // 1 bit represents WHITE. For binary images set bytes_per_pixel=0.
00063   // The recognized text is returned as a char* which (in future will be coded
00064   // as UTF8 and) must be freed by the caller with the delete [] operator.
00065   static char* TesseractRect(const UINT8* imagedata,
00066                              int bytes_per_pixel,
00067                              int bytes_per_line,
00068                              int left, int top, int width, int height);
00069 
00070   // Call between pages or documents etc. to free up memory and forget
00071   // adaptive data.
00072   static void ClearAdaptiveClassifier();
00073 
00074   // Close down tesseract and free up memory.
00075   static void End();
00076 
00077   // Dump the internal binary image to a PGM file.
00078   static void DumpPGM(const char* filename);
00079 
00080  protected:
00081   // Copy the given image rectangle to Tesseract, with adaptive thresholding
00082   // if the image is not already binary.
00083   static void CopyImageToTesseract(const UINT8* imagedata,
00084                                    int bytes_per_pixel,
00085                                    int bytes_per_line,
00086                                    int left, int top, int width, int height);
00087 
00088   // Compute the Otsu threshold(s) for the given image rectangle, making one
00089   // for each channel. Each channel is always one byte per pixel.
00090   // Returns an array of threshold values and an array of hi_values, such
00091   // that a pixel value >threshold[channel] is considered foreground if
00092   // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
00093   // that there is no apparent foreground. At least one hi_value will not be -1.
00094   // The thresholds and hi_values arrays are assumed to be of bytes_per_pixel size.
00095   static void OtsuThreshold(const UINT8* imagedata,
00096                            int bytes_per_pixel,
00097                            int bytes_per_line,
00098                            int left, int top, int right, int bottom,
00099                            int* thresholds,
00100                            int* hi_values);
00101 
00102   // Compute the histogram for the given image rectangle, and the given
00103   // channel. (Channel pointed to by imagedata.) Each channel is always
00104   // one byte per pixel.
00105   // bytes_per_pixel is used to skip channels not being
00106   // counted with this call in a multi-channel (pixel-major) image.
00107   // The histogram is always a 256 element array to count occurrences of
00108   // each pixel value.
00109   static void HistogramRect(const UINT8* imagedata,
00110                             int bytes_per_pixel,
00111                             int bytes_per_line,
00112                             int left, int top, int right, int bottom,
00113                             int* histogram);
00114 
00115   // Compute the Otsu threshold(s) for the given histogram.
00116   // Also returns H = total count in histogram, and omega0 = count of
00117   // histogram below threshold (presumably via H_out and omega0_out).
00118   static int OtsuStats(const int* histogram,
00119                        int* H_out,
00120                        int* omega0_out);
00121 
00122   // Threshold the given grey or color image into the tesseract global
00123   // image ready for recognition. Requires thresholds and hi_values
00124   // produced by OtsuThreshold above.
00125   static void ThresholdRect(const UINT8* imagedata,
00126                             int bytes_per_pixel,
00127                             int bytes_per_line,
00128                             int left, int top,
00129                             int width, int height,
00130                             const int* thresholds,
00131                             const int* hi_values);
00132 
00133   // Copy the requested rectangle of the binary image into the
00134   // tesseract global image ready for recognition.
00135   static void CopyBinaryRect(const UINT8* imagedata,
00136                              int bytes_per_line,
00137                              int left, int top,
00138                              int width, int height);
00139 
00140   // Low-level function to recognize the current global image to a string.
00141   static char* RecognizeToString();
00142 
00143   // Find lines from the image making the BLOCK_LIST.
00144   static void FindLines(BLOCK_LIST* block_list);
00145 
00146   // Recognize the tesseract global image and return the result as Tesseract
00147   // internal structures.
00148   static PAGE_RES* Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor);
00149 
00150   // Convert (and free) the internal data structures into a text string.
00151   static char* TesseractToText(PAGE_RES* page_res);
00152 };
00153 
00154 #endif  // THIRD_PARTY_TESSERACT_CCMAIN_BASEAPI_H__

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1