TessBaseAPI Class Reference

#include <baseapi.h>

List of all members.


Detailed Description

Definition at line 35 of file baseapi.h.

Static Public Member Functions

Static Protected Member Functions


Member Function Documentation

void TessBaseAPI::ClearAdaptiveClassifier (  )  [static]

Definition at line 90 of file baseapi.cpp.

References ResetAdaptiveClassifier().

00090                                           {
00091   ResetAdaptiveClassifier();
00092 }

void TessBaseAPI::CopyBinaryRect ( const UINT8 *  imagedata,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
) [static, protected]

Definition at line 308 of file baseapi.cpp.

References IMAGE::capture(), copy_sub_image(), IMAGE::create(), and page_image.

Referenced by CopyImageToTesseract().

00311                                                         {
00312   // Copy binary image, cutting out the required rectangle.
00313   IMAGE image;
00314   image.capture(const_cast<UINT8*>(imagedata),
00315                 bytes_per_line*8, top + height, 1);
00316   page_image.create(width, height, 1);
00317   copy_sub_image(&image, left, top, width, height, &page_image, 0, 0, false);
00318 }

void TessBaseAPI::CopyImageToTesseract ( const UINT8 *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
) [static, protected]

Definition at line 119 of file baseapi.cpp.

References CopyBinaryRect(), OtsuThreshold(), and ThresholdRect().

Referenced by TesseractRect().

00123                                                               {
00124   if (bytes_per_pixel > 0) {
00125     // Threshold grey or color.
00126     int* thresholds = new int[bytes_per_pixel];
00127     int* hi_values = new int[bytes_per_pixel];
00128 
00129     // Compute the thresholds.
00130     OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
00131                   left, top, left + width, top + height,
00132                   thresholds, hi_values);
00133 
00134     // Threshold the image to the tesseract global page_image.
00135     ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line,
00136                   left, top, width, height,
00137                   thresholds, hi_values);
00138     delete [] thresholds;
00139     delete [] hi_values;
00140   } else {
00141     CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height);
00142   }
00143 }

void TessBaseAPI::DumpPGM ( const char *  filename  )  [static]

Definition at line 101 of file baseapi.cpp.

References IMAGE::get_line(), IMAGE::get_xsize(), IMAGE::get_ysize(), IMAGELINE::init(), INT32FORMAT, page_image, and IMAGELINE::pixels.

00101                                               {
00102   IMAGELINE line;
00103   line.init(page_image.get_xsize());
00104   FILE *fp = fopen(filename, "w");
00105   fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(),
00106           page_image.get_ysize());
00107   for (int j = page_image.get_ysize()-1; j >= 0 ; --j) {
00108     page_image.get_line(0, j, page_image.get_xsize(), &line, 0);
00109     for (int i = 0; i < page_image.get_xsize(); ++i) {
00110       UINT8 b = line.pixels[i] ? 255 : 0;
00111       fwrite(&b, 1, 1, fp);
00112     }
00113   }
00114   fclose(fp);
00115 }

void TessBaseAPI::End (  )  [static]

Definition at line 95 of file baseapi.cpp.

References end_tesseract(), and ResetAdaptiveClassifier().

Referenced by main().

00095                       {
00096   ResetAdaptiveClassifier();
00097   end_tesseract();
00098 }

void TessBaseAPI::FindLines ( BLOCK_LIST *  block_list  )  [static, protected]

Definition at line 333 of file baseapi.cpp.

References pgeditor_read_file().

Referenced by RecognizeToString().

00333                                                   {
00334   STRING input_file = "noname.tif";
00335   // The following call creates a full-page block and then runs connected
00336   // component analysis and text line creation.
00337   pgeditor_read_file(input_file, block_list);
00338 }

void TessBaseAPI::HistogramRect ( const UINT8 *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  right,
int  bottom,
int *  histogram 
) [static, protected]

Definition at line 211 of file baseapi.cpp.

Referenced by OtsuThreshold().

00215                                                 {
00216   int width = right - left;
00217   memset(histogram, 0, sizeof(*histogram) * 256);
00218   const UINT8* pix = imagedata +
00219                      top*bytes_per_line +
00220                      left*bytes_per_pixel;
00221   for (int y = top; y < bottom; ++y) {
00222     for (int x = 0; x < width; ++x) {
00223       ++histogram[pix[x * bytes_per_pixel]];
00224     }
00225     pix += bytes_per_line;
00226   }
00227 }

int TessBaseAPI::Init ( const char *  datapath,
const char *  outputbase,
const char *  configfile,
bool  numeric_mode,
int  argc,
char *  argv[] 
) [static]

Definition at line 52 of file baseapi.cpp.

References init_tesseract().

Referenced by main().

00054                                               {
00055   int result = init_tesseract(datapath, outputbase, configfile, argc, argv);
00056   bln_numericmode.set_value(numeric_mode);
00057   return result;
00058 }

int TessBaseAPI::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
) [static, protected]

Definition at line 232 of file baseapi.cpp.

References NULL.

Referenced by OtsuThreshold().

00234                                             {
00235   int H = 0;
00236   double mu_T = 0.0;
00237   for (int i = 0; i < 256; ++i) {
00238     H += histogram[i];
00239     mu_T += i * histogram[i];
00240   }
00241 
00242   // Now maximize sig_sq_B over t.
00243   // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
00244   int best_t = -1;
00245   int omega_0, omega_1;
00246   int best_omega_0 = 0;
00247   double best_sig_sq_B = 0.0;
00248   double mu_0, mu_1, mu_t;
00249   omega_0 = 0;
00250   mu_t = 0.0;
00251   for (int t = 0; t < 255; ++t) {
00252     omega_0 += histogram[t];
00253     mu_t += t * static_cast<double>(histogram[t]);
00254     if (omega_0 == 0)
00255       continue;
00256     omega_1 = H - omega_0;
00257     mu_0 = mu_t / omega_0;
00258     mu_1 = (mu_T - mu_t) / omega_1;
00259     double sig_sq_B = mu_1 - mu_0;
00260     sig_sq_B *= sig_sq_B * omega_0 * omega_1;
00261     if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
00262       best_sig_sq_B = sig_sq_B;
00263       best_t = t;
00264       best_omega_0 = omega_0;
00265     }
00266   }
00267   if (H_out != NULL) *H_out = H;
00268   if (omega0_out != NULL) *omega0_out = best_omega_0;
00269   return best_t;
00270 }

void TessBaseAPI::OtsuThreshold ( const UINT8 *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  right,
int  bottom,
int *  thresholds,
int *  hi_values 
) [static, protected]

Definition at line 152 of file baseapi.cpp.

References HistogramRect(), and OtsuStats().

Referenced by CopyImageToTesseract().

00157                                                 {
00158   // Of all channels with no good hi_value, keep the best so we can always
00159   // produce at least one answer.
00160   int best_hi_value = 0;
00161   int best_hi_index = 0;
00162   bool any_good_hivalue = false;
00163   double best_hi_dist = 0.0;
00164 
00165   for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00166     thresholds[ch] = 0;
00167     hi_values[ch] = -1;
00168     // Compute the histogram of the image rectangle.
00169     int histogram[256];
00170     HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
00171                   left, top, right, bottom, histogram);
00172     int H;
00173     int best_omega_0;
00174     int best_t = OtsuStats(histogram, &H, &best_omega_0);
00175     // To be a convincing foreground we must have a small fraction of H
00176     // or to be a convincing background we must have a large fraction of H.
00177     // In between we assume this channel contains no thresholding information.
00178     int hi_value = best_omega_0 < H * 0.5;
00179     thresholds[ch] = best_t;
00180     if (best_omega_0 > H * 0.75) {
00181       any_good_hivalue = true;
00182       hi_values[ch] = 0;
00183     }
00184     else if (best_omega_0 < H * 0.25) {
00185       any_good_hivalue = true;
00186       hi_values[ch] = 1;
00187     }
00188     else {
00189       // In case all channels are like this, keep the best of the bad lot.
00190       double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
00191       if (hi_dist > best_hi_dist) {
00192         best_hi_dist = hi_dist;
00193         best_hi_value = hi_value;
00194         best_hi_index = ch;
00195       }
00196     }
00197   }
00198   if (!any_good_hivalue) {
00199     // Use the best of the ones that were not good enough.
00200     hi_values[best_hi_index] = best_hi_value;
00201   }
00202 }

PAGE_RES * TessBaseAPI::Recognize ( BLOCK_LIST *  block_list,
ETEXT_DESC *  monitor 
) [static, protected]

Definition at line 342 of file baseapi.cpp.

References apply_box_training(), apply_boxes(), pgeditor_main(), recog_all_words(), and start_variables_editor().

Referenced by RecognizeToString().

00342                                                                             {
00343   if (tessedit_resegment_from_boxes)
00344     apply_boxes(block_list);
00345   if (edit_variables)
00346     start_variables_editor();
00347 
00348   PAGE_RES* page_res = new PAGE_RES(block_list);
00349   if (interactive_mode) {
00350     pgeditor_main(block_list);                  //pgeditor user I/F
00351   } else if (tessedit_train_from_boxes) {
00352     apply_box_training(block_list);
00353   } else {
00354     // Now run the main recognition.
00355     recog_all_words(page_res, monitor);
00356   }
00357   return page_res;
00358 }

char * TessBaseAPI::RecognizeToString (  )  [static, protected]

Definition at line 321 of file baseapi.cpp.

References FindLines(), NULL, Recognize(), and TesseractToText().

Referenced by TesseractRect().

00321                                      {
00322   BLOCK_LIST    block_list;
00323 
00324   FindLines(&block_list);
00325 
00326   // Now run the main recognition.
00327   PAGE_RES* page_res = Recognize(&block_list, NULL);
00328 
00329   return TesseractToText(page_res);
00330 }

char * TessBaseAPI::TesseractRect ( const UINT8 *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height 
) [static]

Definition at line 71 of file baseapi.cpp.

References CopyImageToTesseract(), kMinRectSize, NULL, and RecognizeToString().

Referenced by main().

00075                                                         {
00076 #ifndef TEXT_VERBOSE
00077   if (width < kMinRectSize || height < kMinRectSize)
00078     return NULL;  // Nothing worth doing.
00079 #endif // TEXT_VERBOSE - useful for debugging single characters!
00080 
00081   // Copy/Threshold the image to the tesseract global page_image.
00082   CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line,
00083                        left, top, width, height);
00084 
00085   return RecognizeToString();
00086 }

char * TessBaseAPI::TesseractToText ( PAGE_RES *  page_res  )  [static, protected]

Definition at line 362 of file baseapi.cpp.

References WERD_RES::best_choice, PAGE_RES_IT::forward(), NULL, PAGE_RES_IT::restart_page(), W_EOL, and PAGE_RES_IT::word().

Referenced by RecognizeToString().

00362                                                      {
00363   if (page_res != NULL) {
00364     int total_length = 2;
00365     PAGE_RES_IT   page_res_it(page_res);
00366     // Iterate over the data structures to extract the recognition result.
00367     for (page_res_it.restart_page(); page_res_it.word () != NULL;
00368          page_res_it.forward()) {
00369       WERD_RES *word = page_res_it.word();
00370       WERD_CHOICE* choice = word->best_choice;
00371       if (choice != NULL) {
00372         total_length += choice->string().length() + 1;
00373       }
00374     }
00375     char* result = new char[total_length];
00376     char* ptr = result;
00377     for (page_res_it.restart_page(); page_res_it.word () != NULL;
00378          page_res_it.forward()) {
00379       WERD_RES *word = page_res_it.word();
00380       WERD_CHOICE* choice = word->best_choice;
00381       if (choice != NULL) {
00382         strcpy(ptr, choice->string().string());
00383         ptr += strlen(ptr);
00384         if (word->word->flag(W_EOL))
00385           *ptr++ = '\n';
00386         else
00387           *ptr++ = ' ';
00388       }
00389     }
00390     *ptr++ = '\n';
00391     *ptr = '\0';
00392     delete page_res;
00393     return result;
00394   }
00395   return NULL;
00396 }

void TessBaseAPI::ThresholdRect ( const UINT8 *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height,
const int *  thresholds,
const int *  hi_values 
) [static, protected]

Definition at line 4 of file api.cpp.

References IMAGE::create(), IMAGELINE::init(), page_image, IMAGELINE::pixels, and IMAGE::put_line().

Referenced by CopyImageToTesseract().

00010                                                       {
00011   IMAGELINE line;
00012   page_image.create(width, height, 1);
00013   line.init(width);
00014   // For each line in the image, fill the IMAGELINE class and put it into the
00015   // Tesseract global page_image. Note that Tesseract stores images with the
00016   // bottom at y=0 and 0 is black, so we need 2 kinds of inversion.
00017   //fmg: unless told otherwise, I'll assume that UNIT8* is == char*
00018 
00019 
00020   const UINT8* data = imagedata + top*bytes_per_line + left*bytes_per_pixel;
00021   for (int y = height - 1 ; y >= 0; --y) {
00022     const UINT8* pix = data;
00023     for (int x = 0; x < width; ++x, pix += bytes_per_pixel) {
00024       line.pixels[x] = 1;
00025       for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00026         if (hi_values[ch] >= 0 && (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
00027           line.pixels[x] = 0;
00028           break;
00029         }//if
00030       }//for bytes in each pixel
00031     }//for x
00032     page_image.put_line(0, y, width, &line, 0);
00033     data += bytes_per_line;
00034   }//for y
00035 }//TessBaseAPI::ThresholdRect


The documentation for this class was generated from the following files:
  * baseapi.h
  * baseapi.cpp
  * api.cpp
Generated on Wed Feb 28 19:49:34 2007 for Tesseract by  doxygen 1.5.1