#include <baseapi.h>
Definition at line 35 of file baseapi.h.
void TessBaseAPI::ClearAdaptiveClassifier() [static]
Definition at line 90 of file baseapi.cpp.
References ResetAdaptiveClassifier().
00090 { 00091 ResetAdaptiveClassifier(); 00092 }
void TessBaseAPI::CopyBinaryRect(const UINT8* imagedata,
                                 int bytes_per_line,
                                 int left,
                                 int top,
                                 int width,
                                 int height) [static, protected]
Definition at line 308 of file baseapi.cpp.
References IMAGE::capture(), copy_sub_image(), IMAGE::create(), and page_image.
Referenced by CopyImageToTesseract().
00311 { 00312 // Copy binary image, cutting out the required rectangle. 00313 IMAGE image; 00314 image.capture(const_cast<UINT8*>(imagedata), 00315 bytes_per_line*8, top + height, 1); 00316 page_image.create(width, height, 1); 00317 copy_sub_image(&image, left, top, width, height, &page_image, 0, 0, false); 00318 }
void TessBaseAPI::CopyImageToTesseract(const UINT8* imagedata,
                                       int bytes_per_pixel,
                                       int bytes_per_line,
                                       int left,
                                       int top,
                                       int width,
                                       int height) [static, protected]
Definition at line 119 of file baseapi.cpp.
References CopyBinaryRect(), OtsuThreshold(), and ThresholdRect().
Referenced by TesseractRect().
00123 { 00124 if (bytes_per_pixel > 0) { 00125 // Threshold grey or color. 00126 int* thresholds = new int[bytes_per_pixel]; 00127 int* hi_values = new int[bytes_per_pixel]; 00128 00129 // Compute the thresholds. 00130 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, 00131 left, top, left + width, top + height, 00132 thresholds, hi_values); 00133 00134 // Threshold the image to the tesseract global page_image. 00135 ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line, 00136 left, top, width, height, 00137 thresholds, hi_values); 00138 delete [] thresholds; 00139 delete [] hi_values; 00140 } else { 00141 CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height); 00142 } 00143 }
void TessBaseAPI::DumpPGM(const char* filename) [static]
Definition at line 101 of file baseapi.cpp.
References IMAGE::get_line(), IMAGE::get_xsize(), IMAGE::get_ysize(), IMAGELINE::init(), INT32FORMAT, page_image, and IMAGELINE::pixels.
00101 { 00102 IMAGELINE line; 00103 line.init(page_image.get_xsize()); 00104 FILE *fp = fopen(filename, "w"); 00105 fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(), 00106 page_image.get_ysize()); 00107 for (int j = page_image.get_ysize()-1; j >= 0 ; --j) { 00108 page_image.get_line(0, j, page_image.get_xsize(), &line, 0); 00109 for (int i = 0; i < page_image.get_xsize(); ++i) { 00110 UINT8 b = line.pixels[i] ? 255 : 0; 00111 fwrite(&b, 1, 1, fp); 00112 } 00113 } 00114 fclose(fp); 00115 }
void TessBaseAPI::End() [static]
Definition at line 95 of file baseapi.cpp.
References end_tesseract(), and ResetAdaptiveClassifier().
Referenced by main().
00095 { 00096 ResetAdaptiveClassifier(); 00097 end_tesseract(); 00098 }
void TessBaseAPI::FindLines(BLOCK_LIST* block_list) [static, protected]
Definition at line 333 of file baseapi.cpp.
References pgeditor_read_file().
Referenced by RecognizeToString().
00333 { 00334 STRING input_file = "noname.tif"; 00335 // The following call creates a full-page block and then runs connected 00336 // component analysis and text line creation. 00337 pgeditor_read_file(input_file, block_list); 00338 }
void TessBaseAPI::HistogramRect(const UINT8* imagedata,
                                int bytes_per_pixel,
                                int bytes_per_line,
                                int left,
                                int top,
                                int right,
                                int bottom,
                                int* histogram) [static, protected]
Definition at line 211 of file baseapi.cpp.
Referenced by OtsuThreshold().
00215 { 00216 int width = right - left; 00217 memset(histogram, 0, sizeof(*histogram) * 256); 00218 const UINT8* pix = imagedata + 00219 top*bytes_per_line + 00220 left*bytes_per_pixel; 00221 for (int y = top; y < bottom; ++y) { 00222 for (int x = 0; x < width; ++x) { 00223 ++histogram[pix[x * bytes_per_pixel]]; 00224 } 00225 pix += bytes_per_line; 00226 } 00227 }
int TessBaseAPI::Init(const char* datapath,
                      const char* outputbase,
                      const char* configfile,
                      bool numeric_mode,
                      int argc,
                      char* argv[]) [static]
Definition at line 52 of file baseapi.cpp.
References init_tesseract().
Referenced by main().
00054 { 00055 int result = init_tesseract(datapath, outputbase, configfile, argc, argv); 00056 bln_numericmode.set_value(numeric_mode); 00057 return result; 00058 }
int TessBaseAPI::OtsuStats(const int* histogram,
                           int* H_out,
                           int* omega0_out) [static, protected]
Definition at line 232 of file baseapi.cpp.
References NULL.
Referenced by OtsuThreshold().
00234 { 00235 int H = 0; 00236 double mu_T = 0.0; 00237 for (int i = 0; i < 256; ++i) { 00238 H += histogram[i]; 00239 mu_T += i * histogram[i]; 00240 } 00241 00242 // Now maximize sig_sq_B over t. 00243 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf 00244 int best_t = -1; 00245 int omega_0, omega_1; 00246 int best_omega_0 = 0; 00247 double best_sig_sq_B = 0.0; 00248 double mu_0, mu_1, mu_t; 00249 omega_0 = 0; 00250 mu_t = 0.0; 00251 for (int t = 0; t < 255; ++t) { 00252 omega_0 += histogram[t]; 00253 mu_t += t * static_cast<double>(histogram[t]); 00254 if (omega_0 == 0) 00255 continue; 00256 omega_1 = H - omega_0; 00257 mu_0 = mu_t / omega_0; 00258 mu_1 = (mu_T - mu_t) / omega_1; 00259 double sig_sq_B = mu_1 - mu_0; 00260 sig_sq_B *= sig_sq_B * omega_0 * omega_1; 00261 if (best_t < 0 || sig_sq_B > best_sig_sq_B) { 00262 best_sig_sq_B = sig_sq_B; 00263 best_t = t; 00264 best_omega_0 = omega_0; 00265 } 00266 } 00267 if (H_out != NULL) *H_out = H; 00268 if (omega0_out != NULL) *omega0_out = best_omega_0; 00269 return best_t; 00270 }
void TessBaseAPI::OtsuThreshold(const UINT8* imagedata,
                                int bytes_per_pixel,
                                int bytes_per_line,
                                int left,
                                int top,
                                int right,
                                int bottom,
                                int* thresholds,
                                int* hi_values) [static, protected]
Definition at line 152 of file baseapi.cpp.
References HistogramRect(), and OtsuStats().
Referenced by CopyImageToTesseract().
00157 { 00158 // Of all channels with no good hi_value, keep the best so we can always 00159 // produce at least one answer. 00160 int best_hi_value = 0; 00161 int best_hi_index = 0; 00162 bool any_good_hivalue = false; 00163 double best_hi_dist = 0.0; 00164 00165 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 00166 thresholds[ch] = 0; 00167 hi_values[ch] = -1; 00168 // Compute the histogram of the image rectangle. 00169 int histogram[256]; 00170 HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, 00171 left, top, right, bottom, histogram); 00172 int H; 00173 int best_omega_0; 00174 int best_t = OtsuStats(histogram, &H, &best_omega_0); 00175 // To be a convincing foreground we must have a small fraction of H 00176 // or to be a convincing background we must have a large fraction of H. 00177 // In between we assume this channel contains no thresholding information. 00178 int hi_value = best_omega_0 < H * 0.5; 00179 thresholds[ch] = best_t; 00180 if (best_omega_0 > H * 0.75) { 00181 any_good_hivalue = true; 00182 hi_values[ch] = 0; 00183 } 00184 else if (best_omega_0 < H * 0.25) { 00185 any_good_hivalue = true; 00186 hi_values[ch] = 1; 00187 } 00188 else { 00189 // In case all channels are like this, keep the best of the bad lot. 00190 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; 00191 if (hi_dist > best_hi_dist) { 00192 best_hi_dist = hi_dist; 00193 best_hi_value = hi_value; 00194 best_hi_index = ch; 00195 } 00196 } 00197 } 00198 if (!any_good_hivalue) { 00199 // Use the best of the ones that were not good enough. 00200 hi_values[best_hi_index] = best_hi_value; 00201 } 00202 }
PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list,
                                 ETEXT_DESC* monitor) [static, protected]
Definition at line 342 of file baseapi.cpp.
References apply_box_training(), apply_boxes(), pgeditor_main(), recog_all_words(), and start_variables_editor().
Referenced by RecognizeToString().
00342 { 00343 if (tessedit_resegment_from_boxes) 00344 apply_boxes(block_list); 00345 if (edit_variables) 00346 start_variables_editor(); 00347 00348 PAGE_RES* page_res = new PAGE_RES(block_list); 00349 if (interactive_mode) { 00350 pgeditor_main(block_list); //pgeditor user I/F 00351 } else if (tessedit_train_from_boxes) { 00352 apply_box_training(block_list); 00353 } else { 00354 // Now run the main recognition. 00355 recog_all_words(page_res, monitor); 00356 } 00357 return page_res; 00358 }
char* TessBaseAPI::RecognizeToString() [static, protected]
Definition at line 321 of file baseapi.cpp.
References FindLines(), NULL, Recognize(), and TesseractToText().
Referenced by TesseractRect().
00321 { 00322 BLOCK_LIST block_list; 00323 00324 FindLines(&block_list); 00325 00326 // Now run the main recognition. 00327 PAGE_RES* page_res = Recognize(&block_list, NULL); 00328 00329 return TesseractToText(page_res); 00330 }
char* TessBaseAPI::TesseractRect(const UINT8* imagedata,
                                 int bytes_per_pixel,
                                 int bytes_per_line,
                                 int left,
                                 int top,
                                 int width,
                                 int height) [static]
Definition at line 71 of file baseapi.cpp.
References CopyImageToTesseract(), kMinRectSize, NULL, and RecognizeToString().
Referenced by main().
00075 { 00076 #ifndef TEXT_VERBOSE 00077 if (width < kMinRectSize || height < kMinRectSize) 00078 return NULL; // Nothing worth doing. 00079 #endif // TEXT_VERBOSE - useful for debugging single characters! 00080 00081 // Copy/Threshold the image to the tesseract global page_image. 00082 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, 00083 left, top, width, height); 00084 00085 return RecognizeToString(); 00086 }
char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) [static, protected]
Definition at line 362 of file baseapi.cpp.
References WERD_RES::best_choice, PAGE_RES_IT::forward(), NULL, PAGE_RES_IT::restart_page(), W_EOL, and PAGE_RES_IT::word().
Referenced by RecognizeToString().
00362 { 00363 if (page_res != NULL) { 00364 int total_length = 2; 00365 PAGE_RES_IT page_res_it(page_res); 00366 // Iterate over the data structures to extract the recognition result. 00367 for (page_res_it.restart_page(); page_res_it.word () != NULL; 00368 page_res_it.forward()) { 00369 WERD_RES *word = page_res_it.word(); 00370 WERD_CHOICE* choice = word->best_choice; 00371 if (choice != NULL) { 00372 total_length += choice->string().length() + 1; 00373 } 00374 } 00375 char* result = new char[total_length]; 00376 char* ptr = result; 00377 for (page_res_it.restart_page(); page_res_it.word () != NULL; 00378 page_res_it.forward()) { 00379 WERD_RES *word = page_res_it.word(); 00380 WERD_CHOICE* choice = word->best_choice; 00381 if (choice != NULL) { 00382 strcpy(ptr, choice->string().string()); 00383 ptr += strlen(ptr); 00384 if (word->word->flag(W_EOL)) 00385 *ptr++ = '\n'; 00386 else 00387 *ptr++ = ' '; 00388 } 00389 } 00390 *ptr++ = '\n'; 00391 *ptr = '\0'; 00392 delete page_res; 00393 return result; 00394 } 00395 return NULL; 00396 }
void TessBaseAPI::ThresholdRect(const UINT8* imagedata,
                                int bytes_per_pixel,
                                int bytes_per_line,
                                int left,
                                int top,
                                int width,
                                int height,
                                const int* thresholds,
                                const int* hi_values) [static, protected]
Definition at line 4 of file api.cpp.
References IMAGE::create(), IMAGELINE::init(), page_image, IMAGELINE::pixels, and IMAGE::put_line().
Referenced by CopyImageToTesseract().
00010 { 00011 IMAGELINE line; 00012 page_image.create(width, height, 1); 00013 line.init(width); 00014 // For each line in the image, fill the IMAGELINE class and put it into the 00015 // Tesseract global page_image. Note that Tesseract stores images with the 00016 // bottom at y=0 and 0 is black, so we need 2 kinds of inversion. 00017 //fmg: unless told otherwise, I'll assume that UNIT8* is == char* 00018 00019 00020 const UINT8* data = imagedata + top*bytes_per_line + left*bytes_per_pixel; 00021 for (int y = height - 1 ; y >= 0; --y) { 00022 const UINT8* pix = data; 00023 for (int x = 0; x < width; ++x, pix += bytes_per_pixel) { 00024 line.pixels[x] = 1; 00025 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 00026 if (hi_values[ch] >= 0 && (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { 00027 line.pixels[x] = 0; 00028 break; 00029 }//if 00030 }//for bytes in each pixel 00031 }//for x 00032 page_image.put_line(0, y, width, &line, 0); 00033 data += bytes_per_line; 00034 }//for y 00035 }//TessBaseAPI::ThresholdRect