00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "baseapi.h"
00021
00022 #include "tessedit.h"
00023 #include "pageres.h"
00024 #include "tessvars.h"
00025 #include "control.h"
00026 #include "applybox.h"
00027 #include "pgedit.h"
00028 #include "varabled.h"
00029 #include "adaptmatch.h"
00030
// Boolean variable, declared FALSE here. When it is true, Recognize() calls
// apply_boxes() on the block list before building the page results.
00031 BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
00032 "Take segmentation and labeling from box file");
// Boolean variable, declared FALSE here. When it is true (and interactive_mode
// is not), Recognize() calls apply_box_training() instead of
// recog_all_words().
00033 BOOL_VAR(tessedit_train_from_boxes, FALSE,
00034 "Generate training data from boxed chars");
00035
00036
// Smallest width and height, in pixels, that TesseractRect() will process
// unless TEXT_VERBOSE is defined; smaller rectangles make it return NULL.
00037 const int kMinRectSize = 10;
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 int TessBaseAPI::Init(const char* datapath, const char* outputbase,
00053 const char* configfile, bool numeric_mode,
00054 int argc, char* argv[]) {
00055 int result = init_tesseract(datapath, outputbase, configfile, argc, argv);
00056 bln_numericmode.set_value(numeric_mode);
00057 return result;
00058 }
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 char* TessBaseAPI::TesseractRect(const UINT8* imagedata,
00072 int bytes_per_pixel,
00073 int bytes_per_line,
00074 int left, int top,
00075 int width, int height) {
00076 #ifndef TEXT_VERBOSE
00077 if (width < kMinRectSize || height < kMinRectSize)
00078 return NULL;
00079 #endif // TEXT_VERBOSE - useful for debugging single characters!
00080
00081
00082 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line,
00083 left, top, width, height);
00084
00085 return RecognizeToString();
00086 }
00087
00088
00089
00090 void TessBaseAPI::ClearAdaptiveClassifier() {
00091 ResetAdaptiveClassifier();
00092 }
00093
00094
00095 void TessBaseAPI::End() {
00096 ResetAdaptiveClassifier();
00097 end_tesseract();
00098 }
00099
00100
00101 void TessBaseAPI::DumpPGM(const char* filename) {
00102 IMAGELINE line;
00103 line.init(page_image.get_xsize());
00104 FILE *fp = fopen(filename, "w");
00105 fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(),
00106 page_image.get_ysize());
00107 for (int j = page_image.get_ysize()-1; j >= 0 ; --j) {
00108 page_image.get_line(0, j, page_image.get_xsize(), &line, 0);
00109 for (int i = 0; i < page_image.get_xsize(); ++i) {
00110 UINT8 b = line.pixels[i] ? 255 : 0;
00111 fwrite(&b, 1, 1, fp);
00112 }
00113 }
00114 fclose(fp);
00115 }
00116
00117
00118
00119 void TessBaseAPI::CopyImageToTesseract(const UINT8* imagedata,
00120 int bytes_per_pixel,
00121 int bytes_per_line,
00122 int left, int top,
00123 int width, int height) {
00124 if (bytes_per_pixel > 0) {
00125
00126 int* thresholds = new int[bytes_per_pixel];
00127 int* hi_values = new int[bytes_per_pixel];
00128
00129
00130 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
00131 left, top, left + width, top + height,
00132 thresholds, hi_values);
00133
00134
00135 ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line,
00136 left, top, width, height,
00137 thresholds, hi_values);
00138 delete [] thresholds;
00139 delete [] hi_values;
00140 } else {
00141 CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height);
00142 }
00143 }
00144
00145
00146
00147
00148
00149
00150
00151
00152 void TessBaseAPI::OtsuThreshold(const UINT8* imagedata,
00153 int bytes_per_pixel,
00154 int bytes_per_line,
00155 int left, int top, int right, int bottom,
00156 int* thresholds,
00157 int* hi_values) {
00158
00159
00160 int best_hi_value = 0;
00161 int best_hi_index = 0;
00162 bool any_good_hivalue = false;
00163 double best_hi_dist = 0.0;
00164
00165 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00166 thresholds[ch] = 0;
00167 hi_values[ch] = -1;
00168
00169 int histogram[256];
00170 HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
00171 left, top, right, bottom, histogram);
00172 int H;
00173 int best_omega_0;
00174 int best_t = OtsuStats(histogram, &H, &best_omega_0);
00175
00176
00177
00178 int hi_value = best_omega_0 < H * 0.5;
00179 thresholds[ch] = best_t;
00180 if (best_omega_0 > H * 0.75) {
00181 any_good_hivalue = true;
00182 hi_values[ch] = 0;
00183 }
00184 else if (best_omega_0 < H * 0.25) {
00185 any_good_hivalue = true;
00186 hi_values[ch] = 1;
00187 }
00188 else {
00189
00190 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
00191 if (hi_dist > best_hi_dist) {
00192 best_hi_dist = hi_dist;
00193 best_hi_value = hi_value;
00194 best_hi_index = ch;
00195 }
00196 }
00197 }
00198 if (!any_good_hivalue) {
00199
00200 hi_values[best_hi_index] = best_hi_value;
00201 }
00202 }
00203
00204
00205
00206
00207
00208
00209
00210
00211 void TessBaseAPI::HistogramRect(const UINT8* imagedata,
00212 int bytes_per_pixel,
00213 int bytes_per_line,
00214 int left, int top, int right, int bottom,
00215 int* histogram) {
00216 int width = right - left;
00217 memset(histogram, 0, sizeof(*histogram) * 256);
00218 const UINT8* pix = imagedata +
00219 top*bytes_per_line +
00220 left*bytes_per_pixel;
00221 for (int y = top; y < bottom; ++y) {
00222 for (int x = 0; x < width; ++x) {
00223 ++histogram[pix[x * bytes_per_pixel]];
00224 }
00225 pix += bytes_per_line;
00226 }
00227 }
00228
00229
00230
00231
00232 int TessBaseAPI::OtsuStats(const int* histogram,
00233 int* H_out,
00234 int* omega0_out) {
00235 int H = 0;
00236 double mu_T = 0.0;
00237 for (int i = 0; i < 256; ++i) {
00238 H += histogram[i];
00239 mu_T += i * histogram[i];
00240 }
00241
00242
00243
00244 int best_t = -1;
00245 int omega_0, omega_1;
00246 int best_omega_0 = 0;
00247 double best_sig_sq_B = 0.0;
00248 double mu_0, mu_1, mu_t;
00249 omega_0 = 0;
00250 mu_t = 0.0;
00251 for (int t = 0; t < 255; ++t) {
00252 omega_0 += histogram[t];
00253 mu_t += t * static_cast<double>(histogram[t]);
00254 if (omega_0 == 0)
00255 continue;
00256 omega_1 = H - omega_0;
00257 mu_0 = mu_t / omega_0;
00258 mu_1 = (mu_T - mu_t) / omega_1;
00259 double sig_sq_B = mu_1 - mu_0;
00260 sig_sq_B *= sig_sq_B * omega_0 * omega_1;
00261 if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
00262 best_sig_sq_B = sig_sq_B;
00263 best_t = t;
00264 best_omega_0 = omega_0;
00265 }
00266 }
00267 if (H_out != NULL) *H_out = H;
00268 if (omega0_out != NULL) *omega0_out = best_omega_0;
00269 return best_t;
00270 }
00271
00272
00273
00274
00275 void TessBaseAPI::ThresholdRect(const UINT8* imagedata,
00276 int bytes_per_pixel,
00277 int bytes_per_line,
00278 int left, int top,
00279 int width, int height,
00280 const int* thresholds,
00281 const int* hi_values) {
00282 IMAGELINE line;
00283 page_image.create(width, height, 1);
00284 line.init(width);
00285
00286
00287
00288 const UINT8* data = imagedata + top*bytes_per_line + left*bytes_per_pixel;
00289 for (int y = height - 1 ; y >= 0; --y) {
00290 const UINT8* pix = data;
00291 for (int x = 0; x < width; ++x, pix += bytes_per_pixel) {
00292 line.pixels[x] = 1;
00293 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
00294 if (hi_values[ch] >= 0 &&
00295 (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
00296 line.pixels[x] = 0;
00297 break;
00298 }
00299 }
00300 }
00301 page_image.put_line(0, y, width, &line, 0);
00302 data += bytes_per_line;
00303 }
00304 }
00305
00306
00307
00308 void TessBaseAPI::CopyBinaryRect(const UINT8* imagedata,
00309 int bytes_per_line,
00310 int left, int top,
00311 int width, int height) {
00312
00313 IMAGE image;
00314 image.capture(const_cast<UINT8*>(imagedata),
00315 bytes_per_line*8, top + height, 1);
00316 page_image.create(width, height, 1);
00317 copy_sub_image(&image, left, top, width, height, &page_image, 0, 0, false);
00318 }
00319
00320
00321 char* TessBaseAPI::RecognizeToString() {
00322 BLOCK_LIST block_list;
00323
00324 FindLines(&block_list);
00325
00326
00327 PAGE_RES* page_res = Recognize(&block_list, NULL);
00328
00329 return TesseractToText(page_res);
00330 }
00331
00332
00333 void TessBaseAPI::FindLines(BLOCK_LIST* block_list) {
00334 STRING input_file = "noname.tif";
00335
00336
00337 pgeditor_read_file(input_file, block_list);
00338 }
00339
00340
00341
00342 PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) {
00343 if (tessedit_resegment_from_boxes)
00344 apply_boxes(block_list);
00345 if (edit_variables)
00346 start_variables_editor();
00347
00348 PAGE_RES* page_res = new PAGE_RES(block_list);
00349 if (interactive_mode) {
00350 pgeditor_main(block_list);
00351 } else if (tessedit_train_from_boxes) {
00352 apply_box_training(block_list);
00353 } else {
00354
00355 recog_all_words(page_res, monitor);
00356 }
00357 return page_res;
00358 }
00359
00360
00361
00362 char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) {
00363 if (page_res != NULL) {
00364 int total_length = 2;
00365 PAGE_RES_IT page_res_it(page_res);
00366
00367 for (page_res_it.restart_page(); page_res_it.word () != NULL;
00368 page_res_it.forward()) {
00369 WERD_RES *word = page_res_it.word();
00370 WERD_CHOICE* choice = word->best_choice;
00371 if (choice != NULL) {
00372 total_length += choice->string().length() + 1;
00373 }
00374 }
00375 char* result = new char[total_length];
00376 char* ptr = result;
00377 for (page_res_it.restart_page(); page_res_it.word () != NULL;
00378 page_res_it.forward()) {
00379 WERD_RES *word = page_res_it.word();
00380 WERD_CHOICE* choice = word->best_choice;
00381 if (choice != NULL) {
00382 strcpy(ptr, choice->string().string());
00383 ptr += strlen(ptr);
00384 if (word->word->flag(W_EOL))
00385 *ptr++ = '\n';
00386 else
00387 *ptr++ = ' ';
00388 }
00389 }
00390 *ptr++ = '\n';
00391 *ptr = '\0';
00392 delete page_res;
00393 return result;
00394 }
00395 return NULL;
00396 }
00397