00001
00021 #include "mfcpch.h"
00022 #include <string.h>
00023 #include <ctype.h>
00024 #include "varable.h"
00025 #include "tessvars.h"
00026 #include "control.h"
00027 #include "reject.h"
00028 #include "fixxht.h"
00029 #include "secname.h"
00030 #ifdef TEXT_VERBOSE
00031 #include "callcpp.h"
00032 #endif
00033
00037 #define EXTERN
00038
00039 EXTERN double_VAR (x_ht_fraction_of_caps_ht, 0.7,
00040 "Fract of cps ht est of xht");
00041 EXTERN double_VAR (x_ht_variation, 0.35,
00042 "Err band as fract of caps/xht dist");
00043 EXTERN double_VAR (x_ht_sub_variation, 0.5,
00044 "Err band as fract of caps/xht dist");
00045 EXTERN BOOL_VAR (rej_trial_ambigs, TRUE,
00046 "reject x-ht ambigs when under trial");
00047 EXTERN BOOL_VAR (x_ht_conservative_ambigs, FALSE,
00048 "Dont rely on ambigs + maxht");
00049 EXTERN BOOL_VAR (x_ht_check_est, TRUE, "Cross check estimates");
00050 EXTERN BOOL_VAR (x_ht_case_flip, FALSE, "Flip or reject suspect case");
00051 EXTERN BOOL_VAR (x_ht_include_dodgy_blobs, TRUE,
00052 "Include blobs with possible noise?");
00053 EXTERN BOOL_VAR (x_ht_limit_flip_trials, TRUE,
00054 "Dont do trial flips when ambigs are close to xht?");
00055 EXTERN BOOL_VAR (rej_use_check_block_occ, TRUE,
00056 "Analyse rejection behaviour");
00057
00058 EXTERN STRING_VAR (chs_non_ambig_caps_ht,
00059 "!#$%&()/12346789?ABDEFGHIKLNQRT[]\\bdfhkl",
00060 "Reliable ascenders");
00061 EXTERN STRING_VAR (chs_x_ht, "acegmnopqrsuvwxyz", "X height chars");
00062 EXTERN STRING_VAR (chs_non_ambig_x_ht, "aenqr", "reliable X height chars");
00063 EXTERN STRING_VAR (chs_ambig_caps_x, "cCmMoO05sSuUvVwWxXzZ",
00064 "X ht or caps ht chars");
00065 EXTERN STRING_VAR (chs_bl_ambig_caps_x, "pPyY", " Caps or descender ambigs");
00066
00067
00068 EXTERN STRING_VAR (chs_caps_ht,
00069 "!#$%&()/0123456789?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]\\bdfhkl{|}",
00070 "Ascender chars");
00071 EXTERN STRING_VAR (chs_desc, "gjpqy", "Descender chars");
00072 EXTERN STRING_VAR (chs_non_ambig_bl,
00073 "!#$%&01246789?ABCDEFGHIKLMNORSTUVWXYZabcdehiklmnorstuvwxz",
00074 "Reliable baseline chars");
00075 EXTERN STRING_VAR (chs_odd_top, "ijt", "Chars with funny ascender region");
00076 EXTERN STRING_VAR (chs_odd_bot, "()35JQ[]\\/{}|", "Chars with funny base");
00077
00078
00079 EXTERN STRING_VAR (chs_bl,
00080 "!#$%&()/01246789?ABCDEFGHIJKLMNOPRSTUVWXYZ[]\\abcdefhiklmnorstuvwxz{}",
00081 "Baseline chars");
00082 EXTERN STRING_VAR (chs_non_ambig_desc, "gq", "Reliable descender chars");
00147 void re_estimate_x_ht(
00148 WERD_RES *word_res,
00149 float *trial_x_ht
00150 ) {
00151 PBLOB_IT blob_it;
00152 INT16 blob_ht_above_baseline;
00153
00154 const char *word_str;
00155 INT16 i;
00156
00157 STATS all_blobs_ht (0, 300);
00158 STATS x_ht (0, 300);
00159 STATS caps_ht (0, 300);
00160 STATS case_ambig (0, 300);
00161
00162 INT16 rej_blobs_count = 0;
00163 INT16 rej_blobs_max_height = 0;
00164 INT32 rej_blobs_max_area = 0;
00165 float x_ht_ok_variation;
00166 float max_blob_ht;
00167 float marginally_above_x_ht;
00168
00169 BOX blob_box;
00170 float est_x_ht = 0.0;
00171 float est_caps_ht = 0.0;
00172
00173 BOOL8 est_caps_ht_certain = FALSE;
00174 BOOL8 est_x_ht_certain = FALSE;
00175 BOOL8 trial = FALSE;
00176 BOOL8 no_comment = FALSE;
00177 float ambig_lc_x_est;
00178 float ambig_uc_caps_est;
00179 INT16 x_ht_ambigs = 0;
00180 INT16 caps_ht_ambigs = 0;
00181
00182 #ifdef TEXT_VERBOSE
00183
00184 cprintf("h");
00185 #endif
00186
00187 x_ht_ok_variation =
00188 (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation;
00189
00190 word_str = word_res->best_choice->string ().string ();
00191
00192
00193
00194 blob_it.set_to_list (word_res->outword->blob_list ());
00195 for (blob_it.mark_cycle_pt (), i = 0;
00196 !blob_it.cycled_list (); blob_it.forward (), i++) {
00197 if (!dodgy_blob (blob_it.data ())) {
00198 blob_box = blob_it.data ()->bounding_box ();
00199 blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
00200 all_blobs_ht.add (blob_ht_above_baseline, 1);
00201
00202 if (word_res->reject_map[i].rejected ()) {
00203 rej_blobs_count++;
00204 if (blob_box.height () > rej_blobs_max_height)
00205 rej_blobs_max_height = blob_box.height ();
00206 if (blob_box.area () > rej_blobs_max_area)
00207 rej_blobs_max_area = blob_box.area ();
00208 }
00209 else {
00210 if (STRING (chs_non_ambig_x_ht).contains (word_str[i]))
00211 x_ht.add (blob_ht_above_baseline, 1);
00212
00213 if (STRING (chs_non_ambig_caps_ht).contains (word_str[i]))
00214 caps_ht.add (blob_ht_above_baseline, 1);
00215
00216 if (STRING (chs_ambig_caps_x).contains (word_str[i])) {
00217 case_ambig.add (blob_ht_above_baseline, 1);
00218 if (STRING (chs_x_ht).contains (word_str[i]))
00219 x_ht_ambigs++;
00220 else
00221 caps_ht_ambigs++;
00222 }
00223
00224 if (STRING (chs_bl_ambig_caps_x).contains (word_str[i])) {
00225 if (STRING (chs_x_ht).contains (word_str[i])) {
00226
00227 if ((bln_baseline_offset - blob_box.bottom ()) /
00228 (float) blob_box.height () > 0.15)
00229 x_ht.add (blob_ht_above_baseline, 1);
00230 }
00231 else {
00232
00233 if ((bln_baseline_offset - blob_box.bottom ()) /
00234 (float) blob_box.height () < 0.05)
00235 caps_ht.add (blob_ht_above_baseline, 1);
00236 }
00237 }
00238 }
00239 }
00240 }
00241 est_caps_ht = estimate_from_stats (caps_ht);
00242 est_x_ht = estimate_from_stats (x_ht);
00243 est_ambigs(word_res, case_ambig, &ambig_lc_x_est, &ambig_uc_caps_est);
00244 max_blob_ht = all_blobs_ht.ile (0.9999);
00245
00246 #ifndef SECURE_NAMES
00247 if (debug_x_ht_level >= 20) {
00248 tprintf ("Mode20:A: %s ", word_str);
00249 word_res->reject_map.print (debug_fp);
00250 tprintf (" XHT:%f CAP:%f MAX:%f AMBIG X:%f CAP:%f\n",
00251 est_x_ht, est_caps_ht, max_blob_ht,
00252 ambig_lc_x_est, ambig_uc_caps_est);
00253 }
00254 #endif
00255 if (!x_ht_conservative_ambigs &&
00256 (ambig_lc_x_est > 0) &&
00257 (ambig_lc_x_est == ambig_uc_caps_est) &&
00258 (max_blob_ht > ambig_lc_x_est + x_ht_ok_variation)) {
00259
00260 ambig_uc_caps_est = est_caps_ht;
00261 #ifndef SECURE_NAMES
00262 if (debug_x_ht_level >= 20)
00263 tprintf ("Mode20:B: Fiddle ambig_uc_caps_est to %f\n",
00264 ambig_lc_x_est);
00265 #endif
00266 }
00267
00268
00269
00270 if ((est_x_ht > 0) ||
00271 (est_caps_ht > 0) ||
00272 ((ambig_lc_x_est > 0) && (ambig_lc_x_est != ambig_uc_caps_est))) {
00273
00274 #ifndef SECURE_NAMES
00275 if (debug_x_ht_level >= 20)
00276 tprintf ("Mode20:C: Sensible Data\n", ambig_lc_x_est);
00277 #endif
00278 if (est_x_ht > 0) {
00279 est_x_ht_certain = TRUE;
00280 if (est_caps_ht == 0) {
00281 if ((ambig_uc_caps_est > ambig_lc_x_est) &&
00282 (ambig_uc_caps_est > est_x_ht + x_ht_ok_variation))
00283 est_caps_ht = ambig_uc_caps_est;
00284 else
00285 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
00286 }
00287 if (case_ambig.get_total () > 0)
00288 improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht);
00289 est_caps_ht_certain = caps_ht.get_total () > 0;
00290 #ifndef SECURE_NAMES
00291 if (debug_x_ht_level >= 20)
00292 tprintf ("Mode20:D: Est from xht XHT:%f CAP:%f\n",
00293 est_x_ht, est_caps_ht);
00294 #endif
00295 }
00296 else if (est_caps_ht > 0) {
00297 est_caps_ht_certain = TRUE;
00298 if ((ambig_lc_x_est > 0) &&
00299 (ambig_lc_x_est < est_caps_ht - x_ht_ok_variation))
00300 est_x_ht = ambig_lc_x_est;
00301 else
00302 est_x_ht = est_caps_ht * x_ht_fraction_of_caps_ht;
00303 if (ambig_lc_x_est + ambig_uc_caps_est > 0)
00304 improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht);
00305 est_x_ht_certain = x_ht.get_total () > 0;
00306 #ifndef SECURE_NAMES
00307 if (debug_x_ht_level >= 20)
00308 tprintf ("Mode20:E: Est from caps XHT:%f CAP:%f\n",
00309 est_x_ht, est_caps_ht);
00310 #endif
00311 }
00312 else {
00313
00314
00315
00316
00317 est_x_ht = ambig_lc_x_est;
00318 est_x_ht_certain = TRUE;
00319 if (ambig_uc_caps_est > ambig_lc_x_est) {
00320 est_caps_ht = ambig_uc_caps_est;
00321 est_caps_ht_certain = TRUE;
00322 }
00323 else
00324 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
00325
00326 #ifndef SECURE_NAMES
00327 if (debug_x_ht_level >= 20)
00328 tprintf ("Mode20:F: Est from ambigs XHT:%f CAP:%f\n",
00329 est_x_ht, est_caps_ht);
00330 #endif
00331 }
00332
00333
00334
00335
00336
00337
00338 if (x_ht_check_est) {
00339 if ((caps_ht.get_total () > 0) &&
00340 (est_x_ht + x_ht_ok_variation >= caps_ht.ile (0.0001))) {
00341 trial = TRUE;
00342 est_caps_ht = est_x_ht;
00343 est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht;
00344
00345 #ifndef SECURE_NAMES
00346 if (debug_x_ht_level >= 20)
00347 tprintf ("Mode20:G: Trial XHT:%f CAP:%f\n",
00348 est_x_ht, est_caps_ht);
00349 #endif
00350 }
00351 else if ((x_ht.get_total () > 0) &&
00352 (est_caps_ht - x_ht_ok_variation <= x_ht.ile (0.9999))) {
00353 trial = TRUE;
00354 est_x_ht = est_caps_ht;
00355 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht;
00356 #ifndef SECURE_NAMES
00357 if (debug_x_ht_level >= 20)
00358 tprintf ("Mode20:H: Trial XHT:%f CAP:%f\n",
00359 est_x_ht, est_caps_ht);
00360 #endif
00361 }
00362 }
00363 }
00364
00365 else {
00366
00367 marginally_above_x_ht = bln_x_height +
00368 x_ht_ok_variation * x_ht_sub_variation;
00369
00370
00371
00372
00373
00374
00375
00376
00377 #ifndef SECURE_NAMES
00378 if (debug_x_ht_level >= 20)
00379 tprintf ("Mode20:I: In the dark\n");
00380 #endif
00381
00382 if ((rej_blobs_count == 0) ||
00383 (rej_blobs_max_height < 0.3 * max_blob_ht) ||
00384 (rej_blobs_max_area < 0.3 * max_blob_ht * max_blob_ht)) {
00385 no_comment = TRUE;
00386 #ifndef SECURE_NAMES
00387 if (debug_x_ht_level >= 20)
00388 tprintf ("Mode20:J: No comment due to no rejects\n");
00389 #endif
00390 }
00391 else if (x_ht_limit_flip_trials &&
00392 ((max_blob_ht < marginally_above_x_ht) ||
00393 ((ambig_lc_x_est > 0) &&
00394 (ambig_lc_x_est == ambig_uc_caps_est) &&
00395 (ambig_lc_x_est < marginally_above_x_ht)))) {
00396 no_comment = TRUE;
00397 #ifndef SECURE_NAMES
00398 if (debug_x_ht_level >= 20)
00399 tprintf ("Mode20:K: No comment as close to xht %f < %f\n",
00400 ambig_lc_x_est, marginally_above_x_ht);
00401 #endif
00402 }
00403 else if (x_ht_conservative_ambigs && (ambig_uc_caps_est > 0)) {
00404 trial = TRUE;
00405 est_caps_ht = ambig_lc_x_est;
00406 est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht;
00407
00408 #ifndef SECURE_NAMES
00409 if (debug_x_ht_level >= 20)
00410 tprintf ("Mode20:L: Trial XHT:%f CAP:%f\n",
00411 est_x_ht, est_caps_ht);
00412 #endif
00413 }
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425 else {
00426 if (max_blob_ht <
00427 (bln_x_height + bln_x_height / x_ht_fraction_of_caps_ht) / 2.0) {
00428 trial = TRUE;
00429 est_x_ht = x_ht_fraction_of_caps_ht * max_blob_ht;
00430 est_caps_ht = max_blob_ht;
00431
00432 #ifndef SECURE_NAMES
00433 if (debug_x_ht_level >= 20)
00434 tprintf ("Mode20:M: Trial XHT:%f CAP:%f\n",
00435 est_x_ht, est_caps_ht);
00436 #endif
00437 }
00438 else {
00439 no_comment = TRUE;
00440 if (debug_x_ht_level >= 20)
00441 tprintf ("Mode20:N: No comment as nothing else matched\n");
00442 }
00443 }
00444 }
00445
00446
00447 if (!no_comment &&
00448 ((est_x_ht > 2 * bln_x_height) ||
00449 (est_x_ht / word_res->denorm.scale () <= min_sane_x_ht_pixels) ||
00450 (est_caps_ht <= est_x_ht) || (est_caps_ht >= 2.5 * est_x_ht))) {
00451 no_comment = TRUE;
00452 if (!trial && rej_use_xht) {
00453 if (debug_x_ht_level >= 2) {
00454 tprintf ("Sanity check rejecting %s ", word_str);
00455 word_res->reject_map.print (debug_fp);
00456 tprintf ("\n");
00457 }
00458 word_res->reject_map.rej_word_xht_fixup ();
00459
00460 }
00461 if (debug_x_ht_level >= 20)
00462 tprintf ("Mode20:O: No comment as nothing else matched\n");
00463 }
00464
00465 if (no_comment || trial) {
00466 word_res->x_height = bln_x_height / word_res->denorm.scale ();
00467 word_res->guessed_x_ht = TRUE;
00468 word_res->caps_height = (bln_x_height / x_ht_fraction_of_caps_ht) /
00469 word_res->denorm.scale ();
00470 word_res->guessed_caps_ht = TRUE;
00471
00472
00473
00474
00475
00476
00477
00478 if (rej_trial_ambigs &&
00479 ((word_res->reject_map.reject_count () > 0) ||
00480 (word_res->reject_map.length () == 1) ||
00481 ((x_ht_ambigs > 0) && (caps_ht_ambigs > 0)))) {
00482 #ifndef SECURE_NAMES
00483 if (debug_x_ht_level >= 2) {
00484 tprintf ("TRIAL Rej Ambigs %s ", word_str);
00485 word_res->reject_map.print (debug_fp);
00486 }
00487 #endif
00488 reject_ambigs(word_res);
00489 if (debug_x_ht_level >= 2) {
00490 tprintf (" ");
00491 word_res->reject_map.print (debug_fp);
00492 tprintf ("\n");
00493 }
00494 }
00495 }
00496 else {
00497 word_res->x_height = est_x_ht / word_res->denorm.scale ();
00498 word_res->guessed_x_ht = !est_x_ht_certain;
00499 word_res->caps_height = est_caps_ht / word_res->denorm.scale ();
00500 word_res->guessed_caps_ht = !est_caps_ht_certain;
00501 }
00502
00503 if (!no_comment && (fabs (est_x_ht - bln_x_height) > x_ht_ok_variation))
00504 *trial_x_ht = est_x_ht / word_res->denorm.scale ();
00505 else
00506 *trial_x_ht = 0.0;
00507
00508 #ifndef SECURE_NAMES
00509 if (((*trial_x_ht > 0) && (debug_x_ht_level >= 3)) ||
00510 (debug_x_ht_level >= 5)) {
00511 tprintf ("%s ", word_str);
00512 word_res->reject_map.print (debug_fp);
00513 tprintf
00514 (" X:%0.2f Cps:%0.2f Mxht:%0.2f RJ MxHt:%d MxAr:%d Rematch:%c\n",
00515 est_x_ht, est_caps_ht, max_blob_ht, rej_blobs_max_height,
00516 rej_blobs_max_area, *trial_x_ht > 0 ? '*' : ' ');
00517 }
00518 #endif
00519
00520 }
00521
00526 void check_block_occ(WERD_RES *word_res) {
00527 PBLOB_IT blob_it;
00528 STRING new_string;
00529 REJMAP new_map = word_res->reject_map;
00530 WERD_CHOICE *new_choice;
00531
00532 const char *word_str = word_res->best_choice->string ().string ();
00533 INT16 i;
00534 INT16 reject_count = 0;
00535 char confirmed_char;
00536 float x_ht;
00537 float caps_ht;
00538
00539 if (word_res->x_height > 0)
00540 x_ht = word_res->x_height * word_res->denorm.scale ();
00541 else
00542 x_ht = bln_x_height;
00543
00544 if (word_res->caps_height > 0)
00545 caps_ht = word_res->caps_height * word_res->denorm.scale ();
00546 else
00547 caps_ht = x_ht / x_ht_fraction_of_caps_ht;
00548
00549 blob_it.set_to_list (word_res->outword->blob_list ());
00550
00551 for (blob_it.mark_cycle_pt (), i = 0;
00552 !blob_it.cycled_list (); blob_it.forward (), i++) {
00553 new_string += word_str[i];
00554 if (word_res->reject_map[i].accepted ()) {
00555 confirmed_char = check_blob_occ (word_str[i],
00556 blob_it.data ()->bounding_box ().
00557 top () - bln_baseline_offset, x_ht,
00558 caps_ht);
00559
00560 if (confirmed_char == '\0') {
00561 if (rej_use_check_block_occ) {
00562 new_map[i].setrej_xht_fixup ();
00563 reject_count++;
00564 }
00565 }
00566 else
00567 new_string[i] = confirmed_char;
00568 }
00569 }
00570 if ((reject_count > 0) || (new_string != word_str)) {
00571 if (debug_x_ht_level >= 2) {
00572 tprintf ("Shape Verification: %s ", word_str);
00573 word_res->reject_map.print (debug_fp);
00574 tprintf (" -> %s ", new_string.string ());
00575 new_map.print (debug_fp);
00576 tprintf ("\n");
00577 }
00578 new_choice = new WERD_CHOICE (new_string.string (),
00579 word_res->best_choice->rating (),
00580 word_res->best_choice->certainty (),
00581 word_res->best_choice->permuter ());
00582 delete word_res->best_choice;
00583 word_res->best_choice = new_choice;
00584 word_res->reject_map = new_map;
00585 }
00586 }
00587
00588
00594 char check_blob_occ(char proposed_char,
00595 INT16 blob_ht_above_baseline,
00596 float x_ht,
00597 float caps_ht) {
00598 BOOL8 blob_definite_x_ht;
00599 BOOL8 blob_definite_caps_ht;
00600 float acceptable_variation;
00601
00602 acceptable_variation = (caps_ht - x_ht) * x_ht_variation;
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620 blob_definite_x_ht = blob_ht_above_baseline <= x_ht + acceptable_variation;
00621 blob_definite_caps_ht = blob_ht_above_baseline >=
00622 caps_ht - acceptable_variation;
00623
00624 if (STRING (chs_ambig_caps_x).contains (proposed_char)) {
00625 if ((!blob_definite_x_ht && !blob_definite_caps_ht) ||
00626 (proposed_char == '0' && !blob_definite_caps_ht) ||
00627 (proposed_char == 'o' && !blob_definite_x_ht))
00628 return '\0';
00629
00630 else if (blob_definite_caps_ht &&
00631 STRING (chs_x_ht).contains (proposed_char)) {
00632 if (x_ht_case_flip)
00633
00634 return (char) toupper (proposed_char);
00635 else
00636 return '\0';
00637 }
00638
00639 else if (blob_definite_x_ht &&
00640 !STRING (chs_x_ht).contains (proposed_char)) {
00641 if (x_ht_case_flip)
00642
00643 return (char) tolower (proposed_char);
00644 else
00645 return '\0';
00646 }
00647 }
00648 else
00649 if ((STRING (chs_non_ambig_x_ht).contains (proposed_char)
00650 && !blob_definite_x_ht)
00651 || (STRING (chs_non_ambig_caps_ht).contains (proposed_char)
00652 && !blob_definite_caps_ht))
00653 return '\0';
00654 return proposed_char;
00655 }
00656
00657
00663 float estimate_from_stats(STATS &stats) {
00664 if (stats.get_total () <= 0)
00665 return 0.0;
00666 else if (stats.get_total () >= 3)
00667 return stats.ile (0.5);
00668 else
00669 return stats.mean ();
00670 }
00671
00672
00679 void improve_estimate(WERD_RES *word_res,
00680 float &est_x_ht,
00681 float &est_caps_ht,
00682 STATS &x_ht,
00683 STATS &caps_ht) {
00684 PBLOB_IT blob_it;
00685 INT16 blob_ht_above_baseline;
00686
00687 const char *word_str;
00688 INT16 i;
00689 BOX blob_box;
00690 char confirmed_char;
00691 float new_val;
00692
00693 blob_it.set_to_list (word_res->outword->blob_list ());
00694 word_str = word_res->best_choice->string ().string ();
00695 for (blob_it.mark_cycle_pt (), i = 0;
00696 !blob_it.cycled_list (); blob_it.forward (), i++) {
00697 if ((STRING (chs_ambig_caps_x).contains (word_str[i])) &&
00698 (!dodgy_blob (blob_it.data ()))) {
00699 blob_box = blob_it.data ()->bounding_box ();
00700 blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
00701 confirmed_char = check_blob_occ (word_str[i],
00702 blob_ht_above_baseline,
00703 est_x_ht, est_caps_ht);
00704 if (confirmed_char != '\0')
00705 if (STRING (chs_x_ht).contains (confirmed_char))
00706 x_ht.add (blob_ht_above_baseline, 1);
00707 else
00708 caps_ht.add (blob_ht_above_baseline, 1);
00709 }
00710 }
00711 new_val = estimate_from_stats (x_ht);
00712 if (new_val > 0)
00713 est_x_ht = new_val;
00714 new_val = estimate_from_stats (caps_ht);
00715 if (new_val > 0)
00716 est_caps_ht = new_val;
00717 }
00718
00722 void reject_ambigs(
00723 WERD_RES *word) {
00724 const char *word_str;
00725 int i = 0;
00726
00727 word_str = word->best_choice->string ().string ();
00728 while (*word_str != '\0') {
00729 if (STRING (chs_ambig_caps_x).contains (*word_str))
00730 word->reject_map[i].setrej_xht_fixup ();
00731 word_str++;
00732 i++;
00733 }
00734 }
00735
00736
00740 void est_ambigs(
00741 WERD_RES *word_res,
00742 STATS &stats,
00743 float *ambig_lc_x_est,
00744 float *ambig_uc_caps_est
00745 ) {
00746 float x_ht_ok_variation;
00747 STATS short_ambigs (0, 300);
00748 STATS tall_ambigs (0, 300);
00749 PBLOB_IT blob_it;
00750 BOX blob_box;
00751 INT16 blob_ht_above_baseline;
00752
00753 const char *word_str;
00754 INT16 i;
00755 float min;
00756 float max;
00757 float short_limit;
00758 float tall_limit;
00759
00760 x_ht_ok_variation =
00761 (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation;
00762
00763 if (stats.get_total () == 0) {
00764 *ambig_lc_x_est = 0;
00765 *ambig_uc_caps_est = 0;
00766 }
00767 else {
00768 min = stats.ile (0.0);
00769 max = stats.ile (0.99999);
00770 if ((max - min) < x_ht_ok_variation) {
00771 *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean ();
00772
00773 }
00774 else {
00775
00776 short_limit = min + (max - min) * x_ht_variation;
00777 tall_limit = max - (max - min) * x_ht_variation;
00778 word_str = word_res->best_choice->string ().string ();
00779 blob_it.set_to_list (word_res->outword->blob_list ());
00780 for (blob_it.mark_cycle_pt (), i = 0;
00781 !blob_it.cycled_list (); blob_it.forward (), i++) {
00782 if (word_res->reject_map[i].accepted () &&
00783 STRING (chs_ambig_caps_x).contains (word_str[i]) &&
00784 (!dodgy_blob (blob_it.data ()))) {
00785 blob_box = blob_it.data ()->bounding_box ();
00786 blob_ht_above_baseline =
00787 blob_box.top () - bln_baseline_offset;
00788 if (blob_ht_above_baseline <= short_limit)
00789 short_ambigs.add (blob_ht_above_baseline, 1);
00790 else if (blob_ht_above_baseline >= tall_limit)
00791 tall_ambigs.add (blob_ht_above_baseline, 1);
00792 }
00793 }
00794 *ambig_lc_x_est = short_ambigs.mean ();
00795 *ambig_uc_caps_est = tall_ambigs.mean ();
00796
00797 if (*ambig_uc_caps_est - *ambig_lc_x_est <= x_ht_ok_variation)
00798 *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean ();
00799
00800 }
00801 }
00802 }
00803
00804
00814 BOOL8 dodgy_blob(PBLOB *blob) {
00815 OUTLINE_IT outline_it = blob->out_list ();
00816 INT16 highest_bottom = -MAX_INT16;
00817 INT16 lowest_top = MAX_INT16;
00818 BOX outline_box;
00819
00820 if (x_ht_include_dodgy_blobs)
00821 return FALSE;
00822 for (outline_it.mark_cycle_pt ();
00823 !outline_it.cycled_list (); outline_it.forward ()) {
00824 outline_box = outline_it.data ()->bounding_box ();
00825 if (lowest_top > outline_box.top ())
00826 lowest_top = outline_box.top ();
00827 if (highest_bottom < outline_box.bottom ())
00828 highest_bottom = outline_box.bottom ();
00829 }
00830 return highest_bottom >= lowest_top;
00831 }