#include "varable.h"
#include "statistc.h"
#include "pageres.h"
#include "notdll.h"
Go to the source code of this file.
char check_blob_occ | ( | char | proposed_char, | |
INT16 | blob_ht_above_baseline, | |||
float | x_ht, | |||
float | caps_ht | |||
) |
Checks a blob's height above the baseline against the x-height and caps-height expected for the proposed character.
Returns 0 for reject, or (possibly case shifted) confirmed char
Definition at line 594 of file fixxht.cpp.
Referenced by check_block_occ(), and improve_estimate().
00597 { 00598 BOOL8 blob_definite_x_ht; 00599 BOOL8 blob_definite_caps_ht; 00600 float acceptable_variation; 00601 00602 acceptable_variation = (caps_ht - x_ht) * x_ht_variation; 00603 /* 00604 REJECT if: 00605 \li expected descender and nothing significantly below BL 00606 \li expected ascender and nothing significantly above x-ht 00607 */ 00608 00609 /* 00610 IF AMBIG_CAPS_X_CHS 00611 IF blob is definitely an ascender ( > xht + xht err )AND 00612 char is an x-ht char 00613 THEN 00614 flip case 00615 IF blob is defintiely an x-ht ( <= xht + xht err ) AND 00616 char is an ascender char 00617 THEN 00618 flip case 00619 */ 00620 blob_definite_x_ht = blob_ht_above_baseline <= x_ht + acceptable_variation; 00621 blob_definite_caps_ht = blob_ht_above_baseline >= 00622 caps_ht - acceptable_variation; 00623 00624 if (STRING (chs_ambig_caps_x).contains (proposed_char)) { 00625 if ((!blob_definite_x_ht && !blob_definite_caps_ht) || 00626 (proposed_char == '0' && !blob_definite_caps_ht) || 00627 (proposed_char == 'o' && !blob_definite_x_ht)) 00628 return '\0'; 00629 00630 else if (blob_definite_caps_ht && 00631 STRING (chs_x_ht).contains (proposed_char)) { 00632 if (x_ht_case_flip) 00633 //flip to upper case 00634 return (char) toupper (proposed_char); 00635 else 00636 return '\0'; 00637 } 00638 00639 else if (blob_definite_x_ht && 00640 !STRING (chs_x_ht).contains (proposed_char)) { 00641 if (x_ht_case_flip) 00642 //flip to lower case 00643 return (char) tolower (proposed_char); 00644 else 00645 return '\0'; 00646 } 00647 } 00648 else 00649 if ((STRING (chs_non_ambig_x_ht).contains (proposed_char) 00650 && !blob_definite_x_ht) 00651 || (STRING (chs_non_ambig_caps_ht).contains (proposed_char) 00652 && !blob_definite_caps_ht)) 00653 return '\0'; 00654 return proposed_char; 00655 }
void check_block_occ | ( | WERD_RES * | word_res | ) |
Checks word for coarse block occupancy, rejecting more chars and flipping case of case-ambiguous chars as required.
Definition at line 526 of file fixxht.cpp.
References WERD_RES::best_choice, WERD::blob_list(), WERD_RES::caps_height, check_blob_occ(), debug_fp, WERD_RES::denorm, new_choice(), WERD_RES::outword, REJMAP::print(), WERD_RES::reject_map, DENORM::scale(), STRING::string(), tprintf(), and WERD_RES::x_height.
Referenced by classify_word_pass2().
00526 { 00527 PBLOB_IT blob_it; 00528 STRING new_string; 00529 REJMAP new_map = word_res->reject_map; 00530 WERD_CHOICE *new_choice; 00531 00532 const char *word_str = word_res->best_choice->string ().string (); 00533 INT16 i; 00534 INT16 reject_count = 0; 00535 char confirmed_char; 00536 float x_ht; 00537 float caps_ht; 00538 00539 if (word_res->x_height > 0) 00540 x_ht = word_res->x_height * word_res->denorm.scale (); 00541 else 00542 x_ht = bln_x_height; 00543 00544 if (word_res->caps_height > 0) 00545 caps_ht = word_res->caps_height * word_res->denorm.scale (); 00546 else 00547 caps_ht = x_ht / x_ht_fraction_of_caps_ht; 00548 00549 blob_it.set_to_list (word_res->outword->blob_list ()); 00550 00551 for (blob_it.mark_cycle_pt (), i = 0; 00552 !blob_it.cycled_list (); blob_it.forward (), i++) { 00553 new_string += word_str[i]; //default copy 00554 if (word_res->reject_map[i].accepted ()) { 00555 confirmed_char = check_blob_occ (word_str[i], 00556 blob_it.data ()->bounding_box (). 00557 top () - bln_baseline_offset, x_ht, 00558 caps_ht); 00559 00560 if (confirmed_char == '\0') { 00561 if (rej_use_check_block_occ) { 00562 new_map[i].setrej_xht_fixup (); 00563 reject_count++; 00564 } 00565 } 00566 else 00567 new_string[i] = confirmed_char; 00568 } 00569 } 00570 if ((reject_count > 0) || (new_string != word_str)) { 00571 if (debug_x_ht_level >= 2) { 00572 tprintf ("Shape Verification: %s ", word_str); 00573 word_res->reject_map.print (debug_fp); 00574 tprintf (" -> %s ", new_string.string ()); 00575 new_map.print (debug_fp); 00576 tprintf ("\n"); 00577 } 00578 new_choice = new WERD_CHOICE (new_string.string (), 00579 word_res->best_choice->rating (), 00580 word_res->best_choice->certainty (), 00581 word_res->best_choice->permuter ()); 00582 delete word_res->best_choice; 00583 word_res->best_choice = new_choice; 00584 word_res->reject_map = new_map; 00585 } 00586 }
See if blob has more than one outline, one above the other.
Definition at line 814 of file fixxht.cpp.
References BOX::bottom(), FALSE, MAX_INT16, PBLOB::out_list(), outline_it, and BOX::top().
Referenced by est_ambigs(), improve_estimate(), and re_estimate_x_ht().
00814 { 00815 OUTLINE_IT outline_it = blob->out_list (); 00816 INT16 highest_bottom = -MAX_INT16; 00817 INT16 lowest_top = MAX_INT16; 00818 BOX outline_box; 00819 00820 if (x_ht_include_dodgy_blobs) 00821 return FALSE; //no blob is ever dodgy 00822 for (outline_it.mark_cycle_pt (); 00823 !outline_it.cycled_list (); outline_it.forward ()) { 00824 outline_box = outline_it.data ()->bounding_box (); 00825 if (lowest_top > outline_box.top ()) 00826 lowest_top = outline_box.top (); 00827 if (highest_bottom < outline_box.bottom ()) 00828 highest_bottom = outline_box.bottom (); 00829 } 00830 return highest_bottom >= lowest_top; 00831 }
void est_ambigs | ( | WERD_RES * | word_res, | |
STATS & | stats, | |||
float * | ambig_lc_x_est, | |||
float * | ambig_uc_caps_est | |||
) |
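Estimates the lower-case x-height and upper-case caps-height from the height statistics of the word's case-ambiguous characters.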
Definition at line 740 of file fixxht.cpp.
References STATS::add(), WERD_RES::best_choice, WERD::blob_list(), dodgy_blob(), STATS::get_total(), STATS::ile(), max, STATS::mean(), min, WERD_RES::outword, WERD_RES::reject_map, and BOX::top().
Referenced by re_estimate_x_ht().
00745 { 00746 float x_ht_ok_variation; 00747 STATS short_ambigs (0, 300); 00748 STATS tall_ambigs (0, 300); 00749 PBLOB_IT blob_it; 00750 BOX blob_box; //blob bounding box 00751 INT16 blob_ht_above_baseline; 00752 00753 const char *word_str; 00754 INT16 i; 00755 float min; //min ambig ch ht 00756 float max; //max ambig ch ht 00757 float short_limit; // for lower case 00758 float tall_limit; // for upper case 00759 00760 x_ht_ok_variation = 00761 (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation; 00762 00763 if (stats.get_total () == 0) { 00764 *ambig_lc_x_est = 0; 00765 *ambig_uc_caps_est = 0; 00766 } 00767 else { 00768 min = stats.ile (0.0); 00769 max = stats.ile (0.99999); 00770 if ((max - min) < x_ht_ok_variation) { 00771 *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); 00772 //close enough 00773 } 00774 else { 00775 /* Try reclustering into lower and upper case chars */ 00776 short_limit = min + (max - min) * x_ht_variation; 00777 tall_limit = max - (max - min) * x_ht_variation; 00778 word_str = word_res->best_choice->string ().string (); 00779 blob_it.set_to_list (word_res->outword->blob_list ()); 00780 for (blob_it.mark_cycle_pt (), i = 0; 00781 !blob_it.cycled_list (); blob_it.forward (), i++) { 00782 if (word_res->reject_map[i].accepted () && 00783 STRING (chs_ambig_caps_x).contains (word_str[i]) && 00784 (!dodgy_blob (blob_it.data ()))) { 00785 blob_box = blob_it.data ()->bounding_box (); 00786 blob_ht_above_baseline = 00787 blob_box.top () - bln_baseline_offset; 00788 if (blob_ht_above_baseline <= short_limit) 00789 short_ambigs.add (blob_ht_above_baseline, 1); 00790 else if (blob_ht_above_baseline >= tall_limit) 00791 tall_ambigs.add (blob_ht_above_baseline, 1); 00792 } 00793 } 00794 *ambig_lc_x_est = short_ambigs.mean (); 00795 *ambig_uc_caps_est = tall_ambigs.mean (); 00796 /* Cop out if we havent got sensible clusters. 
*/ 00797 if (*ambig_uc_caps_est - *ambig_lc_x_est <= x_ht_ok_variation) 00798 *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); 00799 //close enough 00800 } 00801 } 00802 }
float estimate_from_stats | ( | STATS & | stats | ) |
Convert stats into estimates
returns 0.0, median, or mean
Definition at line 663 of file fixxht.cpp.
References STATS::get_total(), STATS::ile(), and STATS::mean().
Referenced by improve_estimate(), and re_estimate_x_ht().
00663 { 00664 if (stats.get_total () <= 0) 00665 return 0.0; 00666 else if (stats.get_total () >= 3) 00667 return stats.ile (0.5); //median 00668 else 00669 return stats.mean (); 00670 }
void improve_estimate | ( | WERD_RES * | word_res, | |
float & | est_x_ht, | |||
float & | est_caps_ht, | |||
STATS & | x_ht, | |||
STATS & | caps_ht | |||
) |
Improve estimates.
If good estimates, and case ambig chars, rescan blobs to fix case ambig blobs, re-estimate hts. FIX: maybe always do it after deciding x-height
Definition at line 679 of file fixxht.cpp.
References STATS::add(), WERD_RES::best_choice, WERD::blob_list(), check_blob_occ(), dodgy_blob(), estimate_from_stats(), WERD_RES::outword, and BOX::top().
Referenced by re_estimate_x_ht().
00683 { 00684 PBLOB_IT blob_it; 00685 INT16 blob_ht_above_baseline; 00686 00687 const char *word_str; 00688 INT16 i; 00689 BOX blob_box; //blob bounding box 00690 char confirmed_char; 00691 float new_val; 00692 00693 blob_it.set_to_list (word_res->outword->blob_list ()); 00694 word_str = word_res->best_choice->string ().string (); 00695 for (blob_it.mark_cycle_pt (), i = 0; 00696 !blob_it.cycled_list (); blob_it.forward (), i++) { 00697 if ((STRING (chs_ambig_caps_x).contains (word_str[i])) && 00698 (!dodgy_blob (blob_it.data ()))) { 00699 blob_box = blob_it.data ()->bounding_box (); 00700 blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; 00701 confirmed_char = check_blob_occ (word_str[i], 00702 blob_ht_above_baseline, 00703 est_x_ht, est_caps_ht); 00704 if (confirmed_char != '\0') 00705 if (STRING (chs_x_ht).contains (confirmed_char)) 00706 x_ht.add (blob_ht_above_baseline, 1); 00707 else 00708 caps_ht.add (blob_ht_above_baseline, 1); 00709 } 00710 } 00711 new_val = estimate_from_stats (x_ht); 00712 if (new_val > 0) 00713 est_x_ht = new_val; 00714 new_val = estimate_from_stats (caps_ht); 00715 if (new_val > 0) 00716 est_caps_ht = new_val; 00717 }
void re_estimate_x_ht | ( | WERD_RES * | word_res, | |
float * | trial_x_ht | |||
) |
Definition at line 147 of file fixxht.cpp.
References STATS::add(), BOX::area(), WERD_RES::best_choice, WERD::blob_list(), BOX::bottom(), WERD_RES::caps_height, cprintf(), debug_fp, WERD_RES::denorm, dodgy_blob(), est_ambigs(), estimate_from_stats(), FALSE, STATS::get_total(), WERD_RES::guessed_caps_ht, WERD_RES::guessed_x_ht, BOX::height(), STATS::ile(), improve_estimate(), REJMAP::length(), WERD_RES::outword, REJMAP::print(), REJMAP::rej_word_xht_fixup(), reject_ambigs(), REJMAP::reject_count(), WERD_RES::reject_map, DENORM::scale(), BOX::top(), tprintf(), TRUE, and WERD_RES::x_height.
Referenced by classify_word_pass2().
00150 { 00151 PBLOB_IT blob_it; 00152 INT16 blob_ht_above_baseline; 00153 00154 const char *word_str; 00155 INT16 i; 00156 00157 STATS all_blobs_ht (0, 300); //every blob in word 00158 STATS x_ht (0, 300); //confirmed pts in wd 00159 STATS caps_ht (0, 300); //confirmed pts in wd 00160 STATS case_ambig (0, 300); //lower case ambigs 00161 00162 INT16 rej_blobs_count = 0; 00163 INT16 rej_blobs_max_height = 0; 00164 INT32 rej_blobs_max_area = 0; 00165 float x_ht_ok_variation; 00166 float max_blob_ht; 00167 float marginally_above_x_ht; 00168 00169 BOX blob_box; //blob bounding box 00170 float est_x_ht = 0.0; //word estimate 00171 float est_caps_ht = 0.0; //word estimate 00172 //based on hard data? 00173 BOOL8 est_caps_ht_certain = FALSE; 00174 BOOL8 est_x_ht_certain = FALSE;//based on hard data? 00175 BOOL8 trial = FALSE; //Sepeculative values? 00176 BOOL8 no_comment = FALSE; //No change in xht 00177 float ambig_lc_x_est; 00178 float ambig_uc_caps_est; 00179 INT16 x_ht_ambigs = 0; 00180 INT16 caps_ht_ambigs = 0; 00181 00182 #ifdef TEXT_VERBOSE 00183 // gets a 'h', see ccmain/tesseractmain.dox 00184 cprintf("h"); 00185 #endif 00186 /* Calculate default variation of blob x_ht from bln x_ht for bln word */ 00187 x_ht_ok_variation = 00188 (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation; 00189 00190 word_str = word_res->best_choice->string ().string (); 00191 /* 00192 Cycle blobs, allocating to one of the stats sets when possible. 
00193 */ 00194 blob_it.set_to_list (word_res->outword->blob_list ()); 00195 for (blob_it.mark_cycle_pt (), i = 0; 00196 !blob_it.cycled_list (); blob_it.forward (), i++) { 00197 if (!dodgy_blob (blob_it.data ())) { 00198 blob_box = blob_it.data ()->bounding_box (); 00199 blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; 00200 all_blobs_ht.add (blob_ht_above_baseline, 1); 00201 00202 if (word_res->reject_map[i].rejected ()) { 00203 rej_blobs_count++; 00204 if (blob_box.height () > rej_blobs_max_height) 00205 rej_blobs_max_height = blob_box.height (); 00206 if (blob_box.area () > rej_blobs_max_area) 00207 rej_blobs_max_area = blob_box.area (); 00208 } 00209 else { 00210 if (STRING (chs_non_ambig_x_ht).contains (word_str[i])) 00211 x_ht.add (blob_ht_above_baseline, 1); 00212 00213 if (STRING (chs_non_ambig_caps_ht).contains (word_str[i])) 00214 caps_ht.add (blob_ht_above_baseline, 1); 00215 00216 if (STRING (chs_ambig_caps_x).contains (word_str[i])) { 00217 case_ambig.add (blob_ht_above_baseline, 1); 00218 if (STRING (chs_x_ht).contains (word_str[i])) 00219 x_ht_ambigs++; 00220 else 00221 caps_ht_ambigs++; 00222 } 00223 00224 if (STRING (chs_bl_ambig_caps_x).contains (word_str[i])) { 00225 if (STRING (chs_x_ht).contains (word_str[i])) { 00226 /* confirm x_height provided > 15% total height below baseline */ 00227 if ((bln_baseline_offset - blob_box.bottom ()) / 00228 (float) blob_box.height () > 0.15) 00229 x_ht.add (blob_ht_above_baseline, 1); 00230 } 00231 else { 00232 /* confirm caps_height provided < 5% total height below baseline */ 00233 if ((bln_baseline_offset - blob_box.bottom ()) / 00234 (float) blob_box.height () < 0.05) 00235 caps_ht.add (blob_ht_above_baseline, 1); 00236 } 00237 } 00238 } 00239 } 00240 } 00241 est_caps_ht = estimate_from_stats (caps_ht); 00242 est_x_ht = estimate_from_stats (x_ht); 00243 est_ambigs(word_res, case_ambig, &ambig_lc_x_est, &ambig_uc_caps_est); 00244 max_blob_ht = all_blobs_ht.ile (0.9999); 00245 00246 #ifndef 
SECURE_NAMES 00247 if (debug_x_ht_level >= 20) { 00248 tprintf ("Mode20:A: %s ", word_str); 00249 word_res->reject_map.print (debug_fp); 00250 tprintf (" XHT:%f CAP:%f MAX:%f AMBIG X:%f CAP:%f\n", 00251 est_x_ht, est_caps_ht, max_blob_ht, 00252 ambig_lc_x_est, ambig_uc_caps_est); 00253 } 00254 #endif 00255 if (!x_ht_conservative_ambigs && 00256 (ambig_lc_x_est > 0) && 00257 (ambig_lc_x_est == ambig_uc_caps_est) && 00258 (max_blob_ht > ambig_lc_x_est + x_ht_ok_variation)) { 00259 //may be zero but believe xht 00260 ambig_uc_caps_est = est_caps_ht; 00261 #ifndef SECURE_NAMES 00262 if (debug_x_ht_level >= 20) 00263 tprintf ("Mode20:B: Fiddle ambig_uc_caps_est to %f\n", 00264 ambig_lc_x_est); 00265 #endif 00266 } 00267 00268 /* Now make some estimates */ 00269 00270 if ((est_x_ht > 0) || 00271 (est_caps_ht > 0) || 00272 ((ambig_lc_x_est > 0) && (ambig_lc_x_est != ambig_uc_caps_est))) { 00273 /* There is some sensible data to go on so make the most of it. */ 00274 #ifndef SECURE_NAMES 00275 if (debug_x_ht_level >= 20) 00276 tprintf ("Mode20:C: Sensible Data\n", ambig_lc_x_est); 00277 #endif 00278 if (est_x_ht > 0) { 00279 est_x_ht_certain = TRUE; 00280 if (est_caps_ht == 0) { 00281 if ((ambig_uc_caps_est > ambig_lc_x_est) && 00282 (ambig_uc_caps_est > est_x_ht + x_ht_ok_variation)) 00283 est_caps_ht = ambig_uc_caps_est; 00284 else 00285 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; 00286 } 00287 if (case_ambig.get_total () > 0) 00288 improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht); 00289 est_caps_ht_certain = caps_ht.get_total () > 0; 00290 #ifndef SECURE_NAMES 00291 if (debug_x_ht_level >= 20) 00292 tprintf ("Mode20:D: Est from xht XHT:%f CAP:%f\n", 00293 est_x_ht, est_caps_ht); 00294 #endif 00295 } 00296 else if (est_caps_ht > 0) { 00297 est_caps_ht_certain = TRUE; 00298 if ((ambig_lc_x_est > 0) && 00299 (ambig_lc_x_est < est_caps_ht - x_ht_ok_variation)) 00300 est_x_ht = ambig_lc_x_est; 00301 else 00302 est_x_ht = est_caps_ht * 
x_ht_fraction_of_caps_ht; 00303 if (ambig_lc_x_est + ambig_uc_caps_est > 0) 00304 improve_estimate(word_res, est_x_ht, est_caps_ht, x_ht, caps_ht); 00305 est_x_ht_certain = x_ht.get_total () > 0; 00306 #ifndef SECURE_NAMES 00307 if (debug_x_ht_level >= 20) 00308 tprintf ("Mode20:E: Est from caps XHT:%f CAP:%f\n", 00309 est_x_ht, est_caps_ht); 00310 #endif 00311 } 00312 else { 00313 /* 00314 Do something based on case ambig chars alone - we have 00315 guessed that the ambigs are lower case. 00316 */ 00317 est_x_ht = ambig_lc_x_est; 00318 est_x_ht_certain = TRUE; 00319 if (ambig_uc_caps_est > ambig_lc_x_est) { 00320 est_caps_ht = ambig_uc_caps_est; 00321 est_caps_ht_certain = TRUE; 00322 } 00323 else 00324 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; 00325 00326 #ifndef SECURE_NAMES 00327 if (debug_x_ht_level >= 20) 00328 tprintf ("Mode20:F: Est from ambigs XHT:%f CAP:%f\n", 00329 est_x_ht, est_caps_ht); 00330 #endif 00331 } 00332 /* 00333 Check for sane interpretation of evidence: 00334 Try shifting caps ht if min certain caps ht is not significantly greater 00335 than the estimated x ht or the max certain x ht is not significantly less 00336 than the estimated caps ht. 
00337 */ 00338 if (x_ht_check_est) { 00339 if ((caps_ht.get_total () > 0) && 00340 (est_x_ht + x_ht_ok_variation >= caps_ht.ile (0.0001))) { 00341 trial = TRUE; 00342 est_caps_ht = est_x_ht; 00343 est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht; 00344 00345 #ifndef SECURE_NAMES 00346 if (debug_x_ht_level >= 20) 00347 tprintf ("Mode20:G: Trial XHT:%f CAP:%f\n", 00348 est_x_ht, est_caps_ht); 00349 #endif 00350 } 00351 else if ((x_ht.get_total () > 0) && 00352 (est_caps_ht - x_ht_ok_variation <= x_ht.ile (0.9999))) { 00353 trial = TRUE; 00354 est_x_ht = est_caps_ht; 00355 est_caps_ht = est_x_ht / x_ht_fraction_of_caps_ht; 00356 #ifndef SECURE_NAMES 00357 if (debug_x_ht_level >= 20) 00358 tprintf ("Mode20:H: Trial XHT:%f CAP:%f\n", 00359 est_x_ht, est_caps_ht); 00360 #endif 00361 } 00362 } 00363 } 00364 00365 else { 00366 /* There is no sensible data so we're in the dark. */ 00367 marginally_above_x_ht = bln_x_height + 00368 x_ht_ok_variation * x_ht_sub_variation; 00369 /* 00370 If there are no rejects, or the only rejects have a narrow height, 00371 or have a small area compared to a normal char, then estimate the x-height 00372 as the original one. (I.e dont fiddle about if the only rejects look like 00373 punctuation) - we use max height as mean or median will be too low if 00374 there are only two blobs - Eg "F." 
00375 */ 00376 00377 #ifndef SECURE_NAMES 00378 if (debug_x_ht_level >= 20) 00379 tprintf ("Mode20:I: In the dark\n"); 00380 #endif 00381 00382 if ((rej_blobs_count == 0) || 00383 (rej_blobs_max_height < 0.3 * max_blob_ht) || 00384 (rej_blobs_max_area < 0.3 * max_blob_ht * max_blob_ht)) { 00385 no_comment = TRUE; 00386 #ifndef SECURE_NAMES 00387 if (debug_x_ht_level >= 20) 00388 tprintf ("Mode20:J: No comment due to no rejects\n"); 00389 #endif 00390 } 00391 else if (x_ht_limit_flip_trials && 00392 ((max_blob_ht < marginally_above_x_ht) || 00393 ((ambig_lc_x_est > 0) && 00394 (ambig_lc_x_est == ambig_uc_caps_est) && 00395 (ambig_lc_x_est < marginally_above_x_ht)))) { 00396 no_comment = TRUE; 00397 #ifndef SECURE_NAMES 00398 if (debug_x_ht_level >= 20) 00399 tprintf ("Mode20:K: No comment as close to xht %f < %f\n", 00400 ambig_lc_x_est, marginally_above_x_ht); 00401 #endif 00402 } 00403 else if (x_ht_conservative_ambigs && (ambig_uc_caps_est > 0)) { 00404 trial = TRUE; 00405 est_caps_ht = ambig_lc_x_est; 00406 est_x_ht = x_ht_fraction_of_caps_ht * est_caps_ht; 00407 00408 #ifndef SECURE_NAMES 00409 if (debug_x_ht_level >= 20) 00410 tprintf ("Mode20:L: Trial XHT:%f CAP:%f\n", 00411 est_x_ht, est_caps_ht); 00412 #endif 00413 } 00414 /* 00415 If the top of the word is nowhere near where we expect ascenders to be 00416 \(less than half the x_ht \-> caps_ht distance\) 00417 00418 That is, suspect an all caps word at the x-ht & estimate x-ht 00419 accordingly, but only as a TRIAL! 00420 00421 NOTE we do NOT check location of baseline. Commas can descend as much as 00422 real descenders so we would need to do something to make sure that any 00423 disqualifying descenders were not at the end. 
00424 */ 00425 else { 00426 if (max_blob_ht < 00427 (bln_x_height + bln_x_height / x_ht_fraction_of_caps_ht) / 2.0) { 00428 trial = TRUE; 00429 est_x_ht = x_ht_fraction_of_caps_ht * max_blob_ht; 00430 est_caps_ht = max_blob_ht; 00431 00432 #ifndef SECURE_NAMES 00433 if (debug_x_ht_level >= 20) 00434 tprintf ("Mode20:M: Trial XHT:%f CAP:%f\n", 00435 est_x_ht, est_caps_ht); 00436 #endif 00437 } 00438 else { 00439 no_comment = TRUE; 00440 if (debug_x_ht_level >= 20) 00441 tprintf ("Mode20:N: No comment as nothing else matched\n"); 00442 } 00443 } 00444 } 00445 00446 /* Sanity check - reject word if fails */ 00447 if (!no_comment && 00448 ((est_x_ht > 2 * bln_x_height) || 00449 (est_x_ht / word_res->denorm.scale () <= min_sane_x_ht_pixels) || 00450 (est_caps_ht <= est_x_ht) || (est_caps_ht >= 2.5 * est_x_ht))) { 00451 no_comment = TRUE; 00452 if (!trial && rej_use_xht) { 00453 if (debug_x_ht_level >= 2) { 00454 tprintf ("Sanity check rejecting %s ", word_str); 00455 word_res->reject_map.print (debug_fp); 00456 tprintf ("\n"); 00457 } 00458 word_res->reject_map.rej_word_xht_fixup (); 00459 00460 } 00461 if (debug_x_ht_level >= 20) 00462 tprintf ("Mode20:O: No comment as nothing else matched\n"); 00463 } 00464 00465 if (no_comment || trial) { 00466 word_res->x_height = bln_x_height / word_res->denorm.scale (); 00467 word_res->guessed_x_ht = TRUE; 00468 word_res->caps_height = (bln_x_height / x_ht_fraction_of_caps_ht) / 00469 word_res->denorm.scale (); 00470 word_res->guessed_caps_ht = TRUE; 00471 /* 00472 Reject ambigs in the current word if we are uncertain and: 00473 \li there are rejects OR 00474 \li there is only one char which is an ambig OR 00475 \li there is conflict between the case of the ambigs even though there is 00476 \li no height separation Eg "Ms" recognised from "MS" 00477 */ 00478 if (rej_trial_ambigs && 00479 ((word_res->reject_map.reject_count () > 0) || 00480 (word_res->reject_map.length () == 1) || 00481 ((x_ht_ambigs > 0) && (caps_ht_ambigs > 0)))) 
{ 00482 #ifndef SECURE_NAMES 00483 if (debug_x_ht_level >= 2) { 00484 tprintf ("TRIAL Rej Ambigs %s ", word_str); 00485 word_res->reject_map.print (debug_fp); 00486 } 00487 #endif 00488 reject_ambigs(word_res); 00489 if (debug_x_ht_level >= 2) { 00490 tprintf (" "); 00491 word_res->reject_map.print (debug_fp); 00492 tprintf ("\n"); 00493 } 00494 } 00495 } 00496 else { 00497 word_res->x_height = est_x_ht / word_res->denorm.scale (); 00498 word_res->guessed_x_ht = !est_x_ht_certain; 00499 word_res->caps_height = est_caps_ht / word_res->denorm.scale (); 00500 word_res->guessed_caps_ht = !est_caps_ht_certain; 00501 } 00502 00503 if (!no_comment && (fabs (est_x_ht - bln_x_height) > x_ht_ok_variation)) 00504 *trial_x_ht = est_x_ht / word_res->denorm.scale (); 00505 else 00506 *trial_x_ht = 0.0; 00507 00508 #ifndef SECURE_NAMES 00509 if (((*trial_x_ht > 0) && (debug_x_ht_level >= 3)) || 00510 (debug_x_ht_level >= 5)) { 00511 tprintf ("%s ", word_str); 00512 word_res->reject_map.print (debug_fp); 00513 tprintf 00514 (" X:%0.2f Cps:%0.2f Mxht:%0.2f RJ MxHt:%d MxAr:%d Rematch:%c\n", 00515 est_x_ht, est_caps_ht, max_blob_ht, rej_blobs_max_height, 00516 rej_blobs_max_area, *trial_x_ht > 0 ? '*' : ' '); 00517 } 00518 #endif 00519 00520 }
void reject_ambigs | ( | WERD_RES * | word | ) |
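Rejects every x-height-ambiguous character in the word (any character found in chs_ambig_caps_x); characters that are already rejected are not skipped.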
Definition at line 722 of file fixxht.cpp.
References WERD_RES::best_choice, and WERD_RES::reject_map.
Referenced by re_estimate_x_ht().
00723 { 00724 const char *word_str; 00725 int i = 0; 00726 00727 word_str = word->best_choice->string ().string (); 00728 while (*word_str != '\0') { 00729 if (STRING (chs_ambig_caps_x).contains (*word_str)) 00730 word->reject_map[i].setrej_xht_fixup (); 00731 word_str++; 00732 i++; 00733 } 00734 }