ccmain/reject.h

Go to the documentation of this file.
00001 
00020 #ifndef           REJECT_H
00021 #define           REJECT_H
00022 
00023 #include          "varable.h"
00024 #include          "pageres.h"
00025 #include          "notdll.h"
00026 
// Extern declarations of the variables exported by reject.h.
// Each *_VAR_H macro (INT / BOOL / double / STRING) is invoked with a variable
// name, a value and a description string. The macros themselves are not
// defined in this file (presumably in varable.h, and the value is presumably
// the default - TODO confirm against the macro definitions).
00029 extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm");
00030 extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm");
00031 extern BOOL_VAR_H (tessedit_use_nn, TRUE, "");  // NOTE(review): empty description string
00032 extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug");  // NOTE(review): description says "Adaption" for a rejection variable - confirm against the definition
00033 extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats");
00034 extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
00035 extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5,
00036 "Aspect ratio dot/hyphen test");
00037 extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8,
00038 "Aspect ratio dot/hyphen test");
00039 extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE,
00040 "Use DOC dawg in 11l conf. detector");
00041 extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test");
00042 extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
00043 extern BOOL_VAR_H (one_ell_conflict_default, TRUE,
00044 "one_ell_conflict default");
00045 extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?");
00046 extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?");
00047 extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?");
00048 extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches");
00049 extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check");
00050 extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE,
00051 "Double check for confusions");
00052 extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
00053 extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts");
00054 extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
// NOTE(review): the next three variables share one description string, so the
// descriptions alone do not tell them apart.
00055 extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
00056 extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
00057 extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
00058 extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE,
00059 "Require stronger NN match");
00060 extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score");
00061 extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
00062 extern INT_VAR_H (nn_conf_initial_i_level, 3,
00063 "NN accept initial Ii match level ");
00064 extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE,
00065 "Dubious chars next to reject?");
00066 extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE,
00067 "Stop unrej of non A/N wds?");
00068 extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
00069 extern BOOL_VAR_H (rej_use_tess_accepted, TRUE,
00070 "Individual rejection control");
00071 extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control");
00072 extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control");
00073 extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check");
00074 extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
00075 extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85,
00076 "if >this fract");
00077 extern INT_VAR_H (rej_mostly_reject_mode, 1,
00078 "0-never, 1-afterNN, 2-after new xht");
00079 extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2,
00080 "if >this fract then reject");
00081 extern INT_VAR_H (net_image_width, 40, "NN input image width");
00082 extern INT_VAR_H (net_image_height, 36, "NN input image height");
00083 extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height");
00084 extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit");
00085 extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes");
00086 extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score");
00087 extern double_VAR_H (nn_reject_head_and_shoulders, 0.6,
00088 "top scores sep factor");
// In the two string values below, the octal escape 075 is the character '='.
00089 extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075",
00090 "Allow NN to unrej");
00091 extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075",
00092 "Allow NN to unrej");
00093 extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
00094 extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set");
00095 extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set");
// The left-of-reject and right-of-reject sets are declared with identical values.
00096 extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"",
00097 "Unreliable chars");
00098 extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"",
00099 "Unreliable chars");
00100 extern INT_VAR_H (min_sane_x_ht_pixels, 8,
00101 "Reject any x-ht lt or eq than this");
00104 void set_done(  // set the done flag (presumably on word - TODO confirm)
00105               WERD_RES *word,
00106               INT16 pass);  // NOTE(review): presumably the pass number, as in make_reject_map - confirm
00107 void make_reject_map(            // make the reject map for a word
00108                      WERD_RES *word,
00109                      BLOB_CHOICE_LIST_CLIST *blob_choices,  // presumably the "detailed results" of the original (displaced) comment - TODO confirm
00110                      ROW *row,
00111                      INT16 pass  // which pass: 1st or 2nd?
00112                     );
// Prototypes only; the definitions are not part of this header.
00113 void reject_blanks(WERD_RES *word); 
00114 void reject_I_1_L(WERD_RES *word); 
00115                                  // "detailed results": displaced comment; presumably describes blob_choices on the next line - TODO confirm
00116 void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices); 
00117 float compute_reject_threshold(  // compute the threshold; returned as a float
00118                                BLOB_CHOICE_LIST_CLIST *blob_choices);  // presumably the "detailed results" of the original (displaced) comment - TODO confirm
00119 int sort_floats(                   // comparison function for qsort
00120                 const void *arg1,  // pointer to a float
00121                 const void *arg2);  // pointer to a float (the original "ptrs to floats" covers both arguments)
// Prototypes only; the definitions are not part of this header.
// The first two take a WERD_RES; the last three take the word as a C string.
00122 void reject_edge_blobs(WERD_RES *word); 
00123 BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);  // NOTE(review): update_map presumably selects whether the word's reject map is modified - confirm
00124 INT16 first_alphanum_pos(const char *word);  // NOTE(review): presumably an index into word; the "not found" value is not visible here - confirm
00125 INT16 alpha_count(const char *word); 
00126 BOOL8 word_contains_non_1_digit(const char *word); 
00127 BOOL8 test_ambig_word(  // test the word for ambiguity
00128                       WERD_RES *word);
00129                                  // "original word": displaced comment; presumably describes start_word on the next line - TODO confirm
00130 BOOL8 ambig_word(const char *start_word,
00131                  char *temp_word,     // alterable copy (presumably of start_word - TODO confirm)
00132                  INT16 test_char_pos  // index of the character to alter
00133                 );
00134 const char *char_ambiguities(char c);  // NOTE(review): ownership/lifetime of the returned string is not visible here - confirm before freeing or storing it
00135 
00136 #ifndef EMBEDDED
00137 void test_ambigs(const char *word);  // declared only when EMBEDDED is not defined
00138 #endif
00139  
// NOTE(review): "nn" presumably means neural net (the variable descriptions in
// this header use "NN") - confirm. Both functions take the word and its row.
00140 void nn_recover_rejects(WERD_RES *word, ROW *row); 
00141 void nn_match_word(  // match a word
00142                    WERD_RES *word,
00143                    ROW *row);
00144                                  // "of character": displaced comment; presumably describes scaled_image on the next line - TODO confirm
00145 INT16 nn_match_char(IMAGE &scaled_image,
00146                     float baseline_pos,       // relative to scaled_image
00147                     BOOL8 dict_word,          // part of a dictionary word?
00148                     BOOL8 checked_dict_word,  // part of a dictionary word? (NOTE(review): same comment as dict_word in the original; the difference is not visible here)
00149                     BOOL8 sensible_word,      // part of an acceptable string?
00150                     BOOL8 centre,             // not at the word ends?
00151                     BOOL8 good_quality_word,  // initial segmentation
00152                     char tess_ch              // confirm this character?
00153                    );
// NOTE(review): no comments in the original. The five BOOL8 flags and tess_ch
// have the same names as nn_match_char's parameters; top/next with their
// scores are presumably the best and second-best net candidates - TODO confirm.
00154 INT16 evaluate_net_match(char top,
00155                          float top_score,
00156                          char next,
00157                          float next_score,
00158                          char tess_ch,
00159                          BOOL8 dict_word,
00160                          BOOL8 checked_dict_word,
00161                          BOOL8 sensible_word,
00162                          BOOL8 centre,
00163                          BOOL8 good_quality_word);
// Prototypes only; both take the word result and return nothing.
00164 void dont_allow_dubious_chars(WERD_RES *word); 
00165 
00166 void dont_allow_1Il(WERD_RES *word); 
00167 
00168 INT16 count_alphanums(  // how many alphanumerics
00169                       WERD_RES *word);
00170 void reject_mostly_rejects(  // reject all if most are rejected
00171                            WERD_RES *word);
// Prototypes only; the definitions are not part of this header.
00172 BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row); 
00173 BOOL8 repeated_ch_string(const char *rep_ch_str); 
00174 INT16 safe_dict_word(const char *s);  // NOTE(review): returns INT16, not BOOL8 - the meaning of the value is not visible here; confirm before treating it as a boolean
00175 void flip_hyphens(WERD_RES *word); 
00176 void flip_0O(WERD_RES *word); 
00177 BOOL8 non_O_upper(char c); 
00178 BOOL8 non_0_digit(char c); 
00179 #endif

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1