00001
00020 #ifndef REJECT_H
00021 #define REJECT_H
00022
00023 #include "varable.h"
00024 #include "pageres.h"
00025 #include "notdll.h"
00026
// General rejection/acceptance tunables, declared through the *_VAR_H macros
// from "varable.h" (the macro definitions are not visible in this header).
// Each use passes a variable name, a value and a description string; the
// value is presumably the default -- TODO confirm against varable.h.
00029 extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm");
00030 extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm");
// NOTE(review): tessedit_use_nn is declared with an empty description string.
00031 extern BOOL_VAR_H (tessedit_use_nn, TRUE, "");
// NOTE(review): the description below reads "Adaption debug" although the
// variable name refers to rejection -- possibly a stale string; confirm.
00032 extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug");
00033 extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats");
00034 extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
// The lower and upper hyphen-flip variables share one description string and
// differ only in name and value (1.5 vs 1.8).
00035 extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5,
00036 "Aspect ratio dot/hyphen test");
00037 extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8,
00038 "Aspect ratio dot/hyphen test");
// Boolean tunables which, going by their names and description strings,
// relate to the 1/I/l ("one-ell") conflict detector. Declared through
// BOOL_VAR_H from "varable.h"; the TRUE/FALSE argument is presumably the
// default value -- TODO confirm against the macro definition.
00039 extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE,
00040 "Use DOC dawg in 11l conf. detector");
00041 extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test");
00042 extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
00043 extern BOOL_VAR_H (one_ell_conflict_default, TRUE,
00044 "one_ell_conflict default");
// Display/debug switches and two neural-net ("NN" in the description
// strings) matching options. All five are passed FALSE, which is presumably
// the default -- TODO confirm against BOOL_VAR_H in "varable.h".
00045 extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?");
00046 extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?");
00047 extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?");
00048 extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches");
00049 extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check");
// Tunables for NN handling of character confusions (1Il, Ss, hyphen), as
// described by their description strings. Declared through the *_VAR_H
// macros from "varable.h"; the second argument is presumably the default
// value -- TODO confirm against the macro definitions.
00050 extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE,
00051 "Double check for confusions");
00052 extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
00053 extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts");
00054 extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
// The next three variables share the identical description string
// "NN dodgy 1Il cross check"; only the names distinguish them.
00055 extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
00056 extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
00057 extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
00058 extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE,
00059 "Require stronger NN match");
00060 extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score");
// The two "level" variables are integers; what each level value means is
// not defined in this header -- see the code that reads them.
00061 extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
00062 extern INT_VAR_H (nn_conf_initial_i_level, 3,
00063 "NN accept initial Ii match level ");
// Switches whose names and descriptions suggest they block "un-rejection"
// of certain characters or words -- presumably consulted where previously
// rejected characters may be re-accepted; TODO confirm at the use sites.
// Declared through BOOL_VAR_H from "varable.h".
00064 extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE,
00065 "Dubious chars next to reject?");
00066 extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE,
00067 "Stop unrej of non A/N wds?");
00068 extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
// Rejection control tunables, declared through the *_VAR_H macros from
// "varable.h" (second argument presumably the default -- TODO confirm).
// The first three share the description "Individual rejection control".
00069 extern BOOL_VAR_H (rej_use_tess_accepted, TRUE,
00070 "Individual rejection control");
00071 extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control");
00072 extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control");
// These two share the description "Extend permuter check".
00073 extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check");
00074 extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
// A fraction (0.85) paired with a mode selector; the mode's description
// string lists the meaning of values 0, 1 and 2.
00075 extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85,
00076 "if >this fract");
00077 extern INT_VAR_H (rej_mostly_reject_mode, 1,
00078 "0-never, 1-afterNN, 2-after new xht");
// NOTE(review): the name says "aspect_ratio" while the description says
// "fract" -- confirm which quantity is actually compared.
00079 extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2,
00080 "if >this fract then reject");
// NN input geometry and score thresholds, per the description strings.
// Units of the integer sizes are not stated here (presumably pixels for the
// image dimensions -- TODO confirm). Declared through the *_VAR_H macros
// from "varable.h".
00081 extern INT_VAR_H (net_image_width, 40, "NN input image width");
00082 extern INT_VAR_H (net_image_height, 36, "NN input image height");
00083 extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height");
00084 extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit");
00085 extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes");
00086 extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score");
00087 extern double_VAR_H (nn_reject_head_and_shoulders, 0.6,
00088 "top scores sep factor");
// String-valued tunables holding sets of characters, declared through
// STRING_VAR_H from "varable.h" (second argument presumably the default
// value -- TODO confirm).
// In the first two strings "\075" is the octal escape for '='.
00089 extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075",
00090 "Allow NN to unrej");
00091 extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075",
00092 "Allow NN to unrej");
// Character sets for the three confusion classes named in the descriptions.
00093 extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
00094 extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set");
00095 extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set");
// The left-of-reject and right-of-reject sets are given identical strings.
// Within them "\\" is one backslash and "\"" is one double quote.
00096 extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"",
00097 "Unreliable chars");
00098 extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"",
00099 "Unreliable chars");
// Integer tunable; per its description string, x-heights less than or equal
// to this value are rejected. The value 8 is presumably the default (macro
// INT_VAR_H is defined in "varable.h" -- TODO confirm).
00100 extern INT_VAR_H (min_sane_x_ht_pixels, 8,
00101 "Reject any x-ht lt or eq than this");
// Prototype only; the definition lives outside this header.
// Takes a word result and an INT16 pass number, returns nothing.
// Presumably updates a "done" state on the word for that pass -- TODO confirm
// against the definition.
00104 void set_done(
00105 WERD_RES *word,
00106 INT16 pass);
// Prototype only; the definition lives outside this header.
// Receives a word result, the list of blob choice lists, the row and an
// INT16 pass number; returns nothing, so any result must be delivered through
// the pointed-to arguments. Presumably builds the word's reject map --
// TODO confirm against the definition.
00107 void make_reject_map(
00108 WERD_RES *word,
00109 BLOB_CHOICE_LIST_CLIST *blob_choices,
00110 ROW *row,
00111 INT16 pass
00112 );
// Prototype only. Takes a non-const word result and returns nothing;
// presumably marks blank characters in the word as rejected -- TODO confirm.
00113 void reject_blanks(WERD_RES *word);
// Prototype only. Takes a non-const word result and returns nothing;
// presumably rejects characters from the I/1/l confusion set -- TODO confirm.
00114 void reject_I_1_L(WERD_RES *word);
00115
// Prototype only. Takes the word result plus the list of blob choice lists
// and returns nothing; presumably rejects characters whose match quality is
// poor relative to some threshold -- TODO confirm against the definition.
00116 void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices);
// Prototype only. Returns a float computed from the list of blob choice
// lists; how the threshold is derived is not visible in this header.
00117 float compute_reject_threshold(
00118 BLOB_CHOICE_LIST_CLIST *blob_choices);
// Prototype only. The (const void *, const void *) -> int shape matches a
// qsort-style comparator; presumably both arguments point at floats and the
// result orders them -- TODO confirm the ordering direction at the definition.
00119 int sort_floats(
00120 const void *arg1,
00121 const void *arg2);
// Prototype only. Takes a non-const word result and returns nothing;
// presumably rejects blobs lying near the image edge (see the
// tessedit_image_border variable in this header) -- TODO confirm.
00122 void reject_edge_blobs(WERD_RES *word);
// Prototype only. Returns a BOOL8 for the given word result; update_map is a
// flag whose name suggests it controls whether the word's reject map is
// modified as a side effect -- TODO confirm against the definition.
00123 BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
// Three string helpers (prototypes only). Each takes a read-only C string.
// Going by the names: position of the first alphanumeric character, a count
// of alphabetic characters, and whether the word contains a digit other than
// '1'. Return conventions for "not found" are not visible here -- TODO confirm.
00124 INT16 first_alphanum_pos(const char *word);
00125 INT16 alpha_count(const char *word);
00126 BOOL8 word_contains_non_1_digit(const char *word);
// Prototype only. Returns a BOOL8 for the given (non-const) word result;
// presumably reports whether the word is ambiguous -- TODO confirm what the
// return value means and whether the word is modified.
00127 BOOL8 test_ambig_word(
00128 WERD_RES *word);
00129
// Prototype only. start_word is read-only while temp_word is a writable
// buffer supplied by the caller; test_char_pos is an INT16 character index.
// The required size of temp_word is not stated here -- presumably at least as
// large as start_word including the terminator; TODO confirm.
00130 BOOL8 ambig_word(const char *start_word,
00131 char *temp_word,
00132 INT16 test_char_pos
00133 );
// Prototype only. Maps a single character to a read-only C string, presumably
// listing the characters it can be confused with. Whether the result can be
// NULL and who owns the storage are not visible here -- TODO confirm.
00134 const char *char_ambiguities(char c);
00135
// test_ambigs is declared only when EMBEDDED is not defined, so code built
// with EMBEDDED defined must not reference it.
00136 #ifndef EMBEDDED
// Prototype only; takes a read-only C string and returns nothing.
00137 void test_ambigs(const char *word);
00138 #endif
00139
// Prototype only. Takes the word result and its row, returns nothing;
// presumably uses the neural net to re-accept rejected characters --
// TODO confirm against the definition.
00140 void nn_recover_rejects(WERD_RES *word, ROW *row);
// Prototype only. Takes the word result and its row, returns nothing, so any
// outcome must be recorded through the pointed-to arguments. Presumably runs
// the NN matcher over the characters of the word -- TODO confirm.
00141 void nn_match_word(
00142 WERD_RES *word,
00143 ROW *row);
00144
// Prototype only. scaled_image is taken by non-const C++ reference, so this
// header requires a C++ compiler and the callee may modify the image.
// The remaining arguments are a float baseline position, five BOOL8 context
// flags and the character under test (tess_ch, presumably the character
// already chosen by the main classifier -- TODO confirm).
// The meaning of the INT16 return value is not defined in this header.
00145 INT16 nn_match_char(IMAGE &scaled_image,
00146 float baseline_pos,
00147 BOOL8 dict_word,
00148 BOOL8 checked_dict_word,
00149 BOOL8 sensible_word,
00150 BOOL8 centre,
00151 BOOL8 good_quality_word,
00152 char tess_ch
00153 );
// Prototype only. Receives two (character, score) pairs -- top/top_score and
// next/next_score, presumably the best and second-best NN results -- plus
// tess_ch and the same five BOOL8 context flags that nn_match_char takes.
// The meaning of the INT16 return value is not defined in this header;
// TODO confirm against the definition.
00154 INT16 evaluate_net_match(char top,
00155 float top_score,
00156 char next,
00157 float next_score,
00158 char tess_ch,
00159 BOOL8 dict_word,
00160 BOOL8 checked_dict_word,
00161 BOOL8 sensible_word,
00162 BOOL8 centre,
00163 BOOL8 good_quality_word);
// Prototype only. Takes a non-const word result and returns nothing;
// presumably tied to the dubious_chars_left/right_of_reject sets declared in
// this header -- TODO confirm against the definition.
00164 void dont_allow_dubious_chars(WERD_RES *word);
00165
// Prototype only. Takes a non-const word result and returns nothing;
// presumably prevents acceptance of 1/I/l characters in the word --
// TODO confirm against the definition.
00166 void dont_allow_1Il(WERD_RES *word);
00167
// Prototype only. Returns an INT16 for the given word result; presumably the
// number of alphanumeric characters, though whether rejected characters are
// included is not visible here -- TODO confirm.
00168 INT16 count_alphanums(
00169 WERD_RES *word);
// Prototype only. Takes a non-const word result and returns nothing;
// presumably rejects the whole word when most of it is already rejected
// (cf. rej_whole_of_mostly_reject_word_fract in this header) -- TODO confirm.
00170 void reject_mostly_rejects(
00171 WERD_RES *word);
// Prototype only. Returns a BOOL8 for a word result and its row; presumably
// true when the word is a run of one repeated non-alphanumeric character --
// TODO confirm against the definition.
00172 BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
// Prototype only. Returns a BOOL8 for a read-only C string; presumably true
// when every character in the string is the same. Behaviour for an empty or
// NULL string is not visible here -- TODO confirm.
00173 BOOL8 repeated_ch_string(const char *rep_ch_str);
// Prototype only. Returns an INT16 (not a BOOL8) for a read-only C string,
// so the result may carry more than a yes/no answer -- TODO confirm what the
// value encodes at the definition.
00174 INT16 safe_dict_word(const char *s);
// Two prototypes that take a non-const word result and return nothing.
// Presumably they swap confusable characters in place (hyphen-like marks,
// and digit 0 vs letter O; cf. tessedit_flip_0O and the *_flip_hyphen
// variables declared in this header) -- TODO confirm against the definitions.
00175 void flip_hyphens(WERD_RES *word);
00176 void flip_0O(WERD_RES *word);
// Two single-character predicates (prototypes only) returning BOOL8.
// Going by the names: upper-case letter other than 'O', and digit other than
// '0' -- TODO confirm against the definitions.
00177 BOOL8 non_O_upper(char c);
00178 BOOL8 non_0_digit(char c);
00179 #endif