ccmain/control.h

Go to the documentation of this file.
00001 
00020 #ifndef           CONTROL_H
00021 #define           CONTROL_H
00022 
00023 #include          "varable.h"
00024 #include          "ocrblock.h"
00025 //#include                                      "epapdest.h"
00026 #include          "ratngs.h"
00027 #include          "statistc.h"
00028 //#include                                      "epapconv.h"
00029 #include          "ocrshell.h"
00030 #include          "pageres.h"
00031 #include          "charsample.h"
00032 #include          "notdll.h"
00033 
00034 
// Word categories; this is the return type of acceptable_word_string(),
// declared further down in this header. The per-enumerator notes are read off
// the enumerator names only -- TODO confirm against the implementation.
00039 enum ACCEPTABLE_WERD_TYPE
00040 {
00041   AC_UNACCEPTABLE,               // presumably: word is not acceptable
00042   AC_LOWER_CASE,                 // presumably: all lower-case word
00043   AC_UPPER_CASE,                 // presumably: all upper-case word
00044   AC_INITIAL_CAP,                // presumably: initial capital only
00045   AC_LC_ABBREV,                  // presumably: lower-case abbreviation
00046   AC_UC_ABBREV                   // presumably: upper-case abbreviation
00047 };
00048 
// Pointer-to-function type: the function takes a PBLOB *, a BLOB_CHOICE_IT *
// and an untyped void *, and returns BOOL8.
// NOTE(review): by its name this is a callback that decides whether a blob is
// rejected; the meaning of the void * argument is not visible here -- confirm.
00049 typedef BOOL8 (*BLOB_REJECTOR) (PBLOB *, BLOB_CHOICE_IT *, void *);
00050 
// Declarations of switches for matching, text/graphics output, training and
// choice dumping. Each line has the form *_VAR_H(name, default, description).
// The *_VAR_H macros are not defined in this header (presumably they come from
// varable.h -- TODO confirm), so how the default and description arguments are
// used is not visible here.
// NOTE(review): tessedit_single_match is declared with INT_VAR_H but given a
// FALSE default, unlike the BOOL_VAR_H flags around it -- confirm the type.
00051 extern INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP");
00052 //extern BOOL_VAR_H(tessedit_small_match,FALSE,"Use small matrix matcher");
00053 extern BOOL_VAR_H (tessedit_print_text, FALSE, "Write text to stdout");
00054 extern BOOL_VAR_H (tessedit_draw_words, FALSE, "Draw source words");
00055 extern BOOL_VAR_H (tessedit_draw_outwords, FALSE, "Draw output words");
00056 extern BOOL_VAR_H (tessedit_training_wiseowl, FALSE, "Call WO to learn blobs");
00057 extern BOOL_VAR_H (tessedit_training_tess, FALSE, "Call Tess to learn blobs");
00058 extern BOOL_VAR_H (tessedit_matcher_is_wiseowl, FALSE, "Call WO to classify");
00059 extern BOOL_VAR_H (tessedit_dump_choices, FALSE, "Dump char choices");
// Declarations of boolean switches for word-level fix-ups (fuzzy spaces,
// hyphens, full stops), x-height correction, ems cluster adaption and the
// document dictionary. Each line has the form
// BOOL_VAR_H(name, default, description); the macro is defined outside this
// header, so only the names, defaults and description strings are visible here.
00060 extern BOOL_VAR_H (tessedit_fix_fuzzy_spaces, TRUE,
00061 "Try to improve fuzzy spaces");
00062 extern BOOL_VAR_H (tessedit_unrej_any_wd, FALSE,
00063 "Dont bother with word plausibility");
00064 extern BOOL_VAR_H (tessedit_fix_hyphens, TRUE, "Crunch double hyphens?");
00065 extern BOOL_VAR_H (tessedit_reject_fullstops, FALSE, "Reject all fullstops");
00066 extern BOOL_VAR_H (tessedit_reject_suspect_fullstops, FALSE,
00067 "Reject suspect fullstops");
00068 extern BOOL_VAR_H (tessedit_redo_xheight, TRUE, "Check/Correct x-height");
00069 extern BOOL_VAR_H (tessedit_cluster_adaption_on, TRUE,
00070 "Do our own adaption - ems only");
00071 extern BOOL_VAR_H (tessedit_enable_doc_dict, TRUE,
00072 "Add words to the document dictionary");
00073 extern BOOL_VAR_H (word_occ_first, FALSE, "Do word occ before re-est xht");
00074 extern BOOL_VAR_H (tessedit_xht_fiddles_on_done_wds, TRUE,
00075 "Apply xht fix up even if done");
00076 extern BOOL_VAR_H (tessedit_xht_fiddles_on_no_rej_wds, TRUE,
00077 "Apply xht fix up even in no rejects");
// Declarations of x-height checking parameters plus rejection/acceptability
// debug switches, in the form *_VAR_H(name, default, description).
// The integer parameters carry defaults 2, 1 and 0; what each numeric level
// selects is not visible in this header -- see the defining source file.
00078 extern INT_VAR_H (x_ht_check_word_occ, 2, "Check Char Block occupancy");
00079 extern INT_VAR_H (x_ht_stringency, 1, "How many confirmed a/n to accept?");
00080 extern BOOL_VAR_H (x_ht_quality_check, TRUE, "Dont allow worse quality");
00081 extern BOOL_VAR_H (tessedit_debug_block_rejection, FALSE, "Block and Row stats");
00082 extern INT_VAR_H (debug_x_ht_level, 0, "Reestimate debug");
00083 extern BOOL_VAR_H (rej_use_xht, TRUE, "Individual rejection control");
00084 extern BOOL_VAR_H (debug_acceptable_wds, FALSE, "Dump word pass/fail chk");
// Declarations of three string parameters listing punctuation characters:
// one leading set and two trailing sets (the default argument of each line is
// the character set itself).
// NOTE(review): presumably consulted when checking a word's leading/trailing
// punctuation, e.g. by acceptable_word_string() -- confirm against the .cpp.
00085 extern STRING_VAR_H (chs_leading_punct, "('`\"", "Leading punctuation");
00086 extern STRING_VAR_H (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");
00087 extern STRING_VAR_H (chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation");
// Declarations of the "good_quality_doc" thresholds. Per the description
// strings, the rejection and outline-error values are upper limits ("lte")
// and the good-blob and good-char values are lower limits ("gte").
// quality_min_initial_alphas_reqd is an integer described as the number of
// alphas in a good word.
// NOTE(review): the defaults (0.08, 0.0, 1.0, 0.95) look like fractions rather
// than percentages despite the _pc suffix -- confirm against the users.
00088 extern double_VAR_H (quality_rej_pc, 0.08,
00089 "good_quality_doc lte rejection limit");
00090 extern double_VAR_H (quality_blob_pc, 0.0,
00091 "good_quality_doc gte good blobs limit");
00092 extern double_VAR_H (quality_outline_pc, 1.0,
00093 "good_quality_doc lte outline error limit");
00094 extern double_VAR_H (quality_char_pc, 0.95,
00095 "good_quality_doc gte good char limit");
00096 extern INT_VAR_H (quality_min_initial_alphas_reqd, 2,
00097 "alphas in a good word");
// Declarations controlling adaption for Tesseract ("tess") and for the ems
// matrix matcher: one boolean switch and two integer "mode" parameters.
// NOTE(review): the mode defaults (3 and 62) are opaque here; they may be
// bit masks or enumerated algorithm ids -- confirm in the defining source.
00098 extern BOOL_VAR_H (tessedit_tess_adapt_to_rejmap, FALSE,
00099 "Use reject map to control Tesseract adaption");
00100 extern INT_VAR_H (tessedit_tess_adaption_mode, 3,
00101 "Adaptation decision algorithm for tess");
00102 extern INT_VAR_H (tessedit_em_adaption_mode, 62,
00103 "Adaptation decision algorithm for ems matrix matcher");
// Declarations of the boolean switches that select when clusterer-based
// adaption runs: after pass 1, after pass 2, after pass 3, or before Tess
// adapts during pass 1. All four are declared with a FALSE default argument.
// Each description string names the pass its own variable refers to.
// The BOOL_VAR_H macro is defined outside this header; if the defining source
// file repeats these description strings, keep the two in step.
00104 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass1, FALSE,
00105 "Adapt using clusterer after pass 1");
00106 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass2, FALSE,
00107 "Adapt using clusterer after pass 2");
00108 extern BOOL_VAR_H (tessedit_cluster_adapt_after_pass3, FALSE,
00109 "Adapt using clusterer after pass 3");
00110 extern BOOL_VAR_H (tessedit_cluster_adapt_before_pass1, FALSE,
00111 "Adapt using clusterer before Tess adaping during pass 1");
// Declarations of the remaining adaption parameters: the matrix-matcher
// adaption mode, adaption debugging/testing switches, minimal pass-1
// rejection, cross-document ("global") adaption and matcher logging.
// NOTE(review): tessedit_test_adaption_mode carries the same description text
// as tessedit_tess_adaption_mode ("Adaptation decision algorithm for tess");
// it presumably applies only when tessedit_test_adaption is set -- confirm.
00112 extern INT_VAR_H (tessedit_cluster_adaption_mode, 0,
00113 "Adaptation decision algorithm for matrix matcher");
00114 extern BOOL_VAR_H (tessedit_adaption_debug, FALSE,
00115 "Generate and print debug information for adaption");
00116 extern BOOL_VAR_H (tessedit_minimal_rej_pass1, FALSE,
00117 "Do minimal rejection on pass 1 output");
00118 extern BOOL_VAR_H (tessedit_test_adaption, FALSE,
00119 "Test adaption criteria");
00120 extern BOOL_VAR_H (tessedit_global_adaption, FALSE,
00121 "Adapt to all docs over time");
00122 extern BOOL_VAR_H (tessedit_matcher_log, FALSE, "Log matcher activity");
00123 extern INT_VAR_H (tessedit_test_adaption_mode, 3,
00124 "Adaptation decision algorithm for tess");
// Declarations of a debug "test point": a boolean switch plus x and y
// coordinates, both declared with a default argument of 99999.99.
// NOTE(review): presumably read by check_debug_pt() to single out the word at
// that position -- confirm against the implementation.
00125 extern BOOL_VAR_H (test_pt, FALSE, "Test for point");
00126 extern double_VAR_H (test_pt_x, 99999.99, "xcoord");
00127 extern double_VAR_H (test_pt_y, 99999.99, "ycoord");
// Recognise blobs, given the list of blocks to check and a selection box
// passed by non-const reference. Returns nothing.
// NOTE(review): presumably the blobs inside selection_box are gathered into a
// temporary ("pseudo") word and recognised -- confirm against the definition.
00128 void recog_pseudo_word(                         //recognize blobs
00129                        BLOCK_LIST *block_list,  //blocks to check
00130                        BOX &selection_box);     //box selecting the blobs (by reference)
// Recognise the blobs of a single word, given its block, its row and the word
// itself. Returns a BOOL8; the meaning of the return value is not visible in
// this header -- TODO confirm against the definition.
00131 BOOL8 recog_interactive(            //recognize blobs
00132                         BLOCK *,    //block (unnamed in this declaration)
00133                         ROW *row,   //row of word
00134                         WERD *word  //word to recognize
00135                        );
// Process the words of a page, given the page results structure and a
// progress monitor. The monitor is declared as a pointer to volatile
// ETEXT_DESC. Returns nothing.
// NOTE(review): volatile suggests the monitor is updated outside this code
// path (e.g. by a caller polling progress) -- confirm.
00136 void recog_all_words(                              //process words
00137                      PAGE_RES *page_res,           //page structure
00138                      volatile ETEXT_DESC *monitor  //progress monitor
00139                     );
// Pass-1 recognition of one word. Takes the word result to process, its row,
// a cluster-adaption flag and two sample lists. Returns nothing.
// NOTE(review): the roles given for row, cluster_adapt, char_clusters and
// chars_waiting are inferred from their names and types -- confirm.
00140 void classify_word_pass1(                 //recog one word
00141                          WERD_RES *word,  //word to do
00142                          ROW *row,        //presumably the row containing the word
00143                          BOOL8 cluster_adapt,  //presumably enables cluster adaption
00144                          CHAR_SAMPLES_LIST *char_clusters,  //presumably existing clusters
00145                          CHAR_SAMPLE_LIST *chars_waiting);  //presumably samples not yet clustered
// Pass-2 counterpart of classify_word_pass1 (by name): takes the word result
// to process and a row, and returns nothing.
// NOTE(review): what pass 2 does differently is not visible in this header.
00146                                  //word to do (describes the word parameter below)
00147 void classify_word_pass2(WERD_RES *word, ROW *row); 
// Recognise one word, given the word result, a row and an x-height value.
// Returns nothing.
// NOTE(review): presumably the matching step used by classify_word_pass2 with
// an explicit x-height estimate -- confirm against the definition.
00148 void match_word_pass2(                 //recog one word
00149                       WERD_RES *word,  //word to do
00150                       ROW *row,        //presumably the row containing the word
00151                       float x_height); //x-height to use (units not visible here)
// Handle a "repeated char" word: takes the word result to process and returns
// nothing. What is changed in the word is not visible in this header.
00152 void fix_rep_char(                //Repeated char word
00153                   WERD_RES *word  //word to do
00154                  );
// Make double quotes in a recognised word. Takes the mutable text string, the
// word and the list of per-character choices; returns nothing.
// NOTE(review): presumably all three arguments are edited together so the text,
// blobs and choices stay aligned -- confirm against the definition.
00155 void fix_quotes(               //make double quotes
00156                 char *string,  //string to fix
00157                 WERD *word,    //word to do
00158                 BLOB_CHOICE_LIST_CLIST *blob_choices);  //char choices
// Crunch double hyphens in a recognised word. Takes the mutable text string,
// the word and the list of per-character choices; returns nothing.
// NOTE(review): presumably all three arguments are edited together so the text,
// blobs and choices stay aligned -- confirm against the definition.
00159 void fix_hyphens(               //crunch double hyphens
00160                  char *string,  //string to fix
00161                  WERD *word,    //word to do
00162                  BLOB_CHOICE_LIST_CLIST *blob_choices);  //char choices
// Combine two blobs: blob1 is the destination and blob2 the source.
// Returns nothing. The state of blob2 after the call (emptied, unchanged,
// freed) is not visible in this header -- TODO confirm.
00163 void merge_blobs(               //combine 2 blobs
00164                  PBLOB *blob1,  //dest blob
00165                  PBLOB *blob2   //source blob
00166                 );
// Dump the character choices for a blob of a word. Takes the blob, a
// de-normaliser, a flag saying whether the blob is correctly segmented, the
// correct text with its character count, and the list of classifier results.
// Returns nothing.
// NOTE(review): the signature is compatible in shape with a per-blob tester
// callback; where the dump is written is not visible here -- confirm.
00167 void choice_dump_tester(                           //dump chars in word
00168                         PBLOB *,                   //blob
00169                         DENORM *,                  //de-normaliser
00170                         BOOL8 correct,             //true if correctly segmented
00171                         char *text,                //correct text
00172                         INT32 count,               //chars in text
00173                         BLOB_CHOICE_LIST *ratings  //list of results
00174                        );
// Returns a WERD * built from src_word, given its row, an x-height and a
// DENORM pointer.
// NOTE(review): by its name this makes a baseline-normalised copy and fills in
// *denorm so results can be mapped back; ownership of the returned word is not
// visible here -- confirm against the definition.
00175 WERD *make_bln_copy(WERD *src_word, ROW *row, float x_height, DENORM *denorm); 
// Classifies the read-only string s and returns one of the
// ACCEPTABLE_WERD_TYPE values declared earlier in this header.
// NOTE(review): the exact acceptance rules are in the definition, not here.
00176 ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s); 
// Takes a word result and an integer location code and returns a BOOL8.
// NOTE(review): presumably reports whether the word covers the test point set
// by test_pt / test_pt_x / test_pt_y, with location identifying the call site
// for debug output -- confirm against the definition.
00177 BOOL8 check_debug_pt(WERD_RES *word, int location); 
// Takes a word result and the detailed per-character classifier results for
// it; returns nothing.
// NOTE(review): by its name this sets font information on the word from the
// choices; the inherited summary "good chars in word" may be stale -- confirm.
00178 void set_word_fonts(                 //good chars in word
00179                     WERD_RES *word,  //word to adapt to
00180                     BLOB_CHOICE_LIST_CLIST *blob_choices);  //detailed results
// Takes a page-results iterator by non-const reference and returns nothing.
// NOTE(review): by its name this is a pass over the page that works out fonts;
// the original summary here ("good chars in word") duplicated the one on
// set_word_fonts and looked copy-pasted -- confirm against the definition.
00181 void font_recognition_pass(  //font pass over the page (see note above)
00182                            PAGE_RES_IT &page_res_it);  //page results iterator (by reference)
// Takes the current row's results, a font statistics object and two INT8
// output counters (italic and bold). Returns nothing.
// NOTE(review): by its name this adds one row's contribution into the stats
// and counters; the original summary ("good chars in word") looked
// copy-pasted from set_word_fonts -- confirm against the definition.
00183 void add_in_one_row(               //add one row's fonts (see note above)
00184                     ROW_RES *row,  //current row
00185                     STATS *fonts,  //font stats
00186                     INT8 *italic,  //output count
00187                     INT8 *bold     //output count
00188                    );
// Takes a font statistics object and writes a font and a count through the
// two INT8 output pointers. Returns nothing.
// NOTE(review): by its name the font written is the modal (most frequent) one
// in the stats; whether fonts is modified is not visible here. The original
// summary ("good chars in word") looked copy-pasted -- confirm.
00189 void find_modal_font(                  //find modal font (see note above)
00190                      STATS *fonts,     //font stats
00191                      INT8 *font_out,   //output font
00192                      INT8 *font_count  //output count
00193                     );
00194 #endif

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1