ccmain/adaptions.h

Go to the documentation of this file.
00001 
00020 #ifndef           ADAPTIONS_H
00021 #define           ADAPTIONS_H
00022 
00023 #include          "charsample.h"
00024 #include          "charcut.h"
00025 #include          "notdll.h"
00026 
// Switches concerning rejection of m characters, declared extern through
// BOOL_VAR_H (the macro is defined outside this file). Arguments as written
// here: variable name, FALSE, and a description string.
// NOTE(review): the second argument is presumably the default value - confirm
// against the macro definition.
00029 extern BOOL_VAR_H (tessedit_reject_ems, FALSE, "Reject all m's");
00030 extern BOOL_VAR_H (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");
// Variables for clustering samples, declared extern through the double_VAR_H,
// INT_VAR_H and BOOL_VAR_H macros (all defined outside this file). Each
// invocation gives the variable name, a value and a description string; the
// description strings are the only statement of meaning visible here.
// NOTE(review): the value argument is presumably the default - confirm
// against the macro definitions.
00031 extern double_VAR_H (tessedit_cluster_t1, 0.20,
00032 "t1 threshold for clustering samples");
00033 extern double_VAR_H (tessedit_cluster_t2, 0.40,
00034 "t2 threshold for clustering samples");
00035 extern double_VAR_H (tessedit_cluster_t3, 0.12,
00036 "Extra threshold for clustering samples, only keep a new sample if best score greater than this value");
00037 extern double_VAR_H (tessedit_cluster_accept_fraction, 0.80,
00038 "Largest fraction of characters in cluster for it to be used for adaption");
00039 extern INT_VAR_H (tessedit_cluster_min_size, 3,
00040 "Smallest number of samples in a cluster for it to be used for adaption");
00041 extern BOOL_VAR_H (tessedit_cluster_debug, FALSE,
00042 "Generate and print debug information for adaption by clustering");
00043 extern BOOL_VAR_H (tessedit_use_best_sample, FALSE,
00044 "Use best sample from cluster when adapting");
00045 extern BOOL_VAR_H (tessedit_test_cluster_input, FALSE,
00046 "Set reject map to enable cluster input to be measured");
// Variables controlling matching and which characters are adapted to, as
// stated by their description strings. Declared extern through BOOL_VAR_H and
// STRING_VAR_H (macros defined outside this file); each invocation gives the
// variable name, a value and a description string.
00047 extern BOOL_VAR_H (tessedit_matrix_match, TRUE, "Use matrix matcher");
// NOTE(review): the description below is identical to the one for
// tessedit_matrix_match; presumably it should mention the old matcher -
// confirm against the definition before changing the string.
00048 extern BOOL_VAR_H (tessedit_old_matrix_match, FALSE, "Use matrix matcher");
00049 extern BOOL_VAR_H (tessedit_mm_use_non_adaption_set, FALSE,
00050 "Don't try to adapt to characters on this list");
// The string value here is a set of punctuation characters.
00051 extern STRING_VAR_H (tessedit_non_adaption_set, ",.;:'~@*",
00052 "Characters to be avoided when adapting");
00053 extern BOOL_VAR_H (tessedit_mm_adapt_using_prototypes, TRUE,
00054 "Use prototypes when adapting");
00055 extern BOOL_VAR_H (tessedit_mm_use_prototypes, TRUE,
00056 "Use prototypes as clusters are built");
00057 extern BOOL_VAR_H (tessedit_mm_use_rejmap, FALSE,
00058 "Adapt to characters using reject map");
// NOTE(review): the comma inside the description below looks like a typo -
// confirm against the definition before changing the string.
00059 extern BOOL_VAR_H (tessedit_mm_all_rejects, FALSE,
00060 "Adapt to all characters using, matrix matcher");
00061 extern BOOL_VAR_H (tessedit_mm_only_match_same_char, FALSE,
00062 "Only match samples against clusters for the same character");
00063 extern BOOL_VAR_H (tessedit_process_rns, FALSE, "Handle m - rn ambigs");
// Demo-related variables: an on/off switch, two word numbers and a document
// name, as stated by their description strings. Declared extern through
// BOOL_VAR_H, INT_VAR_H and STRING_VAR_H (macros defined outside this file).
// NOTE(review): 62, 64 and the document name look like values tied to one
// particular demo document - confirm they are still meaningful.
00064 extern BOOL_VAR_H (tessedit_demo_adaption, FALSE,
00065 "Display cut images and matrix match for demo purposes");
00066 extern INT_VAR_H (tessedit_demo_word1, 62,
00067 "Word number of first word to display");
00068 extern INT_VAR_H (tessedit_demo_word2, 64,
00069 "Word number of second word to display");
00070 extern STRING_VAR_H (tessedit_demo_file, "academe",
00071 "Name of document containing demo words");
// Prototype only; the definition is not in this header.
// Takes a word result and a 16-bit unsigned mode value and returns a BOOL8
// answering the question in the inline comment below.
// NOTE(review): the meaning of the individual mode values is not visible
// here - document it next to the definition.
00073 BOOL8 word_adaptable(  //should we adapt?
00074                      WERD_RES *word,
00075                      UINT16 mode);
// Prototypes only; the definitions are not in this header.
// Both functions have the same signature: a word result, a list of character
// clusters and a list of waiting character samples, with no return value.
// NOTE(review): judging by the names, the first collects only m characters
// and the second collects all characters - confirm in the implementation.
00076 void collect_ems_for_adaption(WERD_RES *word,
00077                               CHAR_SAMPLES_LIST *char_clusters,
00078                               CHAR_SAMPLE_LIST *chars_waiting);
00079 void collect_characters_for_adaption(WERD_RES *word,
00080                                      CHAR_SAMPLES_LIST *char_clusters,
00081                                      CHAR_SAMPLE_LIST *chars_waiting);
// Prototypes only; the definitions are not in this header. None of these
// functions returns a value.
//   cluster_sample:      one sample, the cluster list, the waiting list.
//   check_wait_list:     the waiting list, one sample, one cluster.
//   complete_clustering: the cluster list and the waiting list.
// NOTE(review): ownership of the sample pointers (whether the lists take
// them over) is not visible here - confirm and document at the definitions.
00082 void cluster_sample(CHAR_SAMPLE *sample,
00083                     CHAR_SAMPLES_LIST *char_clusters,
00084                     CHAR_SAMPLE_LIST *chars_waiting);
00085 void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
00086                      CHAR_SAMPLE *sample,
00087                      CHAR_SAMPLES *best_cluster);
00088 void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
00089                          CHAR_SAMPLE_LIST *chars_waiting);
// Prototypes only; the definitions are not in this header.
// Both functions have the same signature: a word result, a list of character
// clusters and a list of waiting character samples, with no return value.
// NOTE(review): judging by the names, the first is restricted to m
// characters and the second is the general version - confirm in the
// implementation.
00090 void adapt_to_good_ems(WERD_RES *word,
00091                        CHAR_SAMPLES_LIST *char_clusters,
00092                        CHAR_SAMPLE_LIST *chars_waiting);
00093 void adapt_to_good_samples(WERD_RES *word,
00094                            CHAR_SAMPLES_LIST *char_clusters,
00095                            CHAR_SAMPLE_LIST *chars_waiting);
// Prototype only; the definition is not in this header.
// Takes the cluster list and the waiting-sample list and returns nothing.
// NOTE(review): presumably prints statistics about m characters, going by
// the name; the output destination is not visible here.
00096 void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
00097                     CHAR_SAMPLE_LIST *chars_waiting);
// Prototype only; the definition is not in this header.
// Returns a pointer to a CHAR_SAMPLE. Parameters: a pixel row, image lines,
// a box passed by value (extent of imlines, per the inline comment), a
// white-on-black flag and a single character.
// NOTE(review): the comment on the next line is detached from any parameter;
// it presumably describes imlines - confirm and move it next to that
// parameter. Who owns the returned sample is not visible here.
00098                                  //lines of the image
00099 CHAR_SAMPLE *clip_sample(PIXROW *pixrow,
00100                          IMAGELINE *imlines,
00101                          BOX pix_box,  //box of imlines extent
00102                          BOOL8 white_on_black,
00103                          char c);
// Prototype only; the definition is not in this header.
// Takes the cluster list and returns nothing.
// NOTE(review): where and how the prototypes are displayed is not visible
// here.
00104 void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters); 
// Prototypes only; the definitions are not in this header.
// All four take a single word result and return nothing.
// NOTE(review): going by the names, these reject either every m / full stop
// in the word or only the suspect ones, presumably by updating state held in
// the word result - confirm in the implementation.
00105 void reject_all_ems(WERD_RES *word); 
00106 void reject_all_fullstops(WERD_RES *word); 
00107 void reject_suspect_ems(WERD_RES *word); 
00108 void reject_suspect_fullstops(WERD_RES *word); 
// Prototypes only; the definitions are not in this header.
// Both take a word result and a 16-bit signed integer and return a BOOL8.
// NOTE(review): the integer is presumably a character position within the
// word; the two declarations name it differently (index vs i) - consider
// using one name for both.
00109 BOOL8 suspect_em(WERD_RES *word, INT16 index); 
00110 BOOL8 suspect_fullstop(WERD_RES *word, INT16 i); 
00111 #endif

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1