#include "choicearr.h"
Go to the source code of this file.
#define RATING_PAD 4.0
Definition at line 37 of file permute.h.
Referenced by adjust_non_word(), adjust_number(), and adjust_word().
void add_document_word(A_CHOICE *best_choice)
Add a word found on this document to the document specific dictionary.
Definition at line 928 of file permute.cpp.
References add_word_to_dawg(), case_sensative, CHARS_PER_LINE, class_certainty, class_string, CurrentWordAmbig(), document_words, FALSE, good_choice(), imagefile, MAX_DOC_EDGES, open_file(), pending_words, RESERVED_DOC_EDGES, valid_word(), and word_in_dawg().
Referenced by tess_add_doc_word().
00928 { 00929 char filename[CHARS_PER_LINE]; 00930 FILE *doc_word_file; 00931 char *string; 00932 int stringlen; //length of word 00933 00934 string = class_string (best_choice); 00935 stringlen = strlen (string); 00936 00937 if (!doc_dict_enable 00938 || valid_word (string) || CurrentWordAmbig () || stringlen < 2) 00939 return; 00940 00941 if (!good_choice (best_choice) || stringlen == 2) { 00942 if (class_certainty (best_choice) < permuter_pending_threshold) 00943 return; 00944 if (!word_in_dawg (pending_words, string)) { 00945 if (stringlen > 2 || isupper (string[0]) && isupper (string[1])) 00946 add_word_to_dawg(pending_words, 00947 string, 00948 MAX_DOC_EDGES, 00949 RESERVED_DOC_EDGES); 00950 return; 00951 } 00952 } 00953 00954 if (save_doc_words) { 00955 strcpy(filename, imagefile); 00956 strcat (filename, ".doc"); 00957 doc_word_file = open_file (filename, "a"); 00958 fprintf (doc_word_file, "%s\n", string); 00959 fclose(doc_word_file); 00960 } 00961 add_word_to_dawg(document_words, string, MAX_DOC_EDGES, RESERVED_DOC_EDGES); 00962 case_sensative = FALSE; 00963 }
void adjust_non_word(A_CHOICE *best_choice, float certainties[])
Assign an adjusted value to a string that is a non-word; where the value of that word is based on case and punctuation rules.
Definition at line 971 of file permute.cpp.
References adjust_debug, case_ok(), class_probability, class_string, cprintf(), garbage, LogNewWordChoice(), non_word, punctuation_ok(), and RATING_PAD.
Referenced by permute_top_choice().
00971 { 00972 char *this_word; 00973 float adjust_factor; 00974 00975 if (adjust_debug) 00976 cprintf ("%s %4.2f ", 00977 class_string (best_choice), class_probability (best_choice)); 00978 00979 this_word = class_string (best_choice); 00980 00981 class_probability (best_choice) += RATING_PAD; 00982 if (case_ok (this_word) && punctuation_ok (this_word) != -1) { 00983 class_probability (best_choice) *= non_word; 00984 adjust_factor = non_word; 00985 if (adjust_debug) 00986 cprintf (", %4.2f ", non_word); 00987 } 00988 else { 00989 class_probability (best_choice) *= garbage; 00990 adjust_factor = garbage; 00991 if (adjust_debug) { 00992 if (!case_ok (this_word)) 00993 cprintf (", C"); 00994 if (punctuation_ok (this_word) == -1) 00995 cprintf (", P"); 00996 cprintf (", %4.2f ", garbage); 00997 } 00998 } 00999 01000 class_probability (best_choice) -= RATING_PAD; 01001 01002 LogNewWordChoice(best_choice, adjust_factor, certainties); 01003 01004 if (adjust_debug) 01005 cprintf (" --> %4.2f\n", class_probability (best_choice)); 01006 }
char choose_il1(char first_char, char second_char, char third_char, char prev_char, char next_char, char next_next_char)
Choose between the candidate il1 chars.
Parameters:
first_char — first choice
second_char — second choice
third_char — third choice
prev_char — previous character in the word
next_char — next character in the word
next_next_char — character after next_char in the word
Definition at line 1522 of file permute.cpp.
References bigram_counts.
Referenced by permute_top_choice().
01527 { 01528 INT32 type1; //1/I/l type of first choice 01529 INT32 type2; //1/I/l type of second choice 01530 INT32 type3; //1/I/l type of third choice 01531 01532 if (first_char == 'l' && second_char != '\0') { 01533 if (second_char == 'I' 01534 && (isupper (prev_char) && !islower (next_char) 01535 && !isdigit (next_char) || isupper (next_char) 01536 && !islower (prev_char) && !isdigit (prev_char))) 01537 first_char = second_char; //override 01538 else if (second_char == '1' || third_char == '1') { 01539 if (isdigit (next_char) || isdigit (prev_char) 01540 || next_char == 'l' && isdigit (next_next_char)) { 01541 first_char = '1'; 01542 } 01543 else if (!islower (prev_char) 01544 && (!islower (next_char) || next_char == 's' 01545 && next_next_char == 't')) { 01546 if ((prev_char != '\'' && prev_char != '`' || next_char != '\0') 01547 && (next_char != '\'' && next_char != '`' 01548 || prev_char != '\0')) { 01549 first_char = '1'; 01550 } 01551 } 01552 } 01553 if (first_char == 'l' && next_char != '\0' && !isalpha (prev_char)) { 01554 type1 = 2; 01555 01556 if (second_char == '1') 01557 type2 = 0; 01558 else if (second_char == 'I') 01559 type2 = 1; 01560 else if (second_char == 'l') 01561 type2 = 2; 01562 else 01563 type2 = type1; 01564 01565 if (third_char == '1') 01566 type3 = 0; 01567 else if (third_char == 'I') 01568 type3 = 1; 01569 else if (third_char == 'l') 01570 type3 = 2; 01571 else 01572 type3 = type1; 01573 01574 if (bigram_counts[next_char][type2] > 01575 bigram_counts[next_char][type1]) { 01576 first_char = second_char; 01577 type1 = type2; 01578 } 01579 if (bigram_counts[next_char][type3] > 01580 bigram_counts[next_char][type1]) { 01581 first_char = third_char; 01582 } 01583 } 01584 } 01585 return first_char; 01586 }
void end_permute()
Free memory used for DAWG operations.
Definition at line 1056 of file permute.cpp.
References document_words, memfree(), NULL, pending_words, user_words, and word_dawg.
Referenced by program_editdown().
01056 { 01057 memfree(word_dawg); 01058 word_dawg = NULL; 01059 memfree(document_words); 01060 document_words = NULL; 01061 memfree(pending_words); 01062 pending_words = NULL; 01063 memfree(user_words); 01064 user_words = NULL; 01065 }
void init_permute()
Initialize anything that needs to be set up for the permute functions.
FIX: Later, tries changing iffy letters in a word to see if those changes fit another word better? Also, loads user dictionary for improving user-specific word recognition, eg: medical terms, etc.
Definition at line 1016 of file permute.cpp.
References case_sensative, cprintf(), demodir, document_words, FALSE, init_permdawg(), init_permnum(), initialize_dawg(), MAX_DOC_EDGES, MAX_NUM_EDGES, MAX_USER_EDGES, memalloc(), pending_words, read_squished_dawg(), read_word_list(), USER_RESERVED_EDGES, user_words, and word_dawg.
Referenced by init_ms_debug().
01016 { 01017 char name[1024]; 01018 make_adjust_debug(); 01019 make_compound_debug(); 01020 make_non_word(); 01021 make_garbage(); 01022 make_doc_words(); 01023 make_doc_dict(); 01024 01025 init_permdawg(); 01026 init_permnum(); 01027 01028 #ifdef TEXT_VERBOSE 01029 // gets a 'g', see ccmain/tesseractmain.dox 01030 cprintf("g"); 01031 #endif 01032 01033 word_dawg = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_NUM_EDGES); 01034 strcpy(name, demodir); 01035 strcat (name, "tessdata/word-dawg"); 01036 read_squished_dawg(name, word_dawg, MAX_NUM_EDGES); 01037 01038 document_words = 01039 (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES); 01040 initialize_dawg(document_words, MAX_DOC_EDGES); 01041 01042 pending_words = 01043 (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES); 01044 initialize_dawg(pending_words, MAX_DOC_EDGES); 01045 01046 user_words = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_USER_EDGES); 01047 strcpy(name, demodir); 01048 strcat (name, "tessdata/user-words"); 01049 read_word_list(name, user_words, MAX_USER_EDGES, USER_RESERVED_EDGES); 01050 case_sensative = FALSE; 01051 }
A_CHOICE* permute_all(CHOICES_LIST char_choices, float rating_limit, A_CHOICE *raw_choice)
Permute all the characters together using all of the different types of permuters/selectors available.
Each of the characters must have a non-NIL choice list.
Definition at line 1073 of file permute.cpp.
References array_count, class_probability, class_string, free_choice(), NULL, number_permute_and_select(), permute_compound_words(), permute_only_top, permute_top_choice(), and permute_words().
Referenced by permute_characters(), and permute_subword().
01075 { 01076 A_CHOICE *result_1; 01077 A_CHOICE *result_2 = NULL; 01078 BOOL8 any_alpha; 01079 01080 result_1 = permute_top_choice (char_choices, rating_limit, raw_choice, 01081 &any_alpha); 01082 if (result_1 == NULL) 01083 return (NULL); 01084 if (permute_only_top) 01085 return result_1; 01086 if (any_alpha && array_count (char_choices) <= 20) { 01087 result_2 = permute_words (char_choices, rating_limit); 01088 01089 if (class_probability (result_1) < class_probability (result_2) 01090 || class_string (result_2) == NULL) { 01091 free_choice(result_2); 01092 } 01093 else { 01094 free_choice(result_1); 01095 result_1 = result_2; 01096 } 01097 } 01098 01099 result_2 = number_permute_and_select (char_choices, rating_limit); 01100 01101 if (class_probability (result_1) < class_probability (result_2) 01102 || class_string (result_2) == NULL) { 01103 free_choice(result_2); 01104 } 01105 else { 01106 free_choice(result_1); 01107 result_1 = result_2; 01108 } 01109 01110 result_2 = permute_compound_words (char_choices, rating_limit); 01111 01112 if (!result_2 || 01113 class_probability (result_1) < class_probability (result_2) 01114 || class_string (result_2) == NULL) { 01115 free_choice(result_2); 01116 } 01117 else { 01118 free_choice(result_1); 01119 result_1 = result_2; 01120 } 01121 01122 return (result_1); 01123 }
void permute_characters(CHOICES_LIST char_choices, float limit, A_CHOICE *best_choice, A_CHOICE *raw_choice)
Permute these characters together according to each of the different permuters that are enabled.
If you enable display_ratings, tesseract will show you what combinations it considered to arrive at any particular word it recognizes. Very useful!
Definition at line 1133 of file permute.cpp.
References class_certainty, class_probability, class_string, clone_choice, cprintf(), display_ratings, free_choice(), permutation_count, and permute_all().
Referenced by chop_word_main(), evaluate_state(), and improve_by_chopping().
01136 { 01137 A_CHOICE *this_choice; 01138 01139 permutation_count++; /* Global counter */ 01140 01141 this_choice = permute_all (char_choices, limit, raw_choice); 01142 01143 if (this_choice && 01144 class_probability (this_choice) < class_probability (best_choice)) { 01145 clone_choice(best_choice, this_choice); 01146 } 01147 free_choice(this_choice); 01148 01149 if (display_ratings) 01150 cprintf ("permute_characters: %-15s %4.2f %4.2f\n", 01151 class_string (best_choice), 01152 class_probability (best_choice), class_certainty (best_choice)); 01153 }
A_CHOICE* permute_compound_words(CHOICES_LIST character_choices, float rating_limit)
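Permute a compound word: split the word wherever the top choice for a character is a hyphen ('-') or slash ('/'), permute each subword separately with permute_subword(), and join the results into a single COMPOUND_PERM choice. Returns NULL when no such compound choice within rating_limit is produced.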
Definition at line 1159 of file permute.cpp.
References array_count, array_loop, array_value, class_certainty, class_probability, class_string, COMPOUND_PERM, cprintf(), first, MAX_FLOAT32, MAX_WERD_LENGTH, min, new_choice(), NO_PERM, NULL, and permute_subword().
Referenced by permute_all().
01160 { 01161 A_CHOICE *first_choice; 01162 A_CHOICE *best_choice = NULL; 01163 char word[MAX_WERD_LENGTH + 1]; 01164 float rating = 0; 01165 float certainty = 10000; 01166 char char_choice; 01167 int x; 01168 int first_index = 0; 01169 char *ptr; 01170 01171 word[0] = '\0'; 01172 01173 if (array_count (character_choices) > MAX_WERD_LENGTH) { 01174 return (new_choice (NULL, MAX_FLOAT32, -MAX_FLOAT32, -1, NO_PERM)); 01175 } 01176 01177 array_loop(character_choices, x) { 01178 01179 first_choice = 01180 (A_CHOICE *) first ((CHOICES) array_value (character_choices, x)); 01181 01182 ptr = class_string (first_choice); 01183 char_choice = ptr != NULL ? *ptr : '\0'; 01184 if (x > first_index && (char_choice == '-' || char_choice == '/')) { 01185 if (compound_debug) 01186 cprintf ("Hyphenated word found\n"); 01187 01188 permute_subword (character_choices, rating_limit, 01189 first_index, x - 1, word, &rating, &certainty); 01190 01191 if (rating > rating_limit) 01192 break; 01193 first_index = x + 1; 01194 strcat (word, class_string (first_choice)); 01195 rating += class_probability (first_choice); 01196 certainty = min (class_certainty (first_choice), certainty); 01197 } 01198 } 01199 01200 if (first_index > 0 && first_index < x && rating <= rating_limit) { 01201 permute_subword (character_choices, rating_limit, 01202 first_index, x - 1, word, &rating, &certainty); 01203 01204 best_choice = new_choice (word, rating, certainty, -1, COMPOUND_PERM); 01205 } 01206 return (best_choice); 01207 }
void permute_subword(CHOICES_LIST character_choices, float rating_limit, int start, int end, char *word, float *rating, float *certainty)
Permute a part of a compound word. This subword is bounded by hyphens and/or the start and end of the word.
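Parameters:
character_choices — CHOICES_LIST holding the choices for every character of the whole word
rating_limit — rating limit passed on to permute_all() for the subword
start — index of the first character of the subword
end — index of the last character of the subword (inclusive)
word — string buffer to which the permuted subword is appended
rating — in/out: the subword's rating is added to it; set to MAX_FLOAT32 if no choice is found
certainty — in/out: lowered to the subword's certainty when that is smaller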
When done, reclaims the memory that was used in the exercise. Also sets the flag used by LogNewRawChoice.
Definition at line 1230 of file permute.cpp.
References array_count, array_push(), array_value, best_string, choicestruct::certainty, class_certainty, class_probability, class_string, cprintf(), dawg_debug, DisableChoiceAccum, EnableChoiceAccum, FALSE, free_choice(), free_choice_list, MAX_FLOAT32, MAX_INT16, min, new_choice_list, NULL, permute_all(), choicestruct::rating, strfree, choicestruct::string, and TRUE.
Referenced by permute_compound_words().
01236 { 01237 int x; 01238 A_CHOICE *best_choice = NULL; 01239 A_CHOICE raw_choice; 01240 CHOICES_LIST subchoices; 01241 CHOICES choices; 01242 char this_char; 01243 char *ptr; 01244 01245 DisableChoiceAccum(); 01246 raw_choice.string = NULL; 01247 raw_choice.rating = MAX_INT16; 01248 raw_choice.certainty = -MAX_INT16; 01249 01250 subchoices = new_choice_list (); 01251 for (x = start; x <= end; x++) { 01252 choices = (CHOICES) array_value (character_choices, x); 01253 ptr = best_string (choices); 01254 this_char = ptr != NULL ? *ptr : '\0'; 01255 if (this_char != '-' && this_char != '/') { 01256 subchoices = array_push (subchoices, choices); 01257 } else { 01258 const char* str = best_string(choices); 01259 strcat (word, str); 01260 } 01261 } 01262 01263 if (array_count (subchoices)) { 01264 if (compound_debug) 01265 dawg_debug = TRUE; 01266 best_choice = permute_all (subchoices, rating_limit, &raw_choice); 01267 if (compound_debug) 01268 dawg_debug = FALSE; 01269 01270 if (best_choice && class_string (best_choice)) { 01271 strcat (word, class_string (best_choice)); 01272 *rating += class_probability (best_choice); 01273 *certainty = min (class_certainty (best_choice), *certainty); 01274 } 01275 else { 01276 *rating = MAX_FLOAT32; 01277 } 01278 } 01279 else { 01280 *rating = MAX_FLOAT32; 01281 } 01282 01283 free_choice_list(subchoices); 01284 if (best_choice) 01285 free_choice(best_choice); 01286 01287 if (compound_debug && *rating < MAX_FLOAT32) { 01288 cprintf ("Subword permuted = %s, %5.2f, %5.2f\n\n", 01289 word, *rating, *certainty); 01290 } 01291 if (raw_choice.string) 01292 strfree(raw_choice.string); 01293 01294 /* Sets flag used to disable accumulation of word choices during 01295 compound word permutation. LogNewRawChoice */ 01296 EnableChoiceAccum(); 01297 }
A_CHOICE* permute_top_choice(CHOICES_LIST character_choices, float rating_limit, A_CHOICE *raw_choice, BOOL8 *any_alpha)
Top choice for each character as the choice for the word.
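Parameters:
character_choices — choice list for each character position of the word
rating_limit — NULL is returned as soon as the accumulated top-choice rating exceeds this value
raw_choice — overwritten with the top-choice word when that word's rating is lower than the current raw choice's
any_alpha — if non-NULL, set to TRUE when an alphabetic choice is seen for any character, otherwise FALSE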
Definition at line 1313 of file permute.cpp.
References adjust_non_word(), array_count, array_loop, array_value, best_certainty, best_probability, best_string, choose_il1(), class_certainty, class_permuter, class_probability, class_string, clone_choice, cprintf(), FALSE, first, free_choice(), iterate_list, LogNewRawChoice(), LOWER_CASE_PERM, MAX_PERM_LENGTH, min, new_choice(), NULL, rest, strfree, strsave, TOP_CHOICE_PERM, TRUE, and UPPER_CASE_PERM.
Referenced by permute_all().
01316 { 01317 CHOICES char_list; 01318 A_CHOICE *first_choice; 01319 A_CHOICE *best_choice; 01320 A_CHOICE *other_choice; 01321 char *ptr; 01322 char first_char; //first choice 01323 char second_char; //second choice 01324 char third_char; //third choice 01325 char prev_char = '\0'; //prev in word 01326 char next_char = '\0'; //next in word 01327 char next_next_char = '\0'; //after next next in word 01328 01329 char word[MAX_PERM_LENGTH + 1]; 01330 char capital_word[MAX_PERM_LENGTH + 1]; 01331 char lower_word[MAX_PERM_LENGTH + 1]; 01332 int x; 01333 BOOL8 char_alpha; 01334 01335 float rating = 0; 01336 float upper_rating = 0; 01337 float lower_rating = 0; 01338 float first_rating = 0; 01339 01340 float certainty = 10000; 01341 float upper_certainty = 10000; 01342 float lower_certainty = 10000; 01343 01344 float certainties[MAX_PERM_LENGTH + 1]; 01345 float lower_certainties[MAX_PERM_LENGTH + 1]; 01346 float upper_certainties[MAX_PERM_LENGTH + 1]; 01347 01348 register CHOICES this_char; 01349 register char ch; 01350 register INT8 lower_done; 01351 register INT8 upper_done; 01352 01353 if (any_alpha != NULL) 01354 *any_alpha = FALSE; 01355 01356 if (array_count (character_choices) > MAX_PERM_LENGTH) { 01357 return (NULL); 01358 } 01359 01360 array_loop(character_choices, x) { 01361 if (x + 1 < array_count (character_choices)) { 01362 char_list = (CHOICES) array_value (character_choices, x + 1); 01363 first_choice = (A_CHOICE *) first (char_list); 01364 01365 ptr = class_string (first_choice); 01366 next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' '; 01367 } 01368 else 01369 next_char = '\0'; 01370 if (x + 2 < array_count (character_choices)) { 01371 char_list = (CHOICES) array_value (character_choices, x + 2); 01372 first_choice = (A_CHOICE *) first (char_list); 01373 01374 ptr = class_string (first_choice); 01375 next_next_char = (ptr != NULL && *ptr != '\0') ? 
*ptr : ' '; 01376 } 01377 else 01378 next_next_char = '\0'; 01379 01380 char_list = (CHOICES) array_value (character_choices, x); 01381 first_choice = (A_CHOICE *) first (char_list); 01382 01383 ptr = class_string (first_choice); 01384 word[x] = (ptr != NULL && *ptr != '\0') ? *ptr : ' '; 01385 01386 lower_word[x] = word[x]; 01387 capital_word[x] = word[x]; 01388 first_char = word[x]; 01389 first_rating = class_probability (first_choice); 01390 upper_rating += class_probability (first_choice); 01391 lower_rating += class_probability (first_choice); 01392 lower_certainty = min (class_certainty (first_choice), lower_certainty); 01393 upper_certainty = min (class_certainty (first_choice), upper_certainty); 01394 01395 certainties[x] = class_certainty (first_choice); 01396 lower_certainties[x] = class_certainty (first_choice); 01397 upper_certainties[x] = class_certainty (first_choice); 01398 01399 lower_done = FALSE; 01400 upper_done = FALSE; 01401 char_alpha = FALSE; 01402 second_char = '\0'; 01403 third_char = '\0'; 01404 iterate_list(this_char, char_list) { 01405 ptr = best_string (this_char); 01406 ch = ptr != NULL ? 
*ptr : '\0'; 01407 if (ch == 'l' && rest (this_char) != NULL 01408 && best_probability (rest (this_char)) == first_rating) { 01409 ptr = best_string (rest (this_char)); 01410 if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) { 01411 second_char = *ptr; 01412 this_char = rest (this_char); 01413 if (rest (this_char) != NULL 01414 && best_probability (rest (this_char)) == first_rating) { 01415 ptr = best_string (rest (this_char)); 01416 if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) { 01417 third_char = *ptr; 01418 this_char = rest (this_char); 01419 } 01420 } 01421 ch = choose_il1 (first_char, second_char, third_char, 01422 prev_char, next_char, next_next_char); 01423 if (ch != 'l' && word[x] == 'l') { 01424 word[x] = ch; 01425 lower_word[x] = ch; 01426 capital_word[x] = ch; 01427 } 01428 } 01429 } 01430 /* Find lower case */ 01431 if (!lower_done && (islower (ch) || (isupper (ch) && x == 0))) { 01432 lower_word[x] = ch; 01433 lower_rating += best_probability (this_char); 01434 lower_rating -= class_probability (first_choice); 01435 lower_certainty = min (best_certainty (this_char), lower_certainty); 01436 lower_certainties[x] = best_certainty (this_char); 01437 lower_done = TRUE; 01438 } 01439 /* Find upper case */ 01440 if (!upper_done && isupper (ch)) { 01441 capital_word[x] = ch; 01442 upper_rating += best_probability (this_char); 01443 upper_rating -= class_probability (first_choice); 01444 upper_certainty = min (best_certainty (this_char), upper_certainty); 01445 upper_certainties[x] = best_certainty (this_char); 01446 upper_done = TRUE; 01447 } 01448 if (!char_alpha && isalpha (ch)) 01449 char_alpha = TRUE; 01450 if (lower_done && upper_done) 01451 break; 01452 } 01453 if (char_alpha && any_alpha != NULL) 01454 *any_alpha = TRUE; 01455 01456 if (first_choice == NULL) { 01457 cprintf ("Permuter giving up due to null choices list"); 01458 word[x + 1] = '$'; 01459 word[x + 2] = '\0'; 01460 cprintf (" word=%s\n", word); 01461 return (NULL); 01462 } 01463 01464 
rating += class_probability (first_choice); 01465 if (rating > rating_limit) 01466 return (NULL); 01467 01468 certainty = min (class_certainty (first_choice), certainty); 01469 prev_char = word[x]; 01470 } 01471 01472 lower_word[x] = '\0'; 01473 capital_word[x] = '\0'; 01474 word[x] = '\0'; 01475 01476 if (rating < class_probability (raw_choice)) { 01477 if (class_string (raw_choice)) 01478 strfree (class_string (raw_choice)); 01479 01480 class_probability (raw_choice) = rating; 01481 class_certainty (raw_choice) = certainty; 01482 class_string (raw_choice) = strsave (word); 01483 class_permuter (raw_choice) = TOP_CHOICE_PERM; 01484 01485 LogNewRawChoice (raw_choice, 1.0, certainties); 01486 } 01487 01488 best_choice = new_choice (word, rating, certainty, -1, TOP_CHOICE_PERM); 01489 adjust_non_word(best_choice, certainties); 01490 01491 other_choice = new_choice (lower_word, lower_rating, lower_certainty, 01492 -1, LOWER_CASE_PERM); 01493 adjust_non_word(other_choice, lower_certainties); 01494 if (class_probability (best_choice) > class_probability (other_choice)) { 01495 clone_choice(best_choice, other_choice); 01496 } 01497 free_choice(other_choice); 01498 01499 other_choice = new_choice (capital_word, upper_rating, upper_certainty, 01500 -1, UPPER_CASE_PERM); 01501 adjust_non_word(other_choice, upper_certainties); 01502 if (class_probability (best_choice) > class_probability (other_choice)) { 01503 clone_choice(best_choice, other_choice); 01504 } 01505 free_choice(other_choice); 01506 01507 return (best_choice); 01508 }
A_CHOICE* permute_words(CHOICES_LIST char_choices, float rating_limit)
Permute all the characters together using the dawg to prune all but the valid words.
Definition at line 1593 of file permute.cpp.
References array_count, case_sensative, class_probability, dawg_permute_and_select(), DOC_DAWG_PERM, document_words, FALSE, hyphen_string, MAX_FLOAT32, MAX_WERD_LENGTH, new_choice(), NO_PERM, NULL, SYSTEM_DAWG_PERM, TRUE, USER_DAWG_PERM, user_words, and word_dawg.
Referenced by permute_all().
01593 { 01594 A_CHOICE *best_choice; 01595 int hyphen_len; 01596 01597 best_choice = new_choice (NULL, rating_limit, -MAX_FLOAT32, -1, NO_PERM); 01598 01599 hyphen_len = hyphen_string != NULL ? strlen (hyphen_string) : 0; 01600 if (hyphen_len + array_count (char_choices) > MAX_WERD_LENGTH) { 01601 class_probability (best_choice) = MAX_FLOAT32; 01602 } 01603 else { 01604 01605 dawg_permute_and_select ("system words:", word_dawg, SYSTEM_DAWG_PERM, 01606 char_choices, best_choice, TRUE); 01607 01608 dawg_permute_and_select ("document_words", document_words, 01609 DOC_DAWG_PERM, char_choices, best_choice, 01610 FALSE); 01611 01612 dawg_permute_and_select ("user words", user_words, USER_DAWG_PERM, 01613 char_choices, best_choice, FALSE); 01614 case_sensative = FALSE; 01615 } 01616 01617 return (best_choice); 01618 }
int valid_word(const char *string)
Check all the DAWGs to see if this word is in any of them.
Definition at line 1624 of file permute.cpp.
References case_sensative, DOC_DAWG_PERM, document_words, FALSE, NO_PERM, SYSTEM_DAWG_PERM, USER_DAWG_PERM, user_words, word_dawg, and word_in_dawg().
Referenced by AcceptableChoice(), AcceptableResult(), AdaptableWord(), add_document_word(), AmbigsFound(), and dict_word().
01624 { 01625 int result = NO_PERM; 01626 01627 if (word_in_dawg (word_dawg, string)) 01628 result = SYSTEM_DAWG_PERM; 01629 else { 01630 if (word_in_dawg (document_words, string)) 01631 result = DOC_DAWG_PERM; 01632 else if (word_in_dawg (user_words, string)) 01633 result = USER_DAWG_PERM; 01634 case_sensative = FALSE; 01635 } 01636 return (result); 01637 }
int adjust_debug
Adjustment Debug, defaults to FALSE
float garbage
Garbage adjustment, defaults to GARBAGE_STRING
Referenced by adjust_non_word(), and set_tess_tweak_vars().
float non_word
Non-word adjustment, defaults to NON_WERD
Referenced by adjust_non_word(), and set_tess_tweak_vars().
int permute_only_top
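Set to 1 if the word flag is W_REP_CHAR and to 0 if the word flag is W_DONT_CHOP.
When nonzero, permute_all() returns the permute_top_choice() result directly and skips the word, number, and compound-word permuters.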
Definition at line 107 of file permute.cpp.
Referenced by permute_all(), tess_segment_pass1(), and tess_segment_pass2().