dict/permute.h File Reference

#include "choicearr.h"

Go to the source code of this file.

Defines

Functions

Variables


Define Documentation

#define RATING_PAD   4.0

Definition at line 37 of file permute.h.

Referenced by adjust_non_word(), adjust_number(), and adjust_word().


Function Documentation

void add_document_word ( A_CHOICE best_choice  ) 

Add a word found on this document to the document specific dictionary.

Definition at line 928 of file permute.cpp.

References add_word_to_dawg(), case_sensative, CHARS_PER_LINE, class_certainty, class_string, CurrentWordAmbig(), document_words, FALSE, good_choice(), imagefile, MAX_DOC_EDGES, open_file(), pending_words, RESERVED_DOC_EDGES, valid_word(), and word_in_dawg().

Referenced by tess_add_doc_word().

00928                                               { 
00929   char filename[CHARS_PER_LINE];
00930   FILE *doc_word_file;
00931   char *string;
00932   int stringlen;                 //length of word
00933 
00934   string = class_string (best_choice);
00935   stringlen = strlen (string);
00936 
00937   if (!doc_dict_enable
00938     || valid_word (string) || CurrentWordAmbig () || stringlen < 2)
00939     return;
00940 
00941   if (!good_choice (best_choice) || stringlen == 2) {
00942     if (class_certainty (best_choice) < permuter_pending_threshold)
00943       return;
00944     if (!word_in_dawg (pending_words, string)) {
00945       if (stringlen > 2 || isupper (string[0]) && isupper (string[1]))
00946         add_word_to_dawg(pending_words,
00947                          string,
00948                          MAX_DOC_EDGES,
00949                          RESERVED_DOC_EDGES);
00950       return;
00951     }
00952   }
00953 
00954   if (save_doc_words) {
00955     strcpy(filename, imagefile);
00956     strcat (filename, ".doc");
00957     doc_word_file = open_file (filename, "a");
00958     fprintf (doc_word_file, "%s\n", string);
00959     fclose(doc_word_file);
00960   }
00961   add_word_to_dawg(document_words, string, MAX_DOC_EDGES, RESERVED_DOC_EDGES);
00962   case_sensative = FALSE;
00963 }

void adjust_non_word ( A_CHOICE best_choice,
float  certainties[] 
)

Assign an adjusted value to a string that is a non-word. The value is scaled according to case and punctuation rules: by non_word when case_ok() and punctuation_ok() both accept the string, and by garbage otherwise.

Definition at line 971 of file permute.cpp.

References adjust_debug, case_ok(), class_probability, class_string, cprintf(), garbage, LogNewWordChoice(), non_word, punctuation_ok(), and RATING_PAD.

Referenced by permute_top_choice().

00971                                                               {
00972   char *this_word;
00973   float adjust_factor;
00974 
00975   if (adjust_debug)
00976     cprintf ("%s %4.2f ",
00977       class_string (best_choice), class_probability (best_choice));
00978 
00979   this_word = class_string (best_choice);
00980 
00981   class_probability (best_choice) += RATING_PAD;
00982   if (case_ok (this_word) && punctuation_ok (this_word) != -1) {
00983     class_probability (best_choice) *= non_word;
00984     adjust_factor = non_word;
00985     if (adjust_debug)
00986       cprintf (", %4.2f ", non_word);
00987   }
00988   else {
00989     class_probability (best_choice) *= garbage;
00990     adjust_factor = garbage;
00991     if (adjust_debug) {
00992       if (!case_ok (this_word))
00993         cprintf (", C");
00994       if (punctuation_ok (this_word) == -1)
00995         cprintf (", P");
00996       cprintf (", %4.2f ", garbage);
00997     }
00998   }
00999 
01000   class_probability (best_choice) -= RATING_PAD;
01001 
01002   LogNewWordChoice(best_choice, adjust_factor, certainties);
01003 
01004   if (adjust_debug)
01005     cprintf (" --> %4.2f\n", class_probability (best_choice));
01006 }

char choose_il1 ( char  first_char,
char  second_char,
char  third_char,
char  prev_char,
char  next_char,
char  next_next_char 
)

Choose between the visually similar candidate characters 'I', 'l' and '1'.

Parameters:
first_char first choice
second_char second choice
third_char third choice
prev_char prev in word
next_char next in word
next_next_char character after next_char in word
Returns:
first choice, which may differ from first_char

Definition at line 1522 of file permute.cpp.

References bigram_counts.

Referenced by permute_top_choice().

01527                                      {
01528   INT32 type1;                   //1/I/l type of first choice
01529   INT32 type2;                   //1/I/l type of second choice
01530   INT32 type3;                   //1/I/l type of third choice
01531 
01532   if (first_char == 'l' && second_char != '\0') {
01533     if (second_char == 'I'
01534       && (isupper (prev_char) && !islower (next_char)
01535       && !isdigit (next_char) || isupper (next_char)
01536       && !islower (prev_char) && !isdigit (prev_char)))
01537       first_char = second_char;  //override
01538     else if (second_char == '1' || third_char == '1') {
01539       if (isdigit (next_char) || isdigit (prev_char)
01540       || next_char == 'l' && isdigit (next_next_char)) {
01541         first_char = '1';
01542       }
01543       else if (!islower (prev_char)
01544         && (!islower (next_char) || next_char == 's'
01545       && next_next_char == 't')) {
01546         if ((prev_char != '\'' && prev_char != '`' || next_char != '\0')
01547           && (next_char != '\'' && next_char != '`'
01548         || prev_char != '\0')) {
01549           first_char = '1';
01550         }
01551       }
01552     }
01553     if (first_char == 'l' && next_char != '\0' && !isalpha (prev_char)) {
01554       type1 = 2;
01555 
01556       if (second_char == '1')
01557         type2 = 0;
01558       else if (second_char == 'I')
01559         type2 = 1;
01560       else if (second_char == 'l')
01561         type2 = 2;
01562       else
01563         type2 = type1;
01564 
01565       if (third_char == '1')
01566         type3 = 0;
01567       else if (third_char == 'I')
01568         type3 = 1;
01569       else if (third_char == 'l')
01570         type3 = 2;
01571       else
01572         type3 = type1;
01573 
01574       if (bigram_counts[next_char][type2] >
01575       bigram_counts[next_char][type1]) {
01576         first_char = second_char;
01577         type1 = type2;
01578       }
01579       if (bigram_counts[next_char][type3] >
01580       bigram_counts[next_char][type1]) {
01581         first_char = third_char;
01582       }
01583     }
01584   }
01585   return first_char;
01586 }

void end_permute (  ) 

Free memory used for DAWG operations.

Definition at line 1056 of file permute.cpp.

References document_words, memfree(), NULL, pending_words, user_words, and word_dawg.

Referenced by program_editdown().

01056                    {
01057   memfree(word_dawg);
01058   word_dawg = NULL;
01059   memfree(document_words);
01060   document_words =  NULL;
01061   memfree(pending_words);
01062   pending_words = NULL;
01063   memfree(user_words);
01064   user_words = NULL;
01065 }

void init_permute (  ) 

Initialize anything that needs to be set up for the permute functions.

FIX: Later, tries changing iffy letters in a word to see if those changes fit another word better? Also loads the user dictionary to improve user-specific word recognition, e.g. medical terms.

Definition at line 1016 of file permute.cpp.

References case_sensative, cprintf(), demodir, document_words, FALSE, init_permdawg(), init_permnum(), initialize_dawg(), MAX_DOC_EDGES, MAX_NUM_EDGES, MAX_USER_EDGES, memalloc(), pending_words, read_squished_dawg(), read_word_list(), USER_RESERVED_EDGES, user_words, and word_dawg.

Referenced by init_ms_debug().

01016                     {
01017   char name[1024];
01018   make_adjust_debug();
01019   make_compound_debug();
01020   make_non_word();
01021   make_garbage();
01022   make_doc_words();
01023   make_doc_dict();
01024 
01025   init_permdawg();
01026   init_permnum();
01027 
01028 #ifdef TEXT_VERBOSE
01029   // gets a 'g', see ccmain/tesseractmain.dox
01030   cprintf("g");
01031 #endif
01032 
01033   word_dawg = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_NUM_EDGES);
01034   strcpy(name, demodir);
01035   strcat (name, "tessdata/word-dawg");
01036   read_squished_dawg(name, word_dawg, MAX_NUM_EDGES);
01037 
01038   document_words =
01039     (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);
01040   initialize_dawg(document_words, MAX_DOC_EDGES);
01041 
01042   pending_words =
01043     (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);
01044   initialize_dawg(pending_words, MAX_DOC_EDGES);
01045 
01046   user_words = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_USER_EDGES);
01047   strcpy(name, demodir);
01048   strcat (name, "tessdata/user-words");
01049   read_word_list(name, user_words, MAX_USER_EDGES, USER_RESERVED_EDGES);
01050   case_sensative = FALSE;
01051 }

A_CHOICE* permute_all ( CHOICES_LIST  char_choices,
float  rating_limit,
A_CHOICE raw_choice 
)

Permute all the characters together using all of the different types of permuters/selectors available.

Each of the characters must have a non-NIL choice list.

Definition at line 1073 of file permute.cpp.

References array_count, class_probability, class_string, free_choice(), NULL, number_permute_and_select(), permute_compound_words(), permute_only_top, permute_top_choice(), and permute_words().

Referenced by permute_characters(), and permute_subword().

01075                                             {
01076   A_CHOICE *result_1;
01077   A_CHOICE *result_2 = NULL;
01078   BOOL8 any_alpha;
01079 
01080   result_1 = permute_top_choice (char_choices, rating_limit, raw_choice,
01081     &any_alpha);
01082   if (result_1 == NULL)
01083     return (NULL);
01084   if (permute_only_top)
01085     return result_1;
01086   if (any_alpha && array_count (char_choices) <= 20) {
01087     result_2 = permute_words (char_choices, rating_limit);
01088 
01089     if (class_probability (result_1) < class_probability (result_2)
01090     || class_string (result_2) == NULL) {
01091       free_choice(result_2);
01092     }
01093     else {
01094       free_choice(result_1);
01095       result_1 = result_2;
01096     }
01097   }
01098 
01099   result_2 = number_permute_and_select (char_choices, rating_limit);
01100 
01101   if (class_probability (result_1) < class_probability (result_2)
01102   || class_string (result_2) == NULL) {
01103     free_choice(result_2);
01104   }
01105   else {
01106     free_choice(result_1);
01107     result_1 = result_2;
01108   }
01109 
01110   result_2 = permute_compound_words (char_choices, rating_limit);
01111 
01112   if (!result_2 ||
01113     class_probability (result_1) < class_probability (result_2)
01114   || class_string (result_2) == NULL) {
01115     free_choice(result_2);
01116   }
01117   else {
01118     free_choice(result_1);
01119     result_1 = result_2;
01120   }
01121 
01122   return (result_1);
01123 }

void permute_characters ( CHOICES_LIST  char_choices,
float  limit,
A_CHOICE best_choice,
A_CHOICE raw_choice 
)

Permute these characters together according to each of the different permuters that are enabled.

If you enable display_ratings, tesseract will show you what combinations it considered to arrive at any particular word it recognizes. Very useful!

Definition at line 1133 of file permute.cpp.

References class_certainty, class_probability, class_string, clone_choice, cprintf(), display_ratings, free_choice(), permutation_count, and permute_all().

Referenced by chop_word_main(), evaluate_state(), and improve_by_chopping().

01136                                               {
01137   A_CHOICE *this_choice;
01138 
01139   permutation_count++;           /* Global counter */
01140 
01141   this_choice = permute_all (char_choices, limit, raw_choice);
01142 
01143   if (this_choice &&
01144   class_probability (this_choice) < class_probability (best_choice)) {
01145     clone_choice(best_choice, this_choice);
01146   }
01147   free_choice(this_choice);
01148 
01149   if (display_ratings)
01150     cprintf ("permute_characters:   %-15s %4.2f %4.2f\n",
01151       class_string (best_choice),
01152       class_probability (best_choice), class_certainty (best_choice));
01153 }

A_CHOICE* permute_compound_words ( CHOICES_LIST  character_choices,
float  rating_limit 
)

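Permute a compound word: split the word wherever the top choice for a character is '-' or '/', permute each sub-word separately with permute_subword(), and return the joined result as a COMPOUND_PERM choice. Returns NULL when no interior separator is found or the rating limit is exceeded.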

Definition at line 1159 of file permute.cpp.

References array_count, array_loop, array_value, class_certainty, class_probability, class_string, COMPOUND_PERM, cprintf(), first, MAX_FLOAT32, MAX_WERD_LENGTH, min, new_choice(), NO_PERM, NULL, and permute_subword().

Referenced by permute_all().

01160                                                      {
01161   A_CHOICE *first_choice;
01162   A_CHOICE *best_choice = NULL;
01163   char word[MAX_WERD_LENGTH + 1];
01164   float rating = 0;
01165   float certainty = 10000;
01166   char char_choice;
01167   int x;
01168   int first_index = 0;
01169   char *ptr;
01170 
01171   word[0] = '\0';
01172 
01173   if (array_count (character_choices) > MAX_WERD_LENGTH) {
01174     return (new_choice (NULL, MAX_FLOAT32, -MAX_FLOAT32, -1, NO_PERM));
01175   }
01176 
01177   array_loop(character_choices, x) {
01178 
01179     first_choice =
01180       (A_CHOICE *) first ((CHOICES) array_value (character_choices, x));
01181 
01182     ptr = class_string (first_choice);
01183     char_choice = ptr != NULL ? *ptr : '\0';
01184     if (x > first_index && (char_choice == '-' || char_choice == '/')) {
01185       if (compound_debug)
01186         cprintf ("Hyphenated word found\n");
01187 
01188       permute_subword (character_choices, rating_limit,
01189         first_index, x - 1, word, &rating, &certainty);
01190 
01191       if (rating > rating_limit)
01192         break;
01193       first_index = x + 1;
01194       strcat (word, class_string (first_choice));
01195       rating += class_probability (first_choice);
01196       certainty = min (class_certainty (first_choice), certainty);
01197     }
01198   }
01199 
01200   if (first_index > 0 && first_index < x && rating <= rating_limit) {
01201     permute_subword (character_choices, rating_limit,
01202       first_index, x - 1, word, &rating, &certainty);
01203 
01204     best_choice = new_choice (word, rating, certainty, -1, COMPOUND_PERM);
01205   }
01206   return (best_choice);
01207 }

void permute_subword ( CHOICES_LIST  character_choices,
float  rating_limit,
int  start,
int  end,
char *  word,
float *  rating,
float *  certainty 
)

Permute a part of a compound word. This subword is bounded by hyphens or by the start or end of the word.

Parameters:
character_choices CHOICES_LIST
rating_limit 
start 
end 
word 
rating 
certainty 
Returns:
none (but rating & certainty are updated)
Call the standard word permute function on a set of choices covering only part of the original word.

When done, reclaims the memory that was used in the exercise. Also sets the flag used by LogNewRawChoice.

Note:
Global: compound_debug, MAX_FLOAT32

Definition at line 1230 of file permute.cpp.

References array_count, array_push(), array_value, best_string, choicestruct::certainty, class_certainty, class_probability, class_string, cprintf(), dawg_debug, DisableChoiceAccum, EnableChoiceAccum, FALSE, free_choice(), free_choice_list, MAX_FLOAT32, MAX_INT16, min, new_choice_list, NULL, permute_all(), choicestruct::rating, strfree, choicestruct::string, and TRUE.

Referenced by permute_compound_words().

01236                                        {
01237   int x;
01238   A_CHOICE *best_choice = NULL;
01239   A_CHOICE raw_choice;
01240   CHOICES_LIST subchoices;
01241   CHOICES choices;
01242   char this_char;
01243   char *ptr;
01244 
01245   DisableChoiceAccum();
01246   raw_choice.string = NULL;
01247   raw_choice.rating = MAX_INT16;
01248   raw_choice.certainty = -MAX_INT16;
01249 
01250   subchoices = new_choice_list ();
01251   for (x = start; x <= end; x++) {
01252     choices = (CHOICES) array_value (character_choices, x);
01253     ptr = best_string (choices);
01254     this_char = ptr != NULL ? *ptr : '\0';
01255     if (this_char != '-' && this_char != '/') {
01256       subchoices = array_push (subchoices, choices);
01257     } else {
01258       const char* str = best_string(choices);
01259       strcat (word, str);
01260     }
01261   }
01262 
01263   if (array_count (subchoices)) {
01264     if (compound_debug)
01265       dawg_debug = TRUE;
01266     best_choice = permute_all (subchoices, rating_limit, &raw_choice);
01267     if (compound_debug)
01268       dawg_debug = FALSE;
01269 
01270     if (best_choice && class_string (best_choice)) {
01271       strcat (word, class_string (best_choice));
01272       *rating += class_probability (best_choice);
01273       *certainty = min (class_certainty (best_choice), *certainty);
01274     }
01275     else {
01276       *rating = MAX_FLOAT32;
01277     }
01278   }
01279   else {
01280     *rating = MAX_FLOAT32;
01281   }
01282 
01283   free_choice_list(subchoices);
01284   if (best_choice)
01285     free_choice(best_choice);
01286 
01287   if (compound_debug && *rating < MAX_FLOAT32) {
01288     cprintf ("Subword permuted = %s, %5.2f, %5.2f\n\n",
01289       word, *rating, *certainty);
01290   }
01291   if (raw_choice.string)
01292     strfree(raw_choice.string);
01293 
01294   /* Sets flag used to disable accumulation of word choices during
01295   compound word permutation. LogNewRawChoice */
01296   EnableChoiceAccum();
01297 }

A_CHOICE* permute_top_choice ( CHOICES_LIST  character_choices,
float  rating_limit,
A_CHOICE raw_choice,
BOOL8 any_alpha 
)

Return the top choice for each character as the choice for the word.

Parameters:
character_choices 
rating_limit 
raw_choice 
any_alpha 
Returns:
Top choice for each character as the choice for the word.
In addition a choice is created for the best lower and upper case non-words. In each character position the best lower (or upper) case character is substituted for the best overall character.

Definition at line 1313 of file permute.cpp.

References adjust_non_word(), array_count, array_loop, array_value, best_certainty, best_probability, best_string, choose_il1(), class_certainty, class_permuter, class_probability, class_string, clone_choice, cprintf(), FALSE, first, free_choice(), iterate_list, LogNewRawChoice(), LOWER_CASE_PERM, MAX_PERM_LENGTH, min, new_choice(), NULL, rest, strfree, strsave, TOP_CHOICE_PERM, TRUE, and UPPER_CASE_PERM.

Referenced by permute_all().

01316                                                {
01317   CHOICES char_list;
01318   A_CHOICE *first_choice;
01319   A_CHOICE *best_choice;
01320   A_CHOICE *other_choice;
01321   char *ptr;
01322   char first_char;               //first choice
01323   char second_char;              //second choice
01324   char third_char;               //third choice
01325   char prev_char = '\0';         //prev in word
01326   char next_char = '\0';         //next in word
01327   char next_next_char = '\0';    //after next next in word
01328 
01329   char word[MAX_PERM_LENGTH + 1];
01330   char capital_word[MAX_PERM_LENGTH + 1];
01331   char lower_word[MAX_PERM_LENGTH + 1];
01332   int x;
01333   BOOL8 char_alpha;
01334 
01335   float rating = 0;
01336   float upper_rating = 0;
01337   float lower_rating = 0;
01338   float first_rating = 0;
01339 
01340   float certainty = 10000;
01341   float upper_certainty = 10000;
01342   float lower_certainty = 10000;
01343 
01344   float certainties[MAX_PERM_LENGTH + 1];
01345   float lower_certainties[MAX_PERM_LENGTH + 1];
01346   float upper_certainties[MAX_PERM_LENGTH + 1];
01347 
01348   register CHOICES this_char;
01349   register char ch;
01350   register INT8 lower_done;
01351   register INT8 upper_done;
01352 
01353   if (any_alpha != NULL)
01354     *any_alpha = FALSE;
01355 
01356   if (array_count (character_choices) > MAX_PERM_LENGTH) {
01357     return (NULL);
01358   }
01359 
01360   array_loop(character_choices, x) {
01361     if (x + 1 < array_count (character_choices)) {
01362       char_list = (CHOICES) array_value (character_choices, x + 1);
01363       first_choice = (A_CHOICE *) first (char_list);
01364 
01365       ptr = class_string (first_choice);
01366       next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';
01367     }
01368     else
01369       next_char = '\0';
01370     if (x + 2 < array_count (character_choices)) {
01371       char_list = (CHOICES) array_value (character_choices, x + 2);
01372       first_choice = (A_CHOICE *) first (char_list);
01373 
01374       ptr = class_string (first_choice);
01375       next_next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';
01376     }
01377     else
01378       next_next_char = '\0';
01379 
01380     char_list = (CHOICES) array_value (character_choices, x);
01381     first_choice = (A_CHOICE *) first (char_list);
01382 
01383     ptr = class_string (first_choice);
01384     word[x] = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';
01385 
01386     lower_word[x] = word[x];
01387     capital_word[x] = word[x];
01388     first_char = word[x];
01389     first_rating = class_probability (first_choice);
01390     upper_rating += class_probability (first_choice);
01391     lower_rating += class_probability (first_choice);
01392     lower_certainty = min (class_certainty (first_choice), lower_certainty);
01393     upper_certainty = min (class_certainty (first_choice), upper_certainty);
01394 
01395     certainties[x] = class_certainty (first_choice);
01396     lower_certainties[x] = class_certainty (first_choice);
01397     upper_certainties[x] = class_certainty (first_choice);
01398 
01399     lower_done = FALSE;
01400     upper_done = FALSE;
01401     char_alpha = FALSE;
01402     second_char = '\0';
01403     third_char = '\0';
01404     iterate_list(this_char, char_list) {
01405       ptr = best_string (this_char);
01406       ch = ptr != NULL ? *ptr : '\0';
01407       if (ch == 'l' && rest (this_char) != NULL
01408       && best_probability (rest (this_char)) == first_rating) {
01409         ptr = best_string (rest (this_char));
01410         if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {
01411           second_char = *ptr;
01412           this_char = rest (this_char);
01413           if (rest (this_char) != NULL
01414           && best_probability (rest (this_char)) == first_rating) {
01415             ptr = best_string (rest (this_char));
01416             if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {
01417               third_char = *ptr;
01418               this_char = rest (this_char);
01419             }
01420           }
01421           ch = choose_il1 (first_char, second_char, third_char,
01422             prev_char, next_char, next_next_char);
01423           if (ch != 'l' && word[x] == 'l') {
01424             word[x] = ch;
01425             lower_word[x] = ch;
01426             capital_word[x] = ch;
01427           }
01428         }
01429       }
01430       /* Find lower case */
01431       if (!lower_done && (islower (ch) || (isupper (ch) && x == 0))) {
01432         lower_word[x] = ch;
01433         lower_rating += best_probability (this_char);
01434         lower_rating -= class_probability (first_choice);
01435         lower_certainty = min (best_certainty (this_char), lower_certainty);
01436         lower_certainties[x] = best_certainty (this_char);
01437         lower_done = TRUE;
01438       }
01439       /* Find upper case */
01440       if (!upper_done && isupper (ch)) {
01441         capital_word[x] = ch;
01442         upper_rating += best_probability (this_char);
01443         upper_rating -= class_probability (first_choice);
01444         upper_certainty = min (best_certainty (this_char), upper_certainty);
01445         upper_certainties[x] = best_certainty (this_char);
01446         upper_done = TRUE;
01447       }
01448       if (!char_alpha && isalpha (ch))
01449         char_alpha = TRUE;
01450       if (lower_done && upper_done)
01451         break;
01452     }
01453     if (char_alpha && any_alpha != NULL)
01454       *any_alpha = TRUE;
01455 
01456     if (first_choice == NULL) {
01457       cprintf ("Permuter giving up due to null choices list");
01458       word[x + 1] = '$';
01459       word[x + 2] = '\0';
01460       cprintf (" word=%s\n", word);
01461       return (NULL);
01462     }
01463 
01464     rating += class_probability (first_choice);
01465     if (rating > rating_limit)
01466       return (NULL);
01467 
01468     certainty = min (class_certainty (first_choice), certainty);
01469     prev_char = word[x];
01470   }
01471 
01472   lower_word[x] = '\0';
01473   capital_word[x] = '\0';
01474   word[x] = '\0';
01475 
01476   if (rating < class_probability (raw_choice)) {
01477     if (class_string (raw_choice))
01478       strfree (class_string (raw_choice));
01479 
01480     class_probability (raw_choice) = rating;
01481     class_certainty (raw_choice) = certainty;
01482     class_string (raw_choice) = strsave (word);
01483     class_permuter (raw_choice) = TOP_CHOICE_PERM;
01484 
01485     LogNewRawChoice (raw_choice, 1.0, certainties);
01486   }
01487 
01488   best_choice = new_choice (word, rating, certainty, -1, TOP_CHOICE_PERM);
01489   adjust_non_word(best_choice, certainties);
01490 
01491   other_choice = new_choice (lower_word, lower_rating, lower_certainty,
01492     -1, LOWER_CASE_PERM);
01493   adjust_non_word(other_choice, lower_certainties);
01494   if (class_probability (best_choice) > class_probability (other_choice)) {
01495     clone_choice(best_choice, other_choice);
01496   }
01497   free_choice(other_choice);
01498 
01499   other_choice = new_choice (capital_word, upper_rating, upper_certainty,
01500     -1, UPPER_CASE_PERM);
01501   adjust_non_word(other_choice, upper_certainties);
01502   if (class_probability (best_choice) > class_probability (other_choice)) {
01503     clone_choice(best_choice, other_choice);
01504   }
01505   free_choice(other_choice);
01506 
01507   return (best_choice);
01508 }

A_CHOICE* permute_words ( CHOICES_LIST  char_choices,
float  rating_limit 
)

Permute all the characters together using the dawg to prune all but the valid words.

Definition at line 1593 of file permute.cpp.

References array_count, case_sensative, class_probability, dawg_permute_and_select(), DOC_DAWG_PERM, document_words, FALSE, hyphen_string, MAX_FLOAT32, MAX_WERD_LENGTH, new_choice(), NO_PERM, NULL, SYSTEM_DAWG_PERM, TRUE, USER_DAWG_PERM, user_words, and word_dawg.

Referenced by permute_all().

01593                                                                        { 
01594   A_CHOICE *best_choice;
01595   int hyphen_len;
01596 
01597   best_choice = new_choice (NULL, rating_limit, -MAX_FLOAT32, -1, NO_PERM);
01598 
01599   hyphen_len = hyphen_string != NULL ? strlen (hyphen_string) : 0;
01600   if (hyphen_len + array_count (char_choices) > MAX_WERD_LENGTH) {
01601     class_probability (best_choice) = MAX_FLOAT32;
01602   }
01603   else {
01604 
01605     dawg_permute_and_select ("system words:", word_dawg, SYSTEM_DAWG_PERM,
01606       char_choices, best_choice, TRUE);
01607 
01608     dawg_permute_and_select ("document_words", document_words,
01609       DOC_DAWG_PERM, char_choices, best_choice,
01610       FALSE);
01611 
01612     dawg_permute_and_select ("user words", user_words, USER_DAWG_PERM,
01613       char_choices, best_choice, FALSE);
01614     case_sensative = FALSE;
01615   }
01616 
01617   return (best_choice);
01618 }

int valid_word ( const char *  string  ) 

Check all the DAWGs to see if this word is in any of them.

Definition at line 1624 of file permute.cpp.

References case_sensative, DOC_DAWG_PERM, document_words, FALSE, NO_PERM, SYSTEM_DAWG_PERM, USER_DAWG_PERM, user_words, word_dawg, and word_in_dawg().

Referenced by AcceptableChoice(), AcceptableResult(), AdaptableWord(), add_document_word(), AmbigsFound(), and dict_word().

01624                                    { 
01625   int result = NO_PERM;
01626 
01627   if (word_in_dawg (word_dawg, string))
01628     result = SYSTEM_DAWG_PERM;
01629   else {
01630     if (word_in_dawg (document_words, string))
01631       result = DOC_DAWG_PERM;
01632     else if (word_in_dawg (user_words, string))
01633       result = USER_DAWG_PERM;
01634     case_sensative = FALSE;
01635   }
01636   return (result);
01637 }


Variable Documentation

int adjust_debug

Adjustment Debug, defaults to FALSE

float garbage

Garbage adjustment, defaults to GARBAGE_STRING

Referenced by adjust_non_word(), and set_tess_tweak_vars().

float non_word

Non-word adjustment, defaults to NON_WERD

Referenced by adjust_non_word(), and set_tess_tweak_vars().

int permute_only_top

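Set to 1 if word flag is W_REP_CHAR and 0 if word flag is W_DONT_CHOP.

When non-zero, permute_all() returns the permute_top_choice() result directly and skips the word, number and compound-word permuters.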

Definition at line 107 of file permute.cpp.

Referenced by permute_all(), tess_segment_pass1(), and tess_segment_pass2().


Generated on Wed Feb 28 19:49:22 2007 for Tesseract by  doxygen 1.5.1