ccmain/tfacepp.cpp

Go to the documentation of this file.
00001 
00020 #include "mfcpch.h"
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #include          "errcode.h"
00025 #include          "tessarray.h"
00026 //#include                        "fxtop.h"
00027 #include          "werd.h"
00028 #include          "tfacep.h"
00029 #include          "tstruct.h"
00030 #include          "tfacepp.h"
00031 #include          "tessvars.h"
00032 #include          "reject.h"
00033 
00034 #define EXTERN //expands to nothing in this translation unit
00035 
00038 EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word");
00041 static POLY_MATCHER tess_matcher; //matcher set by recog_word_recursive, invoked by call_matcher
00042 static POLY_TESTER tess_tester;  //tester set by recog_word_recursive (NULL unless testing), invoked by call_tester
00043 static POLY_TESTER tess_trainer; //trainer set by recog_word_recursive (NULL unless testing), invoked by call_train_tester
00044 static DENORM *tess_denorm;      //denorm set by recog_word_recursive, handed to the matcher/tester/trainer
00045 static WERD *tess_word;          //word set by recog_word_recursive, handed to the matcher
00046 
00047 #define MAX_UNDIVIDED_LENGTH 24 //words with more blobs than this are split by recog_word_recursive
00048 
00067 WERD_CHOICE *recog_word(
00068                         WERD *word,
00069                         DENORM *denorm,
00070                         POLY_MATCHER matcher,
00071                         POLY_TESTER tester,
00072                         POLY_TESTER trainer,
00073                         BOOL8 testing,
00074                         WERD_CHOICE *&raw_choice,
00075                         BLOB_CHOICE_LIST_CLIST *blob_choices,
00076                         WERD *&outword
00077                        ) {
00078   WERD_CHOICE *word_choice;
00079   UINT8 perm_type;
00080   UINT8 real_dict_perm_type;
00081 
00082   if (word->blob_list ()->empty ()) {
00083     word_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00084     raw_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00085     outword = word->poly_copy (denorm->row ()->x_height ());
00086   }
00087   else
00088     word_choice = recog_word_recursive (word, denorm, matcher, tester,
00089       trainer, testing, raw_choice,
00090       blob_choices, outword);
00091   if ((word_choice->string ().length () !=
00092     outword->blob_list ()->length ()) ||
00093   (word_choice->string ().length () != blob_choices->length ())) {
00094     tprintf
00095       ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
00096       word_choice->string ().string (), word_choice->string ().length (),
00097       outword->blob_list ()->length (), blob_choices->length ());
00098   }
00099   ASSERT_HOST (word_choice->string ().length () ==
00100     outword->blob_list ()->length ());
00101   ASSERT_HOST (word_choice->string ().length () == blob_choices->length ());
00102 
00103   /* Copy any reject blobs into the outword */
00104   outword->rej_blob_list ()->deep_copy (word->rej_blob_list ());
00105 
00106   if (tessedit_override_permuter) {
00107     /* Override the permuter type if a straight dictionary check disagrees. */
00108     perm_type = word_choice->permuter ();
00109     if ((perm_type != SYSTEM_DAWG_PERM) &&
00110     (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
00111       real_dict_perm_type = dict_word (word_choice->string ().string ());
00112       if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
00113         (real_dict_perm_type == FREQ_DAWG_PERM) ||
00114         (real_dict_perm_type == USER_DAWG_PERM)) &&
00115         (alpha_count (word_choice->string ().string ()) > 0))
00116         word_choice->set_permuter (real_dict_perm_type); //Use dict perm
00117     }
00118     if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) {
00119       tprintf ("Permuter Type Flipped from %d to %d\n",
00120         perm_type, word_choice->permuter ());
00121     }
00122   }
00123   assert ((word_choice == NULL) == (raw_choice == NULL));
00124   return word_choice;
00125 }
00126 
00127 
00146 WERD_CHOICE *recog_word_recursive(
00147                                   WERD *word,
00148                                   DENORM *denorm,
00149                                   POLY_MATCHER matcher,
00150                                   POLY_TESTER tester,
00151                                   POLY_TESTER trainer,
00152                                   BOOL8 testing,
00153                                   WERD_CHOICE *&raw_choice,
00154                                   BLOB_CHOICE_LIST_CLIST *blob_choices,
00155                                   WERD *&outword
00156                                  ) {
00157   INT32 initial_blob_choice_len;
00158   INT32 word_length;             //no of blobs
00159   STRING word_string;            //converted from tess
00160   ARRAY tess_ratings;            //tess results
00161   A_CHOICE tess_choice;          //best word
00162   A_CHOICE tess_raw;             //raw result
00163   TWERD *tessword;               //tess format
00164   BLOB_CHOICE_LIST *choice_list; //fake list
00165                                  //iterator
00166   BLOB_CHOICE_LIST_C_IT choice_it;
00167 
00168   tess_matcher = matcher;        //install matcher
00169   tess_tester = testing ? tester : NULL;
00170   tess_trainer = testing ? trainer : NULL;
00171   tess_denorm = denorm;
00172   tess_word = word;
00173   //      blob_matchers[1]=call_matcher;
00174   if (word->blob_list ()->length () > MAX_UNDIVIDED_LENGTH) {
00175     return split_and_recog_word (word, denorm, matcher, tester, trainer,
00176       testing, raw_choice, blob_choices,
00177       outword);
00178   }
00179   else {
00180     if (word->flag (W_EOL))
00181       last_word_on_line = TRUE;
00182     else
00183       last_word_on_line = FALSE;
00184     initial_blob_choice_len = blob_choices->length ();
00185     tessword = make_tess_word (word, NULL);
00186     tess_ratings = cc_recog (tessword, &tess_choice, &tess_raw,
00187       testing
00188       && tester != NULL /* ? call_tester : NULL */ ,
00189       testing
00190       && trainer !=
00191       NULL /* ? call_train_tester : NULL */ );
00192                                  //convert word
00193     outword = make_ed_word (tessword, word);
00194     if (outword == NULL) {
00195       outword = word->poly_copy (denorm->row ()->x_height ());
00196     }
00197     delete_word(tessword);  //get rid of it
00198                                  //no of blobs
00199     word_length = outword->blob_list ()->length ();
00200                                  //convert all ratings
00201     convert_choice_lists(tess_ratings, blob_choices); 
00202                                  //copy string
00203     word_string = tess_raw.string;
00204     while (word_string.length () < word_length)
00205       word_string += " ";        //pad with blanks
00206     raw_choice = new WERD_CHOICE (word_string.string (),
00207       tess_raw.rating, tess_raw.certainty,
00208       tess_raw.permuter);
00209     word_string = tess_choice.string;
00210     if (word_string.length () > word_length) {
00211       tprintf ("recog_word: Discarded long string \"%s\"\n",
00212         word_string.string ());
00213       word_string = NULL;        //should never happen
00214     }
00215     if (blob_choices->length () - initial_blob_choice_len != word_length) {
00216       word_string = NULL;        //force rejection
00217       tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
00218         blob_choices->length (), word_length);
00219       choice_it.set_to_list (blob_choices); //list of lists
00220       while (blob_choices->length () - initial_blob_choice_len <
00221       word_length) {
00222         choice_list = new BLOB_CHOICE_LIST; //get fake one
00223         choice_it.add_to_end (choice_list); //add to list
00224         tprintf ("recog_word: Added dummy choice list\n");
00225       }
00226       while (blob_choices->length () - initial_blob_choice_len >
00227       word_length) {
00228         choice_it.move_to_last ();
00229         delete choice_it.extract (); //should never happen
00230         tprintf ("recog_word: Deleted choice list\n");
00231       }
00232     }
00233     while (word_string.length () < word_length)
00234       word_string += " ";        //pad with blanks
00235 
00236     assert (raw_choice != NULL);
00237     if (tess_choice.string)
00238       strfree(tess_choice.string);
00239     if (tess_raw.string)
00240       strfree(tess_raw.string);
00241     return new WERD_CHOICE (word_string.string (),
00242       tess_choice.rating, tess_choice.certainty,
00243       tess_choice.permuter);
00244   }
00245 }
00246 
00247 
00266 WERD_CHOICE *split_and_recog_word(
00267                                   WERD *word,
00268                                   DENORM *denorm,
00269                                   POLY_MATCHER matcher,
00270                                   POLY_TESTER tester,
00271                                   POLY_TESTER trainer,
00272                                   BOOL8 testing,
00273                                   WERD_CHOICE *&raw_choice,
00274                                   BLOB_CHOICE_LIST_CLIST *blob_choices,
00275                                   WERD *&outword
00276                                  ) {
00277   //   INT32  outword1_len;
00278   //   INT32  outword2_len;
00279   WERD *first_word;              //poly copy of word
00280   WERD *second_word;             //fabricated word
00281   WERD *outword2;                //2nd output word
00282   PBLOB *blob;
00283   WERD_CHOICE *result;           //resturn value
00284   WERD_CHOICE *result2;          //output of 2nd word
00285   WERD_CHOICE *raw_choice2;      //raw version of 2nd
00286   float gap;                     //blob gap
00287   float bestgap;                 //biggest gap
00288   PBLOB_LIST new_blobs;          //list of gathered blobs
00289   PBLOB_IT blob_it;
00290                                  //iterator
00291   PBLOB_IT new_blob_it = &new_blobs;
00292 
00293   first_word = word->poly_copy (denorm->row ()->x_height ());
00294   blob_it.set_to_list (first_word->blob_list ());
00295   bestgap = -MAX_INT32;
00296   while (!blob_it.at_last ()) {
00297     blob = blob_it.data ();
00298     gap = blob_it.data_relative (1)->bounding_box ().left () -
00299       blob->bounding_box ().right (); //gap to next
00300     blob_it.forward ();
00301     if (gap > bestgap) {
00302       bestgap = gap;             //find biggest
00303       new_blob_it = blob_it;     //save position
00304     }
00305   }
00306   new_blobs.assign_to_sublist (&new_blob_it, &blob_it); //take 2nd half
00307   second_word = new WERD (&new_blobs, 1, NULL); //make it a word
00308   ASSERT_HOST (word->blob_list ()->length () ==
00309     first_word->blob_list ()->length () +
00310     second_word->blob_list ()->length ());
00311 
00312   result = recog_word_recursive (first_word, denorm, matcher,
00313     tester, trainer, testing, raw_choice,
00314     blob_choices, outword);
00315   delete first_word;             //done that one
00316   result2 = recog_word_recursive (second_word, denorm, matcher,
00317     tester, trainer, testing, raw_choice2,
00318     blob_choices, outword2);
00319   delete second_word;            //done that too
00320   *result += *result2;           //combine ratings
00321   delete result2;
00322   *raw_choice += *raw_choice2;
00323   delete raw_choice2;            //finished with it
00324 
00325   //   outword1_len= outword->blob_list()->length();
00326   //   outword2_len= outword2->blob_list()->length();
00327   outword->join_on (outword2);   //join words
00328   delete outword2;
00329   //   if ( outword->blob_list()->length() != outword1_len + outword2_len )
00330   //      tprintf( "Split&Recog: part1len=%d; part2len=%d; combinedlen=%d\n",
00331   //          outword1_len, outword2_len, outword->blob_list()->length() );
00332   //   ASSERT_HOST( outword->blob_list()->length() == outword1_len + outword2_len );
00333   return result;
00334 }
00335 
00336 
00350 LIST call_matcher(
00351                   TBLOB *ptblob,
00352                   TBLOB *tessblob,
00353                   TBLOB *ntblob,
00354                   void *,
00355                   TEXTROW *
00356                  ) {
00357   PBLOB *pblob;                  //converted blob
00358   PBLOB *blob;                   //converted blob
00359   PBLOB *nblob;                  //converted blob
00360   LIST result;                   //tess output
00361   BLOB_CHOICE *choice;           //current choice
00362   char string[2];                //char converted
00363   BLOB_CHOICE_LIST ratings;      //matcher result
00364   BLOB_CHOICE_IT it;             //iterator
00365 
00366   blob = make_ed_blob (tessblob);//convert blob
00367   if (blob == NULL)
00368     return NULL;                 //can't do it
00369   pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL;
00370   nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL;
00371   (*tess_matcher) (pblob, blob, nblob, tess_word, tess_denorm, ratings);
00372   //match it
00373   delete blob;                   //don't need that now
00374   if (pblob != NULL)
00375     delete pblob;
00376   if (nblob != NULL)
00377     delete nblob;
00378   it.set_to_list (&ratings);     //get list
00379   result = NULL;
00380   string[1] = '\0';
00381   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00382     choice = it.data ();
00383     string[0] = choice->char_class ();
00384     result = append_choice (result, string,
00385       choice->rating (), choice->certainty (),
00386       choice->config ());
00387   }
00388   return result;                 //converted list
00389 }
00390 
00391 
00406 void call_tester(
00407                  TBLOB *tessblob,
00408                  BOOL8 correct_blob,
00409                  char *text,
00410                  INT32 count,
00411                  LIST result
00412                 ) {
00413   PBLOB *blob;                   //converted blob
00414   BLOB_CHOICE_LIST ratings;      //matcher result
00415 
00416   blob = make_ed_blob (tessblob);//convert blob
00417   if (blob == NULL)
00418     return;
00419   convert_choice_list(result, ratings); //make it right type
00420   if (tess_tester != NULL)
00421     (*tess_tester) (blob, tess_denorm, correct_blob, text, count, &ratings);
00422   delete blob;                   //don't need that now
00423 }
00424 
00425 
00440 void call_train_tester(
00441                        TBLOB *tessblob,
00442                        BOOL8 correct_blob,
00443                        char *text,
00444                        INT32 count,
00445                        LIST result
00446                       ) {
00447   PBLOB *blob;                   //converted blob
00448   BLOB_CHOICE_LIST ratings;      //matcher result
00449 
00450   blob = make_ed_blob (tessblob);//convert blob
00451   if (blob == NULL)
00452     return;
00453   convert_choice_list(result, ratings); //make it right type
00454   if (tess_trainer != NULL)
00455     (*tess_trainer) (blob, tess_denorm, correct_blob, text, count, &ratings);
00456   delete blob;                   //don't need that now
00457 }

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1