ccmain/tfacepp.h File Reference

#include "varable.h"
#include "tstruct.h"
#include "ratngs.h"
#include "tessclas.h"
#include "notdll.h"

Go to the source code of this file.

Functions


Function Documentation

LIST call_matcher ( TBLOB *  ptblob,
TBLOB *  tessblob,
TBLOB *  ntblob,
void *  ,
TEXTROW  
)

Call a matcher.

Parameters:
ptblob previous
tessblob blob to match
ntblob next
Returns:
list of choices/results.
Called from Tess with a blob in tess form. Convert the blob to editor form. Call the matcher set up by the segmenter in tess_matcher. Convert the output choices back to tess form.

Definition at line 350 of file tfacepp.cpp.

00356                    {
00357   PBLOB *pblob;                  //converted blob
00358   PBLOB *blob;                   //converted blob
00359   PBLOB *nblob;                  //converted blob
00360   LIST result;                   //tess output
00361   BLOB_CHOICE *choice;           //current choice
00362   char string[2];                //char converted
00363   BLOB_CHOICE_LIST ratings;      //matcher result
00364   BLOB_CHOICE_IT it;             //iterator
00365 
00366   blob = make_ed_blob (tessblob);//convert blob
00367   if (blob == NULL)
00368     return NULL;                 //can't do it
00369   pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL;
00370   nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL;
00371   (*tess_matcher) (pblob, blob, nblob, tess_word, tess_denorm, ratings);
00372   //match it
00373   delete blob;                   //don't need that now
00374   if (pblob != NULL)
00375     delete pblob;
00376   if (nblob != NULL)
00377     delete nblob;
00378   it.set_to_list (&ratings);     //get list
00379   result = NULL;
00380   string[1] = '\0';
00381   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00382     choice = it.data ();
00383     string[0] = choice->char_class ();
00384     result = append_choice (result, string,
00385       choice->rating (), choice->certainty (),
00386       choice->config ());
00387   }
00388   return result;                 //converted list
00389 }

void call_tester ( TBLOB *  tessblob,
BOOL8  correct_blob,
char *  text,
INT32  count,
LIST  result 
)

Call a tester.

Parameters:
tessblob blob to test
correct_blob TRUE if good
text source text
count chars in text
result output of matcher
Returns:
none
Called from Tess with a blob in tess form. Convert the blob to editor form. Call the tester set up by the segmenter in tess_tester, if one is installed.

Definition at line 406 of file tfacepp.cpp.

References convert_choice_list(), make_ed_blob(), NULL, ratings, tess_denorm, and tess_tester.

00412                   {
00413   PBLOB *blob;                   //converted blob
00414   BLOB_CHOICE_LIST ratings;      //matcher result
00415 
00416   blob = make_ed_blob (tessblob);//convert blob
00417   if (blob == NULL)
00418     return;
00419   convert_choice_list(result, ratings); //make it right type
00420   if (tess_tester != NULL)
00421     (*tess_tester) (blob, tess_denorm, correct_blob, text, count, &ratings);
00422   delete blob;                   //don't need that now
00423 }

void call_train_tester ( TBLOB *  tessblob,
BOOL8  correct_blob,
char *  text,
INT32  count,
LIST  result 
)

Call a trainer.

Parameters:
tessblob blob to train on
correct_blob passed to trainer
text passed to trainer
count chars in text
result output of matcher
Returns:
none
Called from Tess with a blob in tess form. Convert the blob to editor form. Call the trainer set up by the segmenter in tess_trainer, if one is installed.

Definition at line 440 of file tfacepp.cpp.

References convert_choice_list(), make_ed_blob(), NULL, ratings, tess_denorm, and tess_trainer.

00446                         {
00447   PBLOB *blob;                   //converted blob
00448   BLOB_CHOICE_LIST ratings;      //matcher result
00449 
00450   blob = make_ed_blob (tessblob);//convert blob
00451   if (blob == NULL)
00452     return;
00453   convert_choice_list(result, ratings); //make it right type
00454   if (tess_trainer != NULL)
00455     (*tess_trainer) (blob, tess_denorm, correct_blob, text, count, &ratings);
00456   delete blob;                   //don't need that now
00457 }

WERD_CHOICE* recog_word ( WERD *  word,
DENORM *  denorm,
POLY_MATCHER  matcher,
POLY_TESTER  tester,
POLY_TESTER  trainer,
BOOL8  testing,
WERD_CHOICE *&  raw_choice,
BLOB_CHOICE_LIST_CLIST *  blob_choices,
WERD *&  outword 
)

Recognize one word.

Parameters:
word word to do
denorm de-normaliser
matcher matcher function
tester tester function
trainer trainer function
testing 0 or 1, 1 if answer driven
raw_choice raw result
blob_choices list of blob lists
outword bln word output
Note:
Global:
See also:
tessedit_override_permuter,

tessedit_rejection_debug

Returns:
result
Convert the word to tess form and pass it to the tess segmenter. Convert the output back to editor form.

Definition at line 67 of file tfacepp.cpp.

References alpha_count(), assert(), ASSERT_HOST, WERD::blob_list(), dict_word(), f, FREQ_DAWG_PERM, NULL, WERD::poly_copy(), recog_word_recursive(), WERD::rej_blob_list(), DENORM::row(), SYSTEM_DAWG_PERM, TOP_CHOICE_PERM, tprintf(), USER_DAWG_PERM, and ROW::x_height().

Referenced by correct_segment_pass2(), tess_segment_pass1(), tess_segment_pass2(), and test_segment_pass2().

00077                          {
00078   WERD_CHOICE *word_choice;
00079   UINT8 perm_type;
00080   UINT8 real_dict_perm_type;
00081 
00082   if (word->blob_list ()->empty ()) {
00083     word_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00084     raw_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00085     outword = word->poly_copy (denorm->row ()->x_height ());
00086   }
00087   else
00088     word_choice = recog_word_recursive (word, denorm, matcher, tester,
00089       trainer, testing, raw_choice,
00090       blob_choices, outword);
00091   if ((word_choice->string ().length () !=
00092     outword->blob_list ()->length ()) ||
00093   (word_choice->string ().length () != blob_choices->length ())) {
00094     tprintf
00095       ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
00096       word_choice->string ().string (), word_choice->string ().length (),
00097       outword->blob_list ()->length (), blob_choices->length ());
00098   }
00099   ASSERT_HOST (word_choice->string ().length () ==
00100     outword->blob_list ()->length ());
00101   ASSERT_HOST (word_choice->string ().length () == blob_choices->length ());
00102 
00103   /* Copy any reject blobs into the outword */
00104   outword->rej_blob_list ()->deep_copy (word->rej_blob_list ());
00105 
00106   if (tessedit_override_permuter) {
00107     /* Override the permuter type if a straight dictionary check disagrees. */
00108     perm_type = word_choice->permuter ();
00109     if ((perm_type != SYSTEM_DAWG_PERM) &&
00110     (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
00111       real_dict_perm_type = dict_word (word_choice->string ().string ());
00112       if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
00113         (real_dict_perm_type == FREQ_DAWG_PERM) ||
00114         (real_dict_perm_type == USER_DAWG_PERM)) &&
00115         (alpha_count (word_choice->string ().string ()) > 0))
00116         word_choice->set_permuter (real_dict_perm_type); //Use dict perm
00117     }
00118     if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) {
00119       tprintf ("Permuter Type Flipped from %d to %d\n",
00120         perm_type, word_choice->permuter ());
00121     }
00122   }
00123   assert ((word_choice == NULL) == (raw_choice == NULL));
00124   return word_choice;
00125 }

WERD_CHOICE* recog_word_recursive ( WERD *  word,
DENORM *  denorm,
POLY_MATCHER  matcher,
POLY_TESTER  tester,
POLY_TESTER  trainer,
BOOL8  testing,
WERD_CHOICE *&  raw_choice,
BLOB_CHOICE_LIST_CLIST *  blob_choices,
WERD *&  outword 
)

Recognize one word, recursive.

Parameters:
word word to do
denorm de-normaliser
matcher matcher function
tester tester function
trainer trainer function
testing 0 or 1, 1 if answer driven
raw_choice raw result
blob_choices list of blob lists
outword bln word output
Note:
Global:
See also:
MAX_UNDIVIDED_LENGTH
Returns:
result
Convert the word to tess form and pass it to the tess segmenter. Convert the output back to editor form.

Definition at line 146 of file tfacepp.cpp.

References assert(), WERD::blob_list(), cc_recog(), choicestruct::certainty, convert_choice_lists(), delete_word(), FALSE, WERD::flag(), last_word_on_line, STRING::length(), make_ed_word(), make_tess_word(), MAX_UNDIVIDED_LENGTH, NULL, choicestruct::permuter, WERD::poly_copy(), choicestruct::rating, DENORM::row(), split_and_recog_word(), strfree, STRING::string(), choicestruct::string, tess_denorm, tess_matcher, tess_tester, tess_trainer, tess_word, tprintf(), TRUE, W_EOL, and ROW::x_height().

Referenced by recog_word(), and split_and_recog_word().

00156                                    {
00157   INT32 initial_blob_choice_len;
00158   INT32 word_length;             //no of blobs
00159   STRING word_string;            //converted from tess
00160   ARRAY tess_ratings;            //tess results
00161   A_CHOICE tess_choice;          //best word
00162   A_CHOICE tess_raw;             //raw result
00163   TWERD *tessword;               //tess format
00164   BLOB_CHOICE_LIST *choice_list; //fake list
00165                                  //iterator
00166   BLOB_CHOICE_LIST_C_IT choice_it;
00167 
00168   tess_matcher = matcher;        //install matcher
00169   tess_tester = testing ? tester : NULL;
00170   tess_trainer = testing ? trainer : NULL;
00171   tess_denorm = denorm;
00172   tess_word = word;
00173   //      blob_matchers[1]=call_matcher;
00174   if (word->blob_list ()->length () > MAX_UNDIVIDED_LENGTH) {
00175     return split_and_recog_word (word, denorm, matcher, tester, trainer,
00176       testing, raw_choice, blob_choices,
00177       outword);
00178   }
00179   else {
00180     if (word->flag (W_EOL))
00181       last_word_on_line = TRUE;
00182     else
00183       last_word_on_line = FALSE;
00184     initial_blob_choice_len = blob_choices->length ();
00185     tessword = make_tess_word (word, NULL);
00186     tess_ratings = cc_recog (tessword, &tess_choice, &tess_raw,
00187       testing
00188       && tester != NULL /* ? call_tester : NULL */ ,
00189       testing
00190       && trainer !=
00191       NULL /* ? call_train_tester : NULL */ );
00192                                  //convert word
00193     outword = make_ed_word (tessword, word);
00194     if (outword == NULL) {
00195       outword = word->poly_copy (denorm->row ()->x_height ());
00196     }
00197     delete_word(tessword);  //get rid of it
00198                                  //no of blobs
00199     word_length = outword->blob_list ()->length ();
00200                                  //convert all ratings
00201     convert_choice_lists(tess_ratings, blob_choices); 
00202                                  //copy string
00203     word_string = tess_raw.string;
00204     while (word_string.length () < word_length)
00205       word_string += " ";        //pad with blanks
00206     raw_choice = new WERD_CHOICE (word_string.string (),
00207       tess_raw.rating, tess_raw.certainty,
00208       tess_raw.permuter);
00209     word_string = tess_choice.string;
00210     if (word_string.length () > word_length) {
00211       tprintf ("recog_word: Discarded long string \"%s\"\n",
00212         word_string.string ());
00213       word_string = NULL;        //should never happen
00214     }
00215     if (blob_choices->length () - initial_blob_choice_len != word_length) {
00216       word_string = NULL;        //force rejection
00217       tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
00218         blob_choices->length (), word_length);
00219       choice_it.set_to_list (blob_choices); //list of lists
00220       while (blob_choices->length () - initial_blob_choice_len <
00221       word_length) {
00222         choice_list = new BLOB_CHOICE_LIST; //get fake one
00223         choice_it.add_to_end (choice_list); //add to list
00224         tprintf ("recog_word: Added dummy choice list\n");
00225       }
00226       while (blob_choices->length () - initial_blob_choice_len >
00227       word_length) {
00228         choice_it.move_to_last ();
00229         delete choice_it.extract (); //should never happen
00230         tprintf ("recog_word: Deleted choice list\n");
00231       }
00232     }
00233     while (word_string.length () < word_length)
00234       word_string += " ";        //pad with blanks
00235 
00236     assert (raw_choice != NULL);
00237     if (tess_choice.string)
00238       strfree(tess_choice.string);
00239     if (tess_raw.string)
00240       strfree(tess_raw.string);
00241     return new WERD_CHOICE (word_string.string (),
00242       tess_choice.rating, tess_choice.certainty,
00243       tess_choice.permuter);
00244   }
00245 }

WERD_CHOICE* split_and_recog_word ( WERD *  word,
DENORM *  denorm,
POLY_MATCHER  matcher,
POLY_TESTER  tester,
POLY_TESTER  trainer,
BOOL8  testing,
WERD_CHOICE *&  raw_choice,
BLOB_CHOICE_LIST_CLIST *  blob_choices,
WERD *&  outword 
)

Split one word in two and recognize each half.

Parameters:
word word to do
denorm de-normaliser
matcher matcher function
tester tester function
trainer trainer function
testing 0 or 1, 1 if answer driven
raw_choice raw result
blob_choices list of blob lists
outword bln word output
Note:
Global:
See also:
MAX_UNDIVIDED_LENGTH
Returns:
result
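Make a poly copy of the word and split it in two at the biggest horizontal gap between adjacent blobs. Recognize each half with recog_word_recursive (which converts it to tess form, passes it to the tess segmenter and converts the output back to editor form). Then combine the two results and raw choices and join the second output word onto the first.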

Definition at line 266 of file tfacepp.cpp.

References ASSERT_HOST, WERD::blob_list(), PBLOB::bounding_box(), WERD::join_on(), BOX::left(), MAX_INT32, NULL, WERD::poly_copy(), recog_word_recursive(), BOX::right(), DENORM::row(), and ROW::x_height().

Referenced by recog_word_recursive().

00276                                    {
00277   //   INT32  outword1_len;
00278   //   INT32  outword2_len;
00279   WERD *first_word;              //poly copy of word
00280   WERD *second_word;             //fabricated word
00281   WERD *outword2;                //2nd output word
00282   PBLOB *blob;
00283   WERD_CHOICE *result;           //resturn value
00284   WERD_CHOICE *result2;          //output of 2nd word
00285   WERD_CHOICE *raw_choice2;      //raw version of 2nd
00286   float gap;                     //blob gap
00287   float bestgap;                 //biggest gap
00288   PBLOB_LIST new_blobs;          //list of gathered blobs
00289   PBLOB_IT blob_it;
00290                                  //iterator
00291   PBLOB_IT new_blob_it = &new_blobs;
00292 
00293   first_word = word->poly_copy (denorm->row ()->x_height ());
00294   blob_it.set_to_list (first_word->blob_list ());
00295   bestgap = -MAX_INT32;
00296   while (!blob_it.at_last ()) {
00297     blob = blob_it.data ();
00298     gap = blob_it.data_relative (1)->bounding_box ().left () -
00299       blob->bounding_box ().right (); //gap to next
00300     blob_it.forward ();
00301     if (gap > bestgap) {
00302       bestgap = gap;             //find biggest
00303       new_blob_it = blob_it;     //save position
00304     }
00305   }
00306   new_blobs.assign_to_sublist (&new_blob_it, &blob_it); //take 2nd half
00307   second_word = new WERD (&new_blobs, 1, NULL); //make it a word
00308   ASSERT_HOST (word->blob_list ()->length () ==
00309     first_word->blob_list ()->length () +
00310     second_word->blob_list ()->length ());
00311 
00312   result = recog_word_recursive (first_word, denorm, matcher,
00313     tester, trainer, testing, raw_choice,
00314     blob_choices, outword);
00315   delete first_word;             //done that one
00316   result2 = recog_word_recursive (second_word, denorm, matcher,
00317     tester, trainer, testing, raw_choice2,
00318     blob_choices, outword2);
00319   delete second_word;            //done that too
00320   *result += *result2;           //combine ratings
00321   delete result2;
00322   *raw_choice += *raw_choice2;
00323   delete raw_choice2;            //finished with it
00324 
00325   //   outword1_len= outword->blob_list()->length();
00326   //   outword2_len= outword2->blob_list()->length();
00327   outword->join_on (outword2);   //join words
00328   delete outword2;
00329   //   if ( outword->blob_list()->length() != outword1_len + outword2_len )
00330   //      tprintf( "Split&Recog: part1len=%d; part2len=%d; combinedlen=%d\n",
00331   //          outword1_len, outword2_len, outword->blob_list()->length() );
00332   //   ASSERT_HOST( outword->blob_list()->length() == outword1_len + outword2_len );
00333   return result;
00334 }


Generated on Wed Feb 28 19:49:15 2007 for Tesseract by  doxygen 1.5.1