00001
00020 #include "mfcpch.h"
00021 #ifdef __UNIX__
00022 #include <assert.h>
00023 #endif
00024 #include "errcode.h"
00025 #include "tessarray.h"
00026
00027 #include "werd.h"
00028 #include "tfacep.h"
00029 #include "tstruct.h"
00030 #include "tfacepp.h"
00031 #include "tessvars.h"
00032 #include "reject.h"
00033
00034 #define EXTERN
00035
// Run-time switch read by recog_word: when set, a result whose permuter code
// is not already one of the DAWG codes is looked up with dict_word and may
// have its permuter code replaced by the dictionary's answer.
00038 EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word");
// Context for the word currently being recognised. recog_word_recursive
// stores these before classifying; the call_matcher / call_tester /
// call_train_tester callbacks in this file read them back.
// Matcher invoked by call_matcher.
00041 static POLY_MATCHER tess_matcher;
// Tester invoked by call_tester; set to NULL when not testing.
00042 static POLY_TESTER tess_tester;
// Trainer invoked by call_train_tester; set to NULL when not testing.
00043 static POLY_TESTER tess_trainer;
// Denorm passed through to the matcher, tester and trainer.
00044 static DENORM *tess_denorm;
// Word passed through to the matcher.
00045 static WERD *tess_word;
00046
// Largest blob count recog_word_recursive will classify in one piece; a word
// with more blobs than this is handed to split_and_recog_word instead.
00047 #define MAX_UNDIVIDED_LENGTH 24
00048
00067 WERD_CHOICE *recog_word(
00068 WERD *word,
00069 DENORM *denorm,
00070 POLY_MATCHER matcher,
00071 POLY_TESTER tester,
00072 POLY_TESTER trainer,
00073 BOOL8 testing,
00074 WERD_CHOICE *&raw_choice,
00075 BLOB_CHOICE_LIST_CLIST *blob_choices,
00076 WERD *&outword
00077 ) {
00078 WERD_CHOICE *word_choice;
00079 UINT8 perm_type;
00080 UINT8 real_dict_perm_type;
00081
00082 if (word->blob_list ()->empty ()) {
00083 word_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00084 raw_choice = new WERD_CHOICE ("", 10.0f, -1.0f, TOP_CHOICE_PERM);
00085 outword = word->poly_copy (denorm->row ()->x_height ());
00086 }
00087 else
00088 word_choice = recog_word_recursive (word, denorm, matcher, tester,
00089 trainer, testing, raw_choice,
00090 blob_choices, outword);
00091 if ((word_choice->string ().length () !=
00092 outword->blob_list ()->length ()) ||
00093 (word_choice->string ().length () != blob_choices->length ())) {
00094 tprintf
00095 ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
00096 word_choice->string ().string (), word_choice->string ().length (),
00097 outword->blob_list ()->length (), blob_choices->length ());
00098 }
00099 ASSERT_HOST (word_choice->string ().length () ==
00100 outword->blob_list ()->length ());
00101 ASSERT_HOST (word_choice->string ().length () == blob_choices->length ());
00102
00103
00104 outword->rej_blob_list ()->deep_copy (word->rej_blob_list ());
00105
00106 if (tessedit_override_permuter) {
00107
00108 perm_type = word_choice->permuter ();
00109 if ((perm_type != SYSTEM_DAWG_PERM) &&
00110 (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
00111 real_dict_perm_type = dict_word (word_choice->string ().string ());
00112 if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
00113 (real_dict_perm_type == FREQ_DAWG_PERM) ||
00114 (real_dict_perm_type == USER_DAWG_PERM)) &&
00115 (alpha_count (word_choice->string ().string ()) > 0))
00116 word_choice->set_permuter (real_dict_perm_type);
00117 }
00118 if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) {
00119 tprintf ("Permuter Type Flipped from %d to %d\n",
00120 perm_type, word_choice->permuter ());
00121 }
00122 }
00123 assert ((word_choice == NULL) == (raw_choice == NULL));
00124 return word_choice;
00125 }
00126
00127
00146 WERD_CHOICE *recog_word_recursive(
00147 WERD *word,
00148 DENORM *denorm,
00149 POLY_MATCHER matcher,
00150 POLY_TESTER tester,
00151 POLY_TESTER trainer,
00152 BOOL8 testing,
00153 WERD_CHOICE *&raw_choice,
00154 BLOB_CHOICE_LIST_CLIST *blob_choices,
00155 WERD *&outword
00156 ) {
00157 INT32 initial_blob_choice_len;
00158 INT32 word_length;
00159 STRING word_string;
00160 ARRAY tess_ratings;
00161 A_CHOICE tess_choice;
00162 A_CHOICE tess_raw;
00163 TWERD *tessword;
00164 BLOB_CHOICE_LIST *choice_list;
00165
00166 BLOB_CHOICE_LIST_C_IT choice_it;
00167
00168 tess_matcher = matcher;
00169 tess_tester = testing ? tester : NULL;
00170 tess_trainer = testing ? trainer : NULL;
00171 tess_denorm = denorm;
00172 tess_word = word;
00173
00174 if (word->blob_list ()->length () > MAX_UNDIVIDED_LENGTH) {
00175 return split_and_recog_word (word, denorm, matcher, tester, trainer,
00176 testing, raw_choice, blob_choices,
00177 outword);
00178 }
00179 else {
00180 if (word->flag (W_EOL))
00181 last_word_on_line = TRUE;
00182 else
00183 last_word_on_line = FALSE;
00184 initial_blob_choice_len = blob_choices->length ();
00185 tessword = make_tess_word (word, NULL);
00186 tess_ratings = cc_recog (tessword, &tess_choice, &tess_raw,
00187 testing
00188 && tester != NULL ,
00189 testing
00190 && trainer !=
00191 NULL );
00192
00193 outword = make_ed_word (tessword, word);
00194 if (outword == NULL) {
00195 outword = word->poly_copy (denorm->row ()->x_height ());
00196 }
00197 delete_word(tessword);
00198
00199 word_length = outword->blob_list ()->length ();
00200
00201 convert_choice_lists(tess_ratings, blob_choices);
00202
00203 word_string = tess_raw.string;
00204 while (word_string.length () < word_length)
00205 word_string += " ";
00206 raw_choice = new WERD_CHOICE (word_string.string (),
00207 tess_raw.rating, tess_raw.certainty,
00208 tess_raw.permuter);
00209 word_string = tess_choice.string;
00210 if (word_string.length () > word_length) {
00211 tprintf ("recog_word: Discarded long string \"%s\"\n",
00212 word_string.string ());
00213 word_string = NULL;
00214 }
00215 if (blob_choices->length () - initial_blob_choice_len != word_length) {
00216 word_string = NULL;
00217 tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
00218 blob_choices->length (), word_length);
00219 choice_it.set_to_list (blob_choices);
00220 while (blob_choices->length () - initial_blob_choice_len <
00221 word_length) {
00222 choice_list = new BLOB_CHOICE_LIST;
00223 choice_it.add_to_end (choice_list);
00224 tprintf ("recog_word: Added dummy choice list\n");
00225 }
00226 while (blob_choices->length () - initial_blob_choice_len >
00227 word_length) {
00228 choice_it.move_to_last ();
00229 delete choice_it.extract ();
00230 tprintf ("recog_word: Deleted choice list\n");
00231 }
00232 }
00233 while (word_string.length () < word_length)
00234 word_string += " ";
00235
00236 assert (raw_choice != NULL);
00237 if (tess_choice.string)
00238 strfree(tess_choice.string);
00239 if (tess_raw.string)
00240 strfree(tess_raw.string);
00241 return new WERD_CHOICE (word_string.string (),
00242 tess_choice.rating, tess_choice.certainty,
00243 tess_choice.permuter);
00244 }
00245 }
00246
00247
00266 WERD_CHOICE *split_and_recog_word(
00267 WERD *word,
00268 DENORM *denorm,
00269 POLY_MATCHER matcher,
00270 POLY_TESTER tester,
00271 POLY_TESTER trainer,
00272 BOOL8 testing,
00273 WERD_CHOICE *&raw_choice,
00274 BLOB_CHOICE_LIST_CLIST *blob_choices,
00275 WERD *&outword
00276 ) {
00277
00278
00279 WERD *first_word;
00280 WERD *second_word;
00281 WERD *outword2;
00282 PBLOB *blob;
00283 WERD_CHOICE *result;
00284 WERD_CHOICE *result2;
00285 WERD_CHOICE *raw_choice2;
00286 float gap;
00287 float bestgap;
00288 PBLOB_LIST new_blobs;
00289 PBLOB_IT blob_it;
00290
00291 PBLOB_IT new_blob_it = &new_blobs;
00292
00293 first_word = word->poly_copy (denorm->row ()->x_height ());
00294 blob_it.set_to_list (first_word->blob_list ());
00295 bestgap = -MAX_INT32;
00296 while (!blob_it.at_last ()) {
00297 blob = blob_it.data ();
00298 gap = blob_it.data_relative (1)->bounding_box ().left () -
00299 blob->bounding_box ().right ();
00300 blob_it.forward ();
00301 if (gap > bestgap) {
00302 bestgap = gap;
00303 new_blob_it = blob_it;
00304 }
00305 }
00306 new_blobs.assign_to_sublist (&new_blob_it, &blob_it);
00307 second_word = new WERD (&new_blobs, 1, NULL);
00308 ASSERT_HOST (word->blob_list ()->length () ==
00309 first_word->blob_list ()->length () +
00310 second_word->blob_list ()->length ());
00311
00312 result = recog_word_recursive (first_word, denorm, matcher,
00313 tester, trainer, testing, raw_choice,
00314 blob_choices, outword);
00315 delete first_word;
00316 result2 = recog_word_recursive (second_word, denorm, matcher,
00317 tester, trainer, testing, raw_choice2,
00318 blob_choices, outword2);
00319 delete second_word;
00320 *result += *result2;
00321 delete result2;
00322 *raw_choice += *raw_choice2;
00323 delete raw_choice2;
00324
00325
00326
00327 outword->join_on (outword2);
00328 delete outword2;
00329
00330
00331
00332
00333 return result;
00334 }
00335
00336
00350 LIST call_matcher(
00351 TBLOB *ptblob,
00352 TBLOB *tessblob,
00353 TBLOB *ntblob,
00354 void *,
00355 TEXTROW *
00356 ) {
00357 PBLOB *pblob;
00358 PBLOB *blob;
00359 PBLOB *nblob;
00360 LIST result;
00361 BLOB_CHOICE *choice;
00362 char string[2];
00363 BLOB_CHOICE_LIST ratings;
00364 BLOB_CHOICE_IT it;
00365
00366 blob = make_ed_blob (tessblob);
00367 if (blob == NULL)
00368 return NULL;
00369 pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL;
00370 nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL;
00371 (*tess_matcher) (pblob, blob, nblob, tess_word, tess_denorm, ratings);
00372
00373 delete blob;
00374 if (pblob != NULL)
00375 delete pblob;
00376 if (nblob != NULL)
00377 delete nblob;
00378 it.set_to_list (&ratings);
00379 result = NULL;
00380 string[1] = '\0';
00381 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00382 choice = it.data ();
00383 string[0] = choice->char_class ();
00384 result = append_choice (result, string,
00385 choice->rating (), choice->certainty (),
00386 choice->config ());
00387 }
00388 return result;
00389 }
00390
00391
00406 void call_tester(
00407 TBLOB *tessblob,
00408 BOOL8 correct_blob,
00409 char *text,
00410 INT32 count,
00411 LIST result
00412 ) {
00413 PBLOB *blob;
00414 BLOB_CHOICE_LIST ratings;
00415
00416 blob = make_ed_blob (tessblob);
00417 if (blob == NULL)
00418 return;
00419 convert_choice_list(result, ratings);
00420 if (tess_tester != NULL)
00421 (*tess_tester) (blob, tess_denorm, correct_blob, text, count, &ratings);
00422 delete blob;
00423 }
00424
00425
00440 void call_train_tester(
00441 TBLOB *tessblob,
00442 BOOL8 correct_blob,
00443 char *text,
00444 INT32 count,
00445 LIST result
00446 ) {
00447 PBLOB *blob;
00448 BLOB_CHOICE_LIST ratings;
00449
00450 blob = make_ed_blob (tessblob);
00451 if (blob == NULL)
00452 return;
00453 convert_choice_list(result, ratings);
00454 if (tess_trainer != NULL)
00455 (*tess_trainer) (blob, tess_denorm, correct_blob, text, count, &ratings);
00456 delete blob;
00457 }