ccstruct/pageres.cpp

Go to the documentation of this file.
00001 
00019 #include "mfcpch.h"
00020 #include          <stdlib.h>
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #include          "pageres.h"
00025 #include          "notdll.h"
00026 
00030 ELISTIZE (BLOCK_RES)
00031 CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES)
00032 PAGE_RES::PAGE_RES(                            //recursive construct
00033                    BLOCK_LIST *the_block_list  //real page
00034                   ) {
00035   BLOCK_IT block_it(the_block_list); 
00036   BLOCK_RES_IT block_res_it(&block_res_list); 
00037 
00038   char_count = 0;
00039   rej_count = 0;
00040   rejected = FALSE;
00041 
00042   for (block_it.mark_cycle_pt ();
00043   !block_it.cycled_list (); block_it.forward ()) {
00044     block_res_it.add_to_end (new BLOCK_RES (block_it.data ()));
00045   }
00046 }
00047 
00048 
00052 BLOCK_RES::BLOCK_RES(                  //recursive construct
00053                      BLOCK *the_block  //real BLOCK
00054                     ) {
00055   ROW_IT row_it (the_block->row_list ());
00056   ROW_RES_IT row_res_it(&row_res_list); 
00057 
00058   char_count = 0;
00059   rej_count = 0;
00060   font_class = -1;               //not assigned
00061   x_height = -1.0;
00062   font_assigned = FALSE;
00063   bold = FALSE;
00064   italic = FALSE;
00065   row_count = 0;
00066 
00067   block = the_block;
00068 
00069   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00070     row_res_it.add_to_end (new ROW_RES (row_it.data ()));
00071   }
00072 }
00073 
00074 
00078 ROW_RES::ROW_RES(              //recursive construct
00079                  ROW *the_row  //real ROW
00080                 ) {
00081   WERD_IT word_it (the_row->word_list ());
00082   WERD_RES_IT word_res_it(&word_res_list); 
00083   WERD_RES *combo = NULL;        //current combination of fuzzies
00084   WERD_RES *word_res;            //current word
00085   WERD *copy_word;
00086 
00087   char_count = 0;
00088   rej_count = 0;
00089   whole_word_rej_count = 0;
00090   font_class = -1;
00091   font_class_score = -1.0;
00092   bold = FALSE;
00093   italic = FALSE;
00094 
00095   row = the_row;
00096 
00097   for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
00098     word_res = new WERD_RES (word_it.data ());
00099 
00100     if (word_res->word->flag (W_FUZZY_NON)) {
00101       ASSERT_HOST (combo != NULL);
00102       word_res->part_of_combo = TRUE;
00103       combo->copy_on (word_res);
00104     }
00105     if (word_it.data_relative (1)->flag (W_FUZZY_NON)) {
00106       if (combo == NULL) {
00107         copy_word = new WERD;
00108                                  //deep copy
00109         *copy_word = *(word_it.data ());
00110         combo = new WERD_RES (copy_word);
00111         combo->combination = TRUE;
00112         word_res_it.add_to_end (combo);
00113       }
00114       word_res->part_of_combo = TRUE;
00115     }
00116     else
00117       combo = NULL;
00118     word_res_it.add_to_end (word_res);
00119   }
00120 }
00121 
00125 WERD_RES & WERD_RES::operator= ( //assign word_res
00126 const WERD_RES & source          //from this
00127 ) {
00128   this->ELIST_LINK::operator= (source);
00129   if (source.combination) {
00130     word = new WERD;
00131     *word = *(source.word);      //deep copy
00132   }
00133   else
00134     word = source.word;          //pt to same word
00135 
00136   if (source.outword != NULL) {
00137     outword = new WERD;
00138     *outword = *(source.outword);//deep copy
00139   }
00140   else
00141     outword = NULL;
00142 
00143   denorm = source.denorm;
00144   if (source.best_choice != NULL) {
00145     best_choice = new WERD_CHOICE;
00146     *best_choice = *(source.best_choice);
00147     raw_choice = new WERD_CHOICE;
00148     *raw_choice = *(source.raw_choice);
00149   }
00150   else {
00151     best_choice = NULL;
00152     raw_choice = NULL;
00153   }
00154   if (source.ep_choice != NULL) {
00155     ep_choice = new WERD_CHOICE;
00156     *ep_choice = *(source.ep_choice);
00157   }
00158   else
00159     ep_choice = NULL;
00160   reject_map = source.reject_map;
00161   tess_failed = source.tess_failed;
00162   tess_accepted = source.tess_accepted;
00163   tess_would_adapt = source.tess_would_adapt;
00164   done = source.done;
00165   unlv_crunch_mode = source.unlv_crunch_mode;
00166   italic = source.italic;
00167   bold = source.bold;
00168   font1 = source.font1;
00169   font1_count = source.font1_count;
00170   font2 = source.font2;
00171   font2_count = source.font2_count;
00172   x_height = source.x_height;
00173   caps_height = source.caps_height;
00174   guessed_x_ht = source.guessed_x_ht;
00175   guessed_caps_ht = source.guessed_caps_ht;
00176   combination = source.combination;
00177   part_of_combo = source.part_of_combo;
00178   reject_spaces = source.reject_spaces;
00179   return *this;
00180 }
00181 
00182 
00186 WERD_RES::~WERD_RES () {
00187   if (combination)
00188     delete word;
00189   if (outword != NULL)
00190     delete outword;
00191   if (best_choice != NULL) {
00192     delete best_choice;
00193     delete raw_choice;
00194   }
00195   if (ep_choice != NULL) {
00196     delete ep_choice;
00197   }
00198 }
00199 
00200 
00204 WERD_RES *PAGE_RES_IT::restart_page() { 
00205   block_res_it.set_to_list (&page_res->block_res_list);
00206   block_res_it.mark_cycle_pt ();
00207   block_res = NULL;
00208   row_res = NULL;
00209   word_res = NULL;
00210   next_block_res = NULL;
00211   next_row_res = NULL;
00212   next_word_res = NULL;
00213   internal_forward(TRUE); 
00214   return internal_forward (FALSE);
00215 }
00216 
00217 
00225 WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) { 
00226   BOOL8 found_next_word = FALSE;
00227   BOOL8 new_row = FALSE;
00228 
00229   prev_block_res = block_res;
00230   prev_row_res = row_res;
00231   prev_word_res = word_res;
00232   block_res = next_block_res;
00233   row_res = next_row_res;
00234   word_res = next_word_res;
00235 
00236   while (!found_next_word && !block_res_it.cycled_list ()) {
00237     if (new_block) {
00238       new_block = FALSE;
00239       row_res_it.set_to_list (&block_res_it.data ()->row_res_list);
00240       row_res_it.mark_cycle_pt ();
00241       new_row = TRUE;
00242     }
00243     while (!found_next_word && !row_res_it.cycled_list ()) {
00244       if (new_row) {
00245         new_row = FALSE;
00246         word_res_it.set_to_list (&row_res_it.data ()->word_res_list);
00247         word_res_it.mark_cycle_pt ();
00248       }
00249       while (!found_next_word && !word_res_it.cycled_list ()) {
00250         next_block_res = block_res_it.data ();
00251         next_row_res = row_res_it.data ();
00252         next_word_res = word_res_it.data ();
00253         found_next_word = TRUE;
00254         do {
00255           word_res_it.forward ();
00256         }
00257         while (word_res_it.data ()->part_of_combo);
00258       }
00259       if (!found_next_word) {    //end of row reached
00260         row_res_it.forward ();
00261         new_row = TRUE;
00262       }
00263     }
00264     if (!found_next_word) {      //end of block reached
00265       block_res_it.forward ();
00266       new_block = TRUE;
00267     }
00268   }
00269   if (!found_next_word) {        //end of page reached
00270     next_block_res = NULL;
00271     next_row_res = NULL;
00272     next_word_res = NULL;
00273   }
00274   return word_res;
00275 }
00276 
00277 
00284 WERD_RES *PAGE_RES_IT::forward_block() { 
00285   if (block_res == next_block_res) {
00286     block_res_it.forward ();;
00287     block_res = NULL;
00288     row_res = NULL;
00289     word_res = NULL;
00290     next_block_res = NULL;
00291     next_row_res = NULL;
00292     next_word_res = NULL;
00293     internal_forward(TRUE); 
00294   }
00295   return internal_forward (FALSE);
00296 }
00297 
00298 
00302 void PAGE_RES_IT::rej_stat_word() { 
00303   INT16 chars_in_word;
00304   INT16 rejects_in_word = 0;
00305 
00306   chars_in_word = word_res->reject_map.length ();
00307   page_res->char_count += chars_in_word;
00308   block_res->char_count += chars_in_word;
00309   row_res->char_count += chars_in_word;
00310 
00311   rejects_in_word = word_res->reject_map.reject_count ();
00312 
00313   page_res->rej_count += rejects_in_word;
00314   block_res->rej_count += rejects_in_word;
00315   row_res->rej_count += rejects_in_word;
00316   if (chars_in_word == rejects_in_word)
00317     row_res->whole_word_rej_count += rejects_in_word;
00318 }

Generated on Wed Feb 28 19:49:08 2007 for Tesseract by  doxygen 1.5.1