wordrec/bestfirst.cpp

Go to the documentation of this file.
00001 
00021 /*----------------------------------------------------------------------
00022           I n c l u d e s
00023 ---------------------------------------------------------------------*/
00024 #include "bestfirst.h"
00025 #include "heuristic.h"
00026 #include "plotseg.h"
00027 #include "tordvars.h"
00028 #include "debug.h"
00029 #include "pieces.h"
00030 #include "stopper.h"
00031 #include "metrics.h"
00032 #include "states.h"
00033 #include "bitvec.h"
00034 #include "freelist.h"
00035 #include "permute.h"
00036 #include "structures.h"
00037 #include "wordclass.h"
00038 
00039 void call_caller(); 
00040 
00041 /*----------------------------------------------------------------------
00042           V a r i a b l e s
00043 ---------------------------------------------------------------------*/
00045 int num_joints;
00046 int num_pushed = 0;
00047 int num_popped = 0;
00048 
00051 make_int_var (num_seg_states, 30, make_seg_states,
00052 9, 1, set_seg_states, "Segmentation states");
00053 make_float_var (worst_state, 1, make_worst_state,
00054 9, 9, set_worst_state, "Worst segmentation state");
00057 /*----------------------------------------------------------------------
00058           F u n c t i o n s
00059 ----------------------------------------------------------------------*/
00060 /* ==================== */
00065 void init_bestfirst_vars() { 
00066   make_seg_states(); 
00067   make_worst_state(); 
00068 }
00069 
00070 
00071 /* ==================== */
00076 void best_first_search(CHUNKS_RECORD *chunks_record,
00077                        A_CHOICE *best_choice,
00078                        A_CHOICE *raw_choice,
00079                        STATE *state,
00080                        DANGERR *fixpt,
00081                        STATE *best_state,
00082                        INT32 pass) {
00083   SEARCH_RECORD *the_search;
00084   INT16 keep_going;
00085   STATE guided_state;
00086 
00087   num_joints = matrix_dimension (chunks_record->ratings) - 1;
00088   the_search = new_search (chunks_record, num_joints,
00089     best_choice, raw_choice, state);
00090 
00091 #ifndef GRAPHICS_DISABLED
00092   save_best_state(chunks_record); 
00093 #endif
00094   start_recording(); 
00095 
00096   guided_state = *state;
00097   do {
00098                                  /* Look for answer */
00099     if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
00100 
00101       if (blob_skip) {
00102         free_state (the_search->this_state);
00103         break;
00104       }
00105 
00106       guided_state = *(the_search->this_state);
00107       keep_going =
00108         evaluate_state(chunks_record, the_search, fixpt, best_state, pass); 
00109 
00110       hash_add (the_search->closed_states, the_search->this_state);
00111 
00112       if (!keep_going ||
00113       (the_search->num_states > num_seg_states) || (blob_skip)) {
00114         free_state (the_search->this_state);
00115         break;
00116       }
00117 
00118       expand_node(chunks_record, the_search); 
00119     }
00120 
00121     free_state (the_search->this_state);
00122     num_popped++;
00123     the_search->this_state = pop_queue (the_search->open_states);
00124   }
00125   while (the_search->this_state);
00126 
00127   state->part1 = the_search->best_state->part1;
00128   state->part2 = the_search->best_state->part2;
00129   stop_recording(); 
00130   delete_search(the_search); 
00131 }
00132 
00133 
00134 /* ==================== */
00139 int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk) { 
00140   int result = 0;
00141   int x;
00142 
00143   for (x = start_chunk * 2; x <= last_chunk * 2; x++)
00144     result += width_record->widths[x];
00145 
00146   return (result);
00147 }
00148 
00149 
00150 /* ==================== */
00154 void delete_search(SEARCH_RECORD *the_search) { 
00155   float closeness;
00156 
00157   closeness = (the_search->num_joints ?
00158     (hamming_distance ((unsigned long *) the_search->first_state,
00159     (unsigned long *) the_search->best_state,
00160     2) / (float) the_search->num_joints) : 0.0);
00161 
00162   record_search_status (the_search->num_states,
00163     the_search->before_best, closeness);
00164 
00165   free_state (the_search->first_state);
00166   free_state (the_search->best_state);
00167 
00168   free_hash_table (the_search->closed_states);
00169   FreeHeapData (the_search->open_states, (void_dest) free_state);
00170 
00171   memfree(the_search); 
00172 }
00173 
00174 
00175 /* ==================== */
00180 CHOICES_LIST evaluate_chunks(CHUNKS_RECORD *chunks_record,
00181                              SEARCH_STATE search_state,
00182                              STATE *this_state,
00183                              STATE *best_state,
00184                              INT32 pass) {
00185   CHOICES_LIST char_choices;
00186   CHOICES this_choice;
00187   int i;
00188   int x = 0;
00189   int y;
00190 
00191   char_choices = new_choice_list ();
00192   /* Iterate sub-paths */
00193   for (i = 1; i <= search_state[0] + 1; i++) {
00194     if (i > search_state[0])
00195       y = count_blobs (chunks_record->chunks) - 1;
00196     else
00197       y = x + search_state[i];
00198 
00199     if (blob_skip) {
00200       array_free(char_choices); 
00201       return (NULL);
00202     }                            /* Process one square */
00203     /* Classify if needed */
00204     this_choice = get_piece_rating (chunks_record->ratings,
00205       chunks_record->chunks,
00206       chunks_record->splits,
00207       x, y,
00208       chunks_record->fx,
00209       this_state, best_state, pass, i - 1);
00210 
00211     if (this_choice == NIL) {
00212       array_free(char_choices); 
00213       return (NULL);
00214     }
00215     /* Add permuted ratings */
00216     last_segmentation[i - 1].certainty = best_certainty (this_choice);
00217     last_segmentation[i - 1].match = best_probability (this_choice);
00218 
00219     last_segmentation[i - 1].width =
00220       chunks_width (chunks_record->chunk_widths, x, y);
00221     last_segmentation[i - 1].gap =
00222       chunks_gap (chunks_record->chunk_widths, y);
00223 
00224     char_choices = array_push (char_choices, this_choice);
00225     x = y + 1;
00226   }
00227   return (char_choices);
00228 }
00229 
00230 
00231 /* ==================== */
00236 INT16 evaluate_state(CHUNKS_RECORD *chunks_record,
00237                      SEARCH_RECORD *the_search,
00238                      DANGERR *fixpt,
00239                      STATE *best_state,
00240                      INT32 pass) {
00241   CHOICES_LIST char_choices;
00242   SEARCH_STATE chunk_groups;
00243   float rating_limit = class_probability (the_search->best_choice);
00244   INT16 keep_going = TRUE;
00245   PIECES_STATE widths;
00246 
00247   the_search->num_states++;
00248   chunk_groups = bin_to_chunks (the_search->this_state,
00249     the_search->num_joints);
00250   bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
00251   LogNewSegmentation(widths); 
00252 
00253   rating_limit = class_probability (the_search->best_choice);
00254 
00255   char_choices =
00256     evaluate_chunks (chunks_record, chunk_groups, the_search->this_state,
00257     best_state, pass);
00258   if (char_choices != NULL) {
00259     permute_characters (char_choices,
00260       rating_limit,
00261       the_search->best_choice, the_search->raw_choice);
00262     if (AcceptableChoice (char_choices, the_search->best_choice,
00263       the_search->raw_choice, fixpt))
00264       keep_going = FALSE;
00265     array_free(char_choices); 
00266   }
00267 
00268 #ifndef GRAPHICS_DISABLED
00269   if (display_segmentations) {
00270     display_segmentation (chunks_record->chunks, chunk_groups);
00271     if (display_segmentations > 1)
00272       window_wait(segm_window); 
00273   }
00274 #endif
00275 
00276   if (rating_limit != class_probability (the_search->best_choice)) {
00277     the_search->before_best = the_search->num_states;
00278     the_search->best_state->part1 = the_search->this_state->part1;
00279     the_search->best_state->part2 = the_search->this_state->part2;
00280     replace_char_widths(chunks_record, chunk_groups); 
00281   }
00282   else if (char_choices != NULL)
00283     fixpt->index = -1;
00284 
00285   memfree(chunk_groups); 
00286 
00287   return (keep_going);
00288 }
00289 
00290 
00291 /* ==================== */
00296 CHOICES_LIST rebuild_current_state(TBLOB *blobs,
00297                                    SEAMS seam_list,
00298                                    STATE *state,
00299                                    CHOICES_LIST old_choices,
00300                                    int fx) {
00301   CHOICES_LIST char_choices;
00302   SEARCH_STATE search_state;
00303   int i;
00304   int num_joints = array_count (seam_list);
00305   int x = 0;
00306   int blobindex;                 /*current blob */
00307   TBLOB *p_blob;
00308   TBLOB *blob;
00309   TBLOB *next_blob;
00310   int y;
00311 
00312   search_state = bin_to_chunks (state, num_joints);
00313 
00314   char_choices = new_choice_list ();
00315   /* Iterate sub-paths */
00316   for (i = 1; i <= search_state[0]; i++) {
00317     y = x + search_state[i];
00318     x = y + 1;
00319     char_choices = array_push (char_choices, NULL);
00320   }
00321   char_choices = array_push (char_choices, NULL);
00322 
00323   y = count_blobs (blobs) - 1;
00324   for (i = search_state[0]; i >= 0; i--) {
00325     if (x == y) {                /*single fragment */
00326       array_value (char_choices, i) = array_value (old_choices, x);
00327                                  /*grab the list */
00328       array_value (old_choices, x) = NULL;
00329     }
00330     else {
00331       join_pieces(blobs, seam_list, x, y); 
00332       for (blob = blobs, blobindex = 0, p_blob = NULL; blobindex < x;
00333       blobindex++) {
00334         p_blob = blob;
00335         blob = blob->next;
00336       }
00337       while (blobindex < y) {
00338         next_blob = blob->next;
00339         blob->next = next_blob->next;
00340         oldblob(next_blob);  /*junk dead blobs */
00341         blobindex++;
00342       }
00343       array_value (char_choices, i) =
00344         (char *) classify_blob (p_blob, blob, blob->next, NULL, fx,
00345         "rebuild", Orange, NULL, NULL, 0, 0);
00346     }
00347 
00348     y = x - 1;
00349     x = y - search_state[i];
00350   }
00351 
00352   memfree(search_state); 
00353   free_all_choices(old_choices, x); 
00354   return (char_choices);
00355 
00356 }
00357 
00358 
00359 /* ==================== */
00366 void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) { 
00367   STATE old_state;
00368   int x;
00369   int mask = 1 << (the_search->num_joints - 1 - 32);
00370 
00371   old_state.part1 = the_search->this_state->part1;
00372   old_state.part2 = the_search->this_state->part2;
00373 
00374   for (x = the_search->num_joints; x > 32; x--) {
00375     the_search->this_state->part1 = mask ^ old_state.part1;
00376     if (!hash_lookup (the_search->closed_states, the_search->this_state))
00377       push_queue (the_search->open_states,
00378         the_search->this_state,
00379         prioritize_state (chunks_record, the_search, &old_state));
00380     mask >>= 1;
00381   }
00382 
00383   if (the_search->num_joints > 32) {
00384     mask = 1 << 31;
00385   }
00386   else {
00387     mask = 1 << (the_search->num_joints - 1);
00388   }
00389 
00390   while (x--) {
00391     the_search->this_state->part2 = mask ^ old_state.part2;
00392     if (!hash_lookup (the_search->closed_states, the_search->this_state))
00393       push_queue (the_search->open_states,
00394         the_search->this_state,
00395         prioritize_state (chunks_record, the_search, &old_state));
00396     mask >>= 1;
00397   }
00398 }
00399 
00400 
00401 /* ==================== */
00405 SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
00406                           int num_joints,
00407                           A_CHOICE *best_choice,
00408                           A_CHOICE *raw_choice,
00409                           STATE *state) {
00410   SEARCH_RECORD *this_search;
00411 
00412   this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));
00413 
00414   this_search->open_states = MakeHeap (num_seg_states * 20);
00415   this_search->closed_states = new_hash_table ();
00416 
00417   if (state)
00418     this_search->this_state = new_state (state);
00419   else
00420     cprintf ("error: bad initial state in new_search\n");
00421 
00422   this_search->first_state = new_state (this_search->this_state);
00423   this_search->best_state = new_state (this_search->this_state);
00424 
00425   this_search->best_choice = best_choice;
00426   this_search->raw_choice = raw_choice;
00427 
00428   this_search->num_joints = num_joints;
00429   this_search->num_states = 0;
00430   this_search->before_best = 0;
00431 
00432   return (this_search);
00433 }
00434 
00435 
00436 /* ==================== */
00442 STATE *pop_queue(HEAP *queue) { 
00443   HEAPENTRY entry;
00444 
00445   if (GetTopOfHeap (queue, &entry) == OK) {
00446 #ifndef GRAPHICS_DISABLED
00447     if (display_segmentations) {
00448       cprintf ("eval state: %8.3f ", entry.Key);
00449       print_state ("", (STATE *) entry.Data, num_joints);
00450     }
00451 #endif
00452     return ((STATE *) entry.Data);
00453   }
00454   else {
00455     return (NULL);
00456   }
00457 }
00458 
00459 
00460 /* ==================== */
00464 void push_queue(HEAP *queue, STATE *state, FLOAT32 priority) { 
00465   HEAPENTRY entry;
00466 
00467   if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_state) {
00468     entry.Data = (char *) new_state (state);
00469     num_pushed++;
00470     entry.Key = priority;
00471     HeapStore(queue, &entry); 
00472   }
00473 }
00474 
00475 
00476 /* ==================== */
00481 void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) { 
00482   WIDTH_RECORD *width_record;
00483   int num_blobs;
00484   int i;
00485 
00486   free_widths (chunks_record->char_widths);
00487 
00488   num_blobs = state[0] + 1;
00489   width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
00490   width_record->num_chars = num_blobs;
00491 
00492   for (i = 0; i < num_blobs; i++) {
00493 
00494     width_record->widths[2 * i] = last_segmentation[i].width;
00495 
00496     if (i + 1 < num_blobs)
00497       width_record->widths[2 * i + 1] = last_segmentation[i].gap;
00498   }
00499   chunks_record->char_widths = width_record;
00500 }

Generated on Wed Feb 28 19:49:13 2007 for Tesseract by  doxygen 1.5.1