wordrec/bestfirst.h File Reference

#include "oldheap.h"
#include "closed.h"
#include "choicearr.h"
#include "associate.h"
#include "choices.h"
#include "states.h"
#include "stopper.h"
#include "blobs.h"
#include "tessclas.h"
#include "seam.h"

Go to the source code of this file.

Classes

Defines

Functions

Variables


Define Documentation

#define chunks_gap ( chunk_widths,
last_chunk   ) 

Value:

((last_chunk < (chunk_widths)->num_chars - 1) ?  \
   ((chunk_widths)->widths[last_chunk * 2 + 1]) :  \
   (0))
Return the width of several of the chunks, if they were joined together.

Definition at line 77 of file bestfirst.h.

Referenced by evaluate_chunks(), and state_char_widths().


Function Documentation

void best_first_search ( CHUNKS_RECORD chunks_record,
A_CHOICE best_choice,
A_CHOICE raw_choice,
STATE state,
DANGERR fixpt,
STATE best_state,
INT32  pass 
)

Find the best segmentation by doing a best first search of the solution space.

Definition at line 76 of file bestfirst.cpp.

References SEARCH_RECORD::best_state, blob_skip, SEARCH_RECORD::closed_states, delete_search(), evaluate_state(), expand_node(), free_state(), hash_add(), hash_lookup(), matrix_dimension, new_search(), num_joints, num_popped, num_seg_states, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, STATE::part1, STATE::part2, pop_queue(), CHUNKS_RECORD::ratings, save_best_state(), start_recording(), stop_recording(), and SEARCH_RECORD::this_state.

Referenced by word_associator().

00082                                    {
00083   SEARCH_RECORD *the_search;
00084   INT16 keep_going;
00085   STATE guided_state;
00086 
00087   num_joints = matrix_dimension (chunks_record->ratings) - 1;
00088   the_search = new_search (chunks_record, num_joints,
00089     best_choice, raw_choice, state);
00090 
00091 #ifndef GRAPHICS_DISABLED
00092   save_best_state(chunks_record); 
00093 #endif
00094   start_recording(); 
00095 
00096   guided_state = *state;
00097   do {
00098                                  /* Look for answer */
00099     if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
00100 
00101       if (blob_skip) {
00102         free_state (the_search->this_state);
00103         break;
00104       }
00105 
00106       guided_state = *(the_search->this_state);
00107       keep_going =
00108         evaluate_state(chunks_record, the_search, fixpt, best_state, pass); 
00109 
00110       hash_add (the_search->closed_states, the_search->this_state);
00111 
00112       if (!keep_going ||
00113       (the_search->num_states > num_seg_states) || (blob_skip)) {
00114         free_state (the_search->this_state);
00115         break;
00116       }
00117 
00118       expand_node(chunks_record, the_search); 
00119     }
00120 
00121     free_state (the_search->this_state);
00122     num_popped++;
00123     the_search->this_state = pop_queue (the_search->open_states);
00124   }
00125   while (the_search->this_state);
00126 
00127   state->part1 = the_search->best_state->part1;
00128   state->part2 = the_search->best_state->part2;
00129   stop_recording(); 
00130   delete_search(the_search); 
00131 }

int chunks_width ( WIDTH_RECORD width_record,
int  start_chunk,
int  last_chunk 
)

Return the width of several of the chunks (if they were joined together.

Definition at line 139 of file bestfirst.cpp.

References WIDTH_RECORD::widths.

Referenced by evaluate_chunks(), and state_char_widths().

00139                                                                               { 
00140   int result = 0;
00141   int x;
00142 
00143   for (x = start_chunk * 2; x <= last_chunk * 2; x++)
00144     result += width_record->widths[x];
00145 
00146   return (result);
00147 }

void delete_search ( SEARCH_RECORD the_search  ) 

Terminate the current search and free all the memory involved.

Definition at line 154 of file bestfirst.cpp.

References SEARCH_RECORD::before_best, SEARCH_RECORD::best_state, SEARCH_RECORD::closed_states, SEARCH_RECORD::first_state, free_hash_table, free_state(), FreeHeapData(), hamming_distance(), memfree(), SEARCH_RECORD::num_joints, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, and record_search_status().

Referenced by best_first_search().

00154                                               { 
00155   float closeness;
00156 
00157   closeness = (the_search->num_joints ?
00158     (hamming_distance ((unsigned long *) the_search->first_state,
00159     (unsigned long *) the_search->best_state,
00160     2) / (float) the_search->num_joints) : 0.0);
00161 
00162   record_search_status (the_search->num_states,
00163     the_search->before_best, closeness);
00164 
00165   free_state (the_search->first_state);
00166   free_state (the_search->best_state);
00167 
00168   free_hash_table (the_search->closed_states);
00169   FreeHeapData (the_search->open_states, (void_dest) free_state);
00170 
00171   memfree(the_search); 
00172 }

CHOICES_LIST evaluate_chunks ( CHUNKS_RECORD chunks_record,
SEARCH_STATE  search_state,
STATE this_state,
STATE best_state,
INT32  pass 
)

Since a particular word level segmentation has been chosen, evaluate this to find the word list that corresponds to it.

Definition at line 180 of file bestfirst.cpp.

References array_free, array_push(), best_certainty, best_probability, blob_skip, CHUNKS_RECORD::chunk_widths, CHUNKS_RECORD::chunks, chunks_gap, chunks_width(), count_blobs(), CHUNKS_RECORD::fx, get_piece_rating(), last_segmentation, new_choice_list, NIL, NULL, CHUNKS_RECORD::ratings, and CHUNKS_RECORD::splits.

Referenced by evaluate_state().

00184                                          {
00185   CHOICES_LIST char_choices;
00186   CHOICES this_choice;
00187   int i;
00188   int x = 0;
00189   int y;
00190 
00191   char_choices = new_choice_list ();
00192   /* Iterate sub-paths */
00193   for (i = 1; i <= search_state[0] + 1; i++) {
00194     if (i > search_state[0])
00195       y = count_blobs (chunks_record->chunks) - 1;
00196     else
00197       y = x + search_state[i];
00198 
00199     if (blob_skip) {
00200       array_free(char_choices); 
00201       return (NULL);
00202     }                            /* Process one square */
00203     /* Classify if needed */
00204     this_choice = get_piece_rating (chunks_record->ratings,
00205       chunks_record->chunks,
00206       chunks_record->splits,
00207       x, y,
00208       chunks_record->fx,
00209       this_state, best_state, pass, i - 1);
00210 
00211     if (this_choice == NIL) {
00212       array_free(char_choices); 
00213       return (NULL);
00214     }
00215     /* Add permuted ratings */
00216     last_segmentation[i - 1].certainty = best_certainty (this_choice);
00217     last_segmentation[i - 1].match = best_probability (this_choice);
00218 
00219     last_segmentation[i - 1].width =
00220       chunks_width (chunks_record->chunk_widths, x, y);
00221     last_segmentation[i - 1].gap =
00222       chunks_gap (chunks_record->chunk_widths, y);
00223 
00224     char_choices = array_push (char_choices, this_choice);
00225     x = y + 1;
00226   }
00227   return (char_choices);
00228 }

INT16 evaluate_state ( CHUNKS_RECORD chunks_record,
SEARCH_RECORD the_search,
DANGERR fixpt,
STATE best_state,
INT32  pass 
)

Evaluate the segmentation that is represented by this state in the best first search & add this state to the "states_seen" list.

Definition at line 236 of file bestfirst.cpp.

References AcceptableChoice(), array_free, SEARCH_RECORD::before_best, SEARCH_RECORD::best_choice, SEARCH_RECORD::best_state, bin_to_chunks(), bin_to_pieces(), CHUNKS_RECORD::chunks, class_probability, display_segmentation(), display_segmentations, evaluate_chunks(), FALSE, DANGERR::index, LogNewSegmentation(), memfree(), NULL, SEARCH_RECORD::num_joints, SEARCH_RECORD::num_states, STATE::part1, STATE::part2, permute_characters(), SEARCH_RECORD::raw_choice, replace_char_widths(), segm_window, SEARCH_RECORD::this_state, TRUE, and window_wait().

Referenced by best_first_search().

00240                                  {
00241   CHOICES_LIST char_choices;
00242   SEARCH_STATE chunk_groups;
00243   float rating_limit = class_probability (the_search->best_choice);
00244   INT16 keep_going = TRUE;
00245   PIECES_STATE widths;
00246 
00247   the_search->num_states++;
00248   chunk_groups = bin_to_chunks (the_search->this_state,
00249     the_search->num_joints);
00250   bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
00251   LogNewSegmentation(widths); 
00252 
00253   rating_limit = class_probability (the_search->best_choice);
00254 
00255   char_choices =
00256     evaluate_chunks (chunks_record, chunk_groups, the_search->this_state,
00257     best_state, pass);
00258   if (char_choices != NULL) {
00259     permute_characters (char_choices,
00260       rating_limit,
00261       the_search->best_choice, the_search->raw_choice);
00262     if (AcceptableChoice (char_choices, the_search->best_choice,
00263       the_search->raw_choice, fixpt))
00264       keep_going = FALSE;
00265     array_free(char_choices); 
00266   }
00267 
00268 #ifndef GRAPHICS_DISABLED
00269   if (display_segmentations) {
00270     display_segmentation (chunks_record->chunks, chunk_groups);
00271     if (display_segmentations > 1)
00272       window_wait(segm_window); 
00273   }
00274 #endif
00275 
00276   if (rating_limit != class_probability (the_search->best_choice)) {
00277     the_search->before_best = the_search->num_states;
00278     the_search->best_state->part1 = the_search->this_state->part1;
00279     the_search->best_state->part2 = the_search->this_state->part2;
00280     replace_char_widths(chunks_record, chunk_groups); 
00281   }
00282   else if (char_choices != NULL)
00283     fixpt->index = -1;
00284 
00285   memfree(chunk_groups); 
00286 
00287   return (keep_going);
00288 }

void expand_node ( CHUNKS_RECORD chunks_record,
SEARCH_RECORD the_search 
)

Create the states that are attached to this one.

Check to see that each one has not already been visited. If not add it to the priority queue.

Definition at line 366 of file bestfirst.cpp.

References SEARCH_RECORD::closed_states, hash_lookup(), SEARCH_RECORD::num_joints, SEARCH_RECORD::open_states, STATE::part1, STATE::part2, prioritize_state(), push_queue(), and SEARCH_RECORD::this_state.

Referenced by best_first_search().

00366                                                                           { 
00367   STATE old_state;
00368   int x;
00369   int mask = 1 << (the_search->num_joints - 1 - 32);
00370 
00371   old_state.part1 = the_search->this_state->part1;
00372   old_state.part2 = the_search->this_state->part2;
00373 
00374   for (x = the_search->num_joints; x > 32; x--) {
00375     the_search->this_state->part1 = mask ^ old_state.part1;
00376     if (!hash_lookup (the_search->closed_states, the_search->this_state))
00377       push_queue (the_search->open_states,
00378         the_search->this_state,
00379         prioritize_state (chunks_record, the_search, &old_state));
00380     mask >>= 1;
00381   }
00382 
00383   if (the_search->num_joints > 32) {
00384     mask = 1 << 31;
00385   }
00386   else {
00387     mask = 1 << (the_search->num_joints - 1);
00388   }
00389 
00390   while (x--) {
00391     the_search->this_state->part2 = mask ^ old_state.part2;
00392     if (!hash_lookup (the_search->closed_states, the_search->this_state))
00393       push_queue (the_search->open_states,
00394         the_search->this_state,
00395         prioritize_state (chunks_record, the_search, &old_state));
00396     mask >>= 1;
00397   }
00398 }

void init_bestfirst_vars (  ) 

Create and initialize references to debug variables that control operations in this file.

Definition at line 65 of file bestfirst.cpp.

Referenced by init_ms_debug().

00065                            { 
00066   make_seg_states(); 
00067   make_worst_state(); 
00068 }

SEARCH_RECORD* new_search ( CHUNKS_RECORD chunks_record,
int  num_joints,
A_CHOICE best_choice,
A_CHOICE raw_choice,
STATE state 
)

Create and initialize a new search record.

Definition at line 405 of file bestfirst.cpp.

References SEARCH_RECORD::before_best, SEARCH_RECORD::best_choice, SEARCH_RECORD::best_state, SEARCH_RECORD::closed_states, cprintf(), SEARCH_RECORD::first_state, MakeHeap(), memalloc(), new_hash_table(), new_state(), SEARCH_RECORD::num_joints, num_seg_states, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, SEARCH_RECORD::raw_choice, and SEARCH_RECORD::this_state.

Referenced by best_first_search().

00409                                         {
00410   SEARCH_RECORD *this_search;
00411 
00412   this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));
00413 
00414   this_search->open_states = MakeHeap (num_seg_states * 20);
00415   this_search->closed_states = new_hash_table ();
00416 
00417   if (state)
00418     this_search->this_state = new_state (state);
00419   else
00420     cprintf ("error: bad initial state in new_search\n");
00421 
00422   this_search->first_state = new_state (this_search->this_state);
00423   this_search->best_state = new_state (this_search->this_state);
00424 
00425   this_search->best_choice = best_choice;
00426   this_search->raw_choice = raw_choice;
00427 
00428   this_search->num_joints = num_joints;
00429   this_search->num_states = 0;
00430   this_search->before_best = 0;
00431 
00432   return (this_search);
00433 }

STATE* pop_queue ( HEAP queue  ) 

Get this state from the priority queue.

It should be the state that has the greatest urgency to be evaluated.

Definition at line 442 of file bestfirst.cpp.

References cprintf(), HEAPENTRY::Data, display_segmentations, GetTopOfHeap(), HEAPENTRY::Key, NULL, num_joints, OK, and print_state().

Referenced by best_first_search().

00442                               { 
00443   HEAPENTRY entry;
00444 
00445   if (GetTopOfHeap (queue, &entry) == OK) {
00446 #ifndef GRAPHICS_DISABLED
00447     if (display_segmentations) {
00448       cprintf ("eval state: %8.3f ", entry.Key);
00449       print_state ("", (STATE *) entry.Data, num_joints);
00450     }
00451 #endif
00452     return ((STATE *) entry.Data);
00453   }
00454   else {
00455     return (NULL);
00456   }
00457 }

void push_queue ( HEAP queue,
STATE state,
FLOAT32  priority 
)

Add this state into the priority queue.

Definition at line 464 of file bestfirst.cpp.

References HEAPENTRY::Data, HeapStore(), HEAPENTRY::Key, MaxSizeOfHeap, new_state(), num_pushed, and SizeOfHeap.

Referenced by expand_node().

00464                                                              { 
00465   HEAPENTRY entry;
00466 
00467   if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_state) {
00468     entry.Data = (char *) new_state (state);
00469     num_pushed++;
00470     entry.Key = priority;
00471     HeapStore(queue, &entry); 
00472   }
00473 }

CHOICES_LIST rebuild_current_state ( TBLOB blobs,
SEAMS  seam_list,
STATE state,
CHOICES_LIST  old_choices,
int  fx 
)

Evaluate the segmentation that is represented by this state in the best first search & add this state to the "states_seen" list.

Definition at line 296 of file bestfirst.cpp.

References array_count, array_push(), array_value, bin_to_chunks(), classify_blob(), count_blobs(), free_all_choices, join_pieces(), memfree(), new_choice_list, blobstruct::next, NULL, num_joints, oldblob(), and Orange.

Referenced by chop_word_main().

00300                                            {
00301   CHOICES_LIST char_choices;
00302   SEARCH_STATE search_state;
00303   int i;
00304   int num_joints = array_count (seam_list);
00305   int x = 0;
00306   int blobindex;                 /*current blob */
00307   TBLOB *p_blob;
00308   TBLOB *blob;
00309   TBLOB *next_blob;
00310   int y;
00311 
00312   search_state = bin_to_chunks (state, num_joints);
00313 
00314   char_choices = new_choice_list ();
00315   /* Iterate sub-paths */
00316   for (i = 1; i <= search_state[0]; i++) {
00317     y = x + search_state[i];
00318     x = y + 1;
00319     char_choices = array_push (char_choices, NULL);
00320   }
00321   char_choices = array_push (char_choices, NULL);
00322 
00323   y = count_blobs (blobs) - 1;
00324   for (i = search_state[0]; i >= 0; i--) {
00325     if (x == y) {                /*single fragment */
00326       array_value (char_choices, i) = array_value (old_choices, x);
00327                                  /*grab the list */
00328       array_value (old_choices, x) = NULL;
00329     }
00330     else {
00331       join_pieces(blobs, seam_list, x, y); 
00332       for (blob = blobs, blobindex = 0, p_blob = NULL; blobindex < x;
00333       blobindex++) {
00334         p_blob = blob;
00335         blob = blob->next;
00336       }
00337       while (blobindex < y) {
00338         next_blob = blob->next;
00339         blob->next = next_blob->next;
00340         oldblob(next_blob);  /*junk dead blobs */
00341         blobindex++;
00342       }
00343       array_value (char_choices, i) =
00344         (char *) classify_blob (p_blob, blob, blob->next, NULL, fx,
00345         "rebuild", Orange, NULL, NULL, 0, 0);
00346     }
00347 
00348     y = x - 1;
00349     x = y - search_state[i];
00350   }
00351 
00352   memfree(search_state); 
00353   free_all_choices(old_choices, x); 
00354   return (char_choices);
00355 
00356 }

void replace_char_widths ( CHUNKS_RECORD chunks_record,
SEARCH_STATE  state 
)

Replace the value of the char_width field in the chunks_record with the updated width measurements from the last_segmentation.

Definition at line 481 of file bestfirst.cpp.

References CHUNKS_RECORD::char_widths, free_widths, last_segmentation, memalloc(), WIDTH_RECORD::num_chars, and WIDTH_RECORD::widths.

Referenced by evaluate_state().

00481                                                                            { 
00482   WIDTH_RECORD *width_record;
00483   int num_blobs;
00484   int i;
00485 
00486   free_widths (chunks_record->char_widths);
00487 
00488   num_blobs = state[0] + 1;
00489   width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
00490   width_record->num_chars = num_blobs;
00491 
00492   for (i = 0; i < num_blobs; i++) {
00493 
00494     width_record->widths[2 * i] = last_segmentation[i].width;
00495 
00496     if (i + 1 < num_blobs)
00497       width_record->widths[2 * i + 1] = last_segmentation[i].gap;
00498   }
00499   chunks_record->char_widths = width_record;
00500 }


Variable Documentation

int num_popped

Definition at line 47 of file bestfirst.cpp.

Referenced by best_first_search(), and write_cooked_text().

int num_seg_states

Referenced by best_first_search(), init_metrics(), new_search(), program_editup2(), record_search_status(), set_pass1(), and set_pass2().


Generated on Wed Feb 28 19:49:28 2007 for Tesseract by  doxygen 1.5.1