#include "bestfirst.h"
#include "heuristic.h"
#include "plotseg.h"
#include "tordvars.h"
#include "debug.h"
#include "pieces.h"
#include "stopper.h"
#include "metrics.h"
#include "states.h"
#include "bitvec.h"
#include "freelist.h"
#include "permute.h"
#include "structures.h"
#include "wordclass.h"
Go to the source code of this file.
void best_first_search | ( | CHUNKS_RECORD * | chunks_record, | |
A_CHOICE * | best_choice, | |||
A_CHOICE * | raw_choice, | |||
STATE * | state, | |||
DANGERR * | fixpt, | |||
STATE * | best_state, | |||
INT32 | pass | |||
) |
Find the best segmentation by doing a best first search of the solution space.
Definition at line 76 of file bestfirst.cpp.
References SEARCH_RECORD::best_state, blob_skip, SEARCH_RECORD::closed_states, delete_search(), evaluate_state(), expand_node(), free_state(), hash_add(), hash_lookup(), matrix_dimension, new_search(), num_joints, num_popped, num_seg_states, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, STATE::part1, STATE::part2, pop_queue(), CHUNKS_RECORD::ratings, save_best_state(), start_recording(), stop_recording(), and SEARCH_RECORD::this_state.
Referenced by word_associator().
00082 { 00083 SEARCH_RECORD *the_search; 00084 INT16 keep_going; 00085 STATE guided_state; 00086 00087 num_joints = matrix_dimension (chunks_record->ratings) - 1; 00088 the_search = new_search (chunks_record, num_joints, 00089 best_choice, raw_choice, state); 00090 00091 #ifndef GRAPHICS_DISABLED 00092 save_best_state(chunks_record); 00093 #endif 00094 start_recording(); 00095 00096 guided_state = *state; 00097 do { 00098 /* Look for answer */ 00099 if (!hash_lookup (the_search->closed_states, the_search->this_state)) { 00100 00101 if (blob_skip) { 00102 free_state (the_search->this_state); 00103 break; 00104 } 00105 00106 guided_state = *(the_search->this_state); 00107 keep_going = 00108 evaluate_state(chunks_record, the_search, fixpt, best_state, pass); 00109 00110 hash_add (the_search->closed_states, the_search->this_state); 00111 00112 if (!keep_going || 00113 (the_search->num_states > num_seg_states) || (blob_skip)) { 00114 free_state (the_search->this_state); 00115 break; 00116 } 00117 00118 expand_node(chunks_record, the_search); 00119 } 00120 00121 free_state (the_search->this_state); 00122 num_popped++; 00123 the_search->this_state = pop_queue (the_search->open_states); 00124 } 00125 while (the_search->this_state); 00126 00127 state->part1 = the_search->best_state->part1; 00128 state->part2 = the_search->best_state->part2; 00129 stop_recording(); 00130 delete_search(the_search); 00131 }
void call_caller | ( | ) |
* (c) Copyright 1990, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License.
int chunks_width | ( | WIDTH_RECORD * | width_record, | |
int | start_chunk, | |||
int | last_chunk | |||
) |
Return the width of several of the chunks (if they were joined together.
Definition at line 139 of file bestfirst.cpp.
References WIDTH_RECORD::widths.
Referenced by evaluate_chunks(), and state_char_widths().
00139 { 00140 int result = 0; 00141 int x; 00142 00143 for (x = start_chunk * 2; x <= last_chunk * 2; x++) 00144 result += width_record->widths[x]; 00145 00146 return (result); 00147 }
void delete_search | ( | SEARCH_RECORD * | the_search | ) |
Terminate the current search and free all the memory involved.
Definition at line 154 of file bestfirst.cpp.
References SEARCH_RECORD::before_best, SEARCH_RECORD::best_state, SEARCH_RECORD::closed_states, SEARCH_RECORD::first_state, free_hash_table, free_state(), FreeHeapData(), hamming_distance(), memfree(), SEARCH_RECORD::num_joints, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, and record_search_status().
Referenced by best_first_search().
00154 { 00155 float closeness; 00156 00157 closeness = (the_search->num_joints ? 00158 (hamming_distance ((unsigned long *) the_search->first_state, 00159 (unsigned long *) the_search->best_state, 00160 2) / (float) the_search->num_joints) : 0.0); 00161 00162 record_search_status (the_search->num_states, 00163 the_search->before_best, closeness); 00164 00165 free_state (the_search->first_state); 00166 free_state (the_search->best_state); 00167 00168 free_hash_table (the_search->closed_states); 00169 FreeHeapData (the_search->open_states, (void_dest) free_state); 00170 00171 memfree(the_search); 00172 }
CHOICES_LIST evaluate_chunks | ( | CHUNKS_RECORD * | chunks_record, | |
SEARCH_STATE | search_state, | |||
STATE * | this_state, | |||
STATE * | best_state, | |||
INT32 | pass | |||
) |
Since a particular word level segmentation has been chosen, evaluate this to find the word list that corresponds to it.
Definition at line 180 of file bestfirst.cpp.
References array_free, array_push(), best_certainty, best_probability, blob_skip, CHUNKS_RECORD::chunk_widths, CHUNKS_RECORD::chunks, chunks_gap, chunks_width(), count_blobs(), CHUNKS_RECORD::fx, get_piece_rating(), last_segmentation, new_choice_list, NIL, NULL, CHUNKS_RECORD::ratings, and CHUNKS_RECORD::splits.
Referenced by evaluate_state().
00184 { 00185 CHOICES_LIST char_choices; 00186 CHOICES this_choice; 00187 int i; 00188 int x = 0; 00189 int y; 00190 00191 char_choices = new_choice_list (); 00192 /* Iterate sub-paths */ 00193 for (i = 1; i <= search_state[0] + 1; i++) { 00194 if (i > search_state[0]) 00195 y = count_blobs (chunks_record->chunks) - 1; 00196 else 00197 y = x + search_state[i]; 00198 00199 if (blob_skip) { 00200 array_free(char_choices); 00201 return (NULL); 00202 } /* Process one square */ 00203 /* Classify if needed */ 00204 this_choice = get_piece_rating (chunks_record->ratings, 00205 chunks_record->chunks, 00206 chunks_record->splits, 00207 x, y, 00208 chunks_record->fx, 00209 this_state, best_state, pass, i - 1); 00210 00211 if (this_choice == NIL) { 00212 array_free(char_choices); 00213 return (NULL); 00214 } 00215 /* Add permuted ratings */ 00216 last_segmentation[i - 1].certainty = best_certainty (this_choice); 00217 last_segmentation[i - 1].match = best_probability (this_choice); 00218 00219 last_segmentation[i - 1].width = 00220 chunks_width (chunks_record->chunk_widths, x, y); 00221 last_segmentation[i - 1].gap = 00222 chunks_gap (chunks_record->chunk_widths, y); 00223 00224 char_choices = array_push (char_choices, this_choice); 00225 x = y + 1; 00226 } 00227 return (char_choices); 00228 }
INT16 evaluate_state | ( | CHUNKS_RECORD * | chunks_record, | |
SEARCH_RECORD * | the_search, | |||
DANGERR * | fixpt, | |||
STATE * | best_state, | |||
INT32 | pass | |||
) |
Evaluate the segmentation that is represented by this state in the best first search & add this state to the "states_seen" list.
Definition at line 236 of file bestfirst.cpp.
References AcceptableChoice(), array_free, SEARCH_RECORD::before_best, SEARCH_RECORD::best_choice, SEARCH_RECORD::best_state, bin_to_chunks(), bin_to_pieces(), CHUNKS_RECORD::chunks, class_probability, display_segmentation(), display_segmentations, evaluate_chunks(), FALSE, DANGERR::index, LogNewSegmentation(), memfree(), NULL, SEARCH_RECORD::num_joints, SEARCH_RECORD::num_states, STATE::part1, STATE::part2, permute_characters(), SEARCH_RECORD::raw_choice, replace_char_widths(), segm_window, SEARCH_RECORD::this_state, TRUE, and window_wait().
Referenced by best_first_search().
00240 { 00241 CHOICES_LIST char_choices; 00242 SEARCH_STATE chunk_groups; 00243 float rating_limit = class_probability (the_search->best_choice); 00244 INT16 keep_going = TRUE; 00245 PIECES_STATE widths; 00246 00247 the_search->num_states++; 00248 chunk_groups = bin_to_chunks (the_search->this_state, 00249 the_search->num_joints); 00250 bin_to_pieces (the_search->this_state, the_search->num_joints, widths); 00251 LogNewSegmentation(widths); 00252 00253 rating_limit = class_probability (the_search->best_choice); 00254 00255 char_choices = 00256 evaluate_chunks (chunks_record, chunk_groups, the_search->this_state, 00257 best_state, pass); 00258 if (char_choices != NULL) { 00259 permute_characters (char_choices, 00260 rating_limit, 00261 the_search->best_choice, the_search->raw_choice); 00262 if (AcceptableChoice (char_choices, the_search->best_choice, 00263 the_search->raw_choice, fixpt)) 00264 keep_going = FALSE; 00265 array_free(char_choices); 00266 } 00267 00268 #ifndef GRAPHICS_DISABLED 00269 if (display_segmentations) { 00270 display_segmentation (chunks_record->chunks, chunk_groups); 00271 if (display_segmentations > 1) 00272 window_wait(segm_window); 00273 } 00274 #endif 00275 00276 if (rating_limit != class_probability (the_search->best_choice)) { 00277 the_search->before_best = the_search->num_states; 00278 the_search->best_state->part1 = the_search->this_state->part1; 00279 the_search->best_state->part2 = the_search->this_state->part2; 00280 replace_char_widths(chunks_record, chunk_groups); 00281 } 00282 else if (char_choices != NULL) 00283 fixpt->index = -1; 00284 00285 memfree(chunk_groups); 00286 00287 return (keep_going); 00288 }
void expand_node | ( | CHUNKS_RECORD * | chunks_record, | |
SEARCH_RECORD * | the_search | |||
) |
Create the states that are attached to this one.
Check to see that each one has not already been visited. If not add it to the priority queue.
Definition at line 366 of file bestfirst.cpp.
References SEARCH_RECORD::closed_states, hash_lookup(), SEARCH_RECORD::num_joints, SEARCH_RECORD::open_states, STATE::part1, STATE::part2, prioritize_state(), push_queue(), and SEARCH_RECORD::this_state.
Referenced by best_first_search().
00366 { 00367 STATE old_state; 00368 int x; 00369 int mask = 1 << (the_search->num_joints - 1 - 32); 00370 00371 old_state.part1 = the_search->this_state->part1; 00372 old_state.part2 = the_search->this_state->part2; 00373 00374 for (x = the_search->num_joints; x > 32; x--) { 00375 the_search->this_state->part1 = mask ^ old_state.part1; 00376 if (!hash_lookup (the_search->closed_states, the_search->this_state)) 00377 push_queue (the_search->open_states, 00378 the_search->this_state, 00379 prioritize_state (chunks_record, the_search, &old_state)); 00380 mask >>= 1; 00381 } 00382 00383 if (the_search->num_joints > 32) { 00384 mask = 1 << 31; 00385 } 00386 else { 00387 mask = 1 << (the_search->num_joints - 1); 00388 } 00389 00390 while (x--) { 00391 the_search->this_state->part2 = mask ^ old_state.part2; 00392 if (!hash_lookup (the_search->closed_states, the_search->this_state)) 00393 push_queue (the_search->open_states, 00394 the_search->this_state, 00395 prioritize_state (chunks_record, the_search, &old_state)); 00396 mask >>= 1; 00397 } 00398 }
void init_bestfirst_vars | ( | ) |
Create and initialize references to debug variables that control operations in this file.
Definition at line 65 of file bestfirst.cpp.
Referenced by init_ms_debug().
SEARCH_RECORD* new_search | ( | CHUNKS_RECORD * | chunks_record, | |
int | num_joints, | |||
A_CHOICE * | best_choice, | |||
A_CHOICE * | raw_choice, | |||
STATE * | state | |||
) |
Create and initialize a new search record.
Definition at line 405 of file bestfirst.cpp.
References SEARCH_RECORD::before_best, SEARCH_RECORD::best_choice, SEARCH_RECORD::best_state, SEARCH_RECORD::closed_states, cprintf(), SEARCH_RECORD::first_state, MakeHeap(), memalloc(), new_hash_table(), new_state(), SEARCH_RECORD::num_joints, num_seg_states, SEARCH_RECORD::num_states, SEARCH_RECORD::open_states, SEARCH_RECORD::raw_choice, and SEARCH_RECORD::this_state.
Referenced by best_first_search().
00409 { 00410 SEARCH_RECORD *this_search; 00411 00412 this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD)); 00413 00414 this_search->open_states = MakeHeap (num_seg_states * 20); 00415 this_search->closed_states = new_hash_table (); 00416 00417 if (state) 00418 this_search->this_state = new_state (state); 00419 else 00420 cprintf ("error: bad initial state in new_search\n"); 00421 00422 this_search->first_state = new_state (this_search->this_state); 00423 this_search->best_state = new_state (this_search->this_state); 00424 00425 this_search->best_choice = best_choice; 00426 this_search->raw_choice = raw_choice; 00427 00428 this_search->num_joints = num_joints; 00429 this_search->num_states = 0; 00430 this_search->before_best = 0; 00431 00432 return (this_search); 00433 }
Get this state from the priority queue.
It should be the state that has the greatest urgency to be evaluated.
Definition at line 442 of file bestfirst.cpp.
References cprintf(), HEAPENTRY::Data, display_segmentations, GetTopOfHeap(), HEAPENTRY::Key, NULL, num_joints, OK, and print_state().
Referenced by best_first_search().
00442 { 00443 HEAPENTRY entry; 00444 00445 if (GetTopOfHeap (queue, &entry) == OK) { 00446 #ifndef GRAPHICS_DISABLED 00447 if (display_segmentations) { 00448 cprintf ("eval state: %8.3f ", entry.Key); 00449 print_state ("", (STATE *) entry.Data, num_joints); 00450 } 00451 #endif 00452 return ((STATE *) entry.Data); 00453 } 00454 else { 00455 return (NULL); 00456 } 00457 }
Add this state into the priority queue.
Definition at line 464 of file bestfirst.cpp.
References HEAPENTRY::Data, HeapStore(), HEAPENTRY::Key, MaxSizeOfHeap, new_state(), num_pushed, and SizeOfHeap.
Referenced by expand_node().
00464 { 00465 HEAPENTRY entry; 00466 00467 if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_state) { 00468 entry.Data = (char *) new_state (state); 00469 num_pushed++; 00470 entry.Key = priority; 00471 HeapStore(queue, &entry); 00472 } 00473 }
CHOICES_LIST rebuild_current_state | ( | TBLOB * | blobs, | |
SEAMS | seam_list, | |||
STATE * | state, | |||
CHOICES_LIST | old_choices, | |||
int | fx | |||
) |
Evaluate the segmentation that is represented by this state in the best first search & add this state to the "states_seen" list.
Definition at line 296 of file bestfirst.cpp.
References array_count, array_push(), array_value, bin_to_chunks(), classify_blob(), count_blobs(), free_all_choices, join_pieces(), memfree(), new_choice_list, blobstruct::next, NULL, num_joints, oldblob(), and Orange.
Referenced by chop_word_main().
00300 { 00301 CHOICES_LIST char_choices; 00302 SEARCH_STATE search_state; 00303 int i; 00304 int num_joints = array_count (seam_list); 00305 int x = 0; 00306 int blobindex; /*current blob */ 00307 TBLOB *p_blob; 00308 TBLOB *blob; 00309 TBLOB *next_blob; 00310 int y; 00311 00312 search_state = bin_to_chunks (state, num_joints); 00313 00314 char_choices = new_choice_list (); 00315 /* Iterate sub-paths */ 00316 for (i = 1; i <= search_state[0]; i++) { 00317 y = x + search_state[i]; 00318 x = y + 1; 00319 char_choices = array_push (char_choices, NULL); 00320 } 00321 char_choices = array_push (char_choices, NULL); 00322 00323 y = count_blobs (blobs) - 1; 00324 for (i = search_state[0]; i >= 0; i--) { 00325 if (x == y) { /*single fragment */ 00326 array_value (char_choices, i) = array_value (old_choices, x); 00327 /*grab the list */ 00328 array_value (old_choices, x) = NULL; 00329 } 00330 else { 00331 join_pieces(blobs, seam_list, x, y); 00332 for (blob = blobs, blobindex = 0, p_blob = NULL; blobindex < x; 00333 blobindex++) { 00334 p_blob = blob; 00335 blob = blob->next; 00336 } 00337 while (blobindex < y) { 00338 next_blob = blob->next; 00339 blob->next = next_blob->next; 00340 oldblob(next_blob); /*junk dead blobs */ 00341 blobindex++; 00342 } 00343 array_value (char_choices, i) = 00344 (char *) classify_blob (p_blob, blob, blob->next, NULL, fx, 00345 "rebuild", Orange, NULL, NULL, 0, 0); 00346 } 00347 00348 y = x - 1; 00349 x = y - search_state[i]; 00350 } 00351 00352 memfree(search_state); 00353 free_all_choices(old_choices, x); 00354 return (char_choices); 00355 00356 }
void replace_char_widths | ( | CHUNKS_RECORD * | chunks_record, | |
SEARCH_STATE | state | |||
) |
Replace the value of the char_width field in the chunks_record with the updated width measurements from the last_segmentation.
Definition at line 481 of file bestfirst.cpp.
References CHUNKS_RECORD::char_widths, free_widths, last_segmentation, memalloc(), WIDTH_RECORD::num_chars, and WIDTH_RECORD::widths.
Referenced by evaluate_state().
00481 { 00482 WIDTH_RECORD *width_record; 00483 int num_blobs; 00484 int i; 00485 00486 free_widths (chunks_record->char_widths); 00487 00488 num_blobs = state[0] + 1; 00489 width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2); 00490 width_record->num_chars = num_blobs; 00491 00492 for (i = 0; i < num_blobs; i++) { 00493 00494 width_record->widths[2 * i] = last_segmentation[i].width; 00495 00496 if (i + 1 < num_blobs) 00497 width_record->widths[2 * i + 1] = last_segmentation[i].gap; 00498 } 00499 chunks_record->char_widths = width_record; 00500 }
int num_joints |
Number of chunks \- 1
Definition at line 45 of file bestfirst.cpp.
Referenced by best_first_search(), pop_queue(), rebuild_current_state(), and save_best_state().
int num_popped = 0 |
Definition at line 47 of file bestfirst.cpp.
Referenced by best_first_search(), and write_cooked_text().
int num_pushed = 0 |