wordrec/metrics.cpp

Go to the documentation of this file.
00001 
00020 /*----------------------------------------------------------------------
00021               I n c l u d e s
00022 ----------------------------------------------------------------------*/
00023 #include "metrics.h"
00024 #include "bestfirst.h"
00025 #include "associate.h"
00026 #include "tally.h"
00027 #include "plotseg.h"
00028 #include "globals.h"
00029 #include "wordclass.h"
00030 #include "intmatcher.h"
00031 #include "freelist.h"
00032 #include "djmenus.h"
00033 #include "callcpp.h"
00034 
00035 /*----------------------------------------------------------------------
00036               V a r i a b l e s
00037 ----------------------------------------------------------------------*/
00039 static int states_timed_out1;
00040 static int states_timed_out2;
00041 static int words_segmented1;
00042 static int words_segmented2;
00043 static int segmentation_states1;
00044 static int segmentation_states2;
00045 static int save_priorities;
00046 
00047 int words_chopped1;
00048 int words_chopped2;
00049 int chops_attempted1;
00050 int chops_performed1;
00051 int chops_attempted2;
00052 int chops_performed2;
00053 
00054 int character_count;
00055 int word_count;
00057 int chars_classified;
00058 
00059 MEASUREMENT num_pieces;
00060 MEASUREMENT width_measure;
00061 
00063 MEASUREMENT width_priority_range;
00064 MEASUREMENT match_priority_range;
00065 
00066 TALLY states_before_best;
00067 TALLY best_certainties[2];
00069 TALLY character_widths;
00070 
00072 FILE *priority_file_1;
00073 FILE *priority_file_2;
00074 FILE *priority_file_3;
00075 
00077 STATE *known_best_state = NULL;
00078 
00079 /*----------------------------------------------------------------------
00080               M a c r o s
00081 ----------------------------------------------------------------------*/
/* Width of one certainty bucket: record_certainty() divides a certainty by
   this value to get a bucket index, and save_summary() multiplies an index
   by it to print the bucket's certainty.  The value is parenthesized so the
   negative literal stays a single operand wherever the macro expands. */
#define   CERTAINTY_BUCKET_SIZE (-0.5)
/* Number of buckets in each best_certainties tally. */
#define   CERTAINTY_BUCKETS     40
00084 
00085 /*----------------------------------------------------------------------
00086               F u n c t i o n s
00087 ----------------------------------------------------------------------*/
00088 /* ================== */
00095 void init_metrics() {
00096   words_chopped1 = 0;
00097   words_chopped2 = 0;
00098   chops_performed1 = 0;
00099   chops_performed2 = 0;
00100   chops_attempted1 = 0;
00101   chops_attempted2 = 0;
00102 
00103   words_segmented1 = 0;
00104   words_segmented2 = 0;
00105   states_timed_out1 = 0;
00106   states_timed_out2 = 0;
00107   segmentation_states1 = 0;
00108   segmentation_states2 = 0;
00109 
00110   save_priorities = 0;
00111 
00112   character_count = 0;
00113   word_count = 0;
00114   chars_classified = 0;
00115   permutation_count = 0;
00116 
00117   states_before_best = new_tally (min (100, num_seg_states));
00118 
00119   best_certainties[0] = new_tally (CERTAINTY_BUCKETS);
00120   best_certainties[1] = new_tally (CERTAINTY_BUCKETS);
00121 }
00122 
00123 /* ================== */
00128 void end_metrics() {
00129   memfree(states_before_best);
00130   memfree(best_certainties[0]);
00131   memfree(best_certainties[1]);
00132   memfree(character_widths);
00133   states_before_best = NULL;
00134   best_certainties[0] = NULL;
00135   best_certainties[1] = NULL;
00136   character_widths = NULL;
00137 }
00138 
00139 
00140 /* ================== */
00145 void record_certainty(float certainty, int pass) {
00146   int bucket;
00147 
00148   if (certainty / CERTAINTY_BUCKET_SIZE < MAXINT)
00149     bucket = (int) (certainty / CERTAINTY_BUCKET_SIZE);
00150   else
00151     bucket = MAXINT;
00152 
00153   inc_tally_bucket (best_certainties[pass - 1], bucket);
00154 }
00155 
00156 
00157 /* ================== */
00164 void record_search_status(int num_states, int before_best, float closeness) {
00165   inc_tally_bucket(states_before_best, before_best);
00166 
00167   if (first_pass) {
00168     if (num_states == num_seg_states + 1)
00169       states_timed_out1++;
00170     segmentation_states1 += num_states;
00171     words_segmented1++;
00172   }
00173   else {
00174     if (num_states == num_seg_states + 1)
00175       states_timed_out2++;
00176     segmentation_states2 += num_states;
00177     words_segmented2++;
00178   }
00179 }
00180 
00181 
00182 /* ================== */
00186 void save_summary(INT32 elapsed_time) {
00187   #ifndef SECURE_NAMES
00188   char outfilename[CHARS_PER_LINE];
00189   FILE *f;
00190   int x;
00191   int total;
00192 
00193   strcpy(outfilename, imagefile);
00194   strcat (outfilename, ".sta");
00195   f = open_file (outfilename, "w");
00196 
00197   fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time);
00198   fprintf (f, "\n");
00199 
00200   fprintf (f, "%d characters\n", character_count);
00201   fprintf (f, "%d words\n", word_count);
00202   fprintf (f, "\n");
00203 
00204   fprintf (f, "%d permutations performed\n", permutation_count);
00205   fprintf (f, "%d characters classified\n", chars_classified);
00206   fprintf (f, "%4.0f%% classification overhead\n",
00207     (float) chars_classified / character_count * 100.0 - 100.0);
00208   fprintf (f, "\n");
00209 
00210   fprintf (f, "%d words chopped (pass 1) ", words_chopped1);
00211   fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
00212   fprintf (f, "%d chops performed\n", chops_performed1);
00213   fprintf (f, "%d chops attempted\n", chops_attempted1);
00214   fprintf (f, "\n");
00215 
00216   fprintf (f, "%d words joined (pass 1)", words_segmented1);
00217   fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
00218   fprintf (f, "%d segmentation states\n", segmentation_states1);
00219   fprintf (f, "%d segmentations timed out\n", states_timed_out1);
00220   fprintf (f, "\n");
00221 
00222   fprintf (f, "%d words chopped (pass 2) ", words_chopped2);
00223   fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
00224   fprintf (f, "%d chops performed\n", chops_performed2);
00225   fprintf (f, "%d chops attempted\n", chops_attempted2);
00226   fprintf (f, "\n");
00227 
00228   fprintf (f, "%d words joined (pass 2)", words_segmented2);
00229   fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
00230   fprintf (f, "%d segmentation states\n", segmentation_states2);
00231   fprintf (f, "%d segmentations timed out\n", states_timed_out2);
00232   fprintf (f, "\n");
00233 
00234   total = 0;
00235   iterate_tally (states_before_best, x)
00236     total += (tally_entry (states_before_best, x) * x);
00237   fprintf (f, "segmentations (before best) = %d\n", total);
00238   if (total != 0.0)
00239     fprintf (f, "%4.0f%% segmentation overhead\n",
00240       (float) (segmentation_states1 + segmentation_states2) /
00241       total * 100.0 - 100.0);
00242   fprintf (f, "\n");
00243 
00244   print_tally (f, "segmentations (before best)", states_before_best);
00245 
00246   iterate_tally (best_certainties[0], x)
00247     cprintf ("best certainty of %8.4f = %4d %4d\n",
00248     x * CERTAINTY_BUCKET_SIZE,
00249     tally_entry (best_certainties[0], x),
00250     tally_entry (best_certainties[1], x));
00251 
00252   PrintIntMatcherStats(f);
00253   dj_statistics(f);
00254   fclose(f);
00255   #endif
00256 }
00257 
00258 
00259 /* ================== */
00267 void record_priorities(SEARCH_RECORD *the_search,
00268                        STATE *old_state,
00269                        FLOAT32 priority_1,
00270                        FLOAT32 priority_2) {
00271   record_samples(priority_1, priority_2);
00272 }
00273 
00274 
00275 /* ================== */
00279 void record_samples(FLOAT32 match_pri, FLOAT32 width_pri) {
00280   ADD_SAMPLE(match_priority_range, match_pri);
00281   ADD_SAMPLE(width_priority_range, width_pri);
00282 }
00283 
00284 
00285 /* ================== */
00289 void reset_width_tally() {
00290   character_widths = new_tally (20);
00291   new_measurement(width_measure);
00292   width_measure.num_samples = 158;
00293   width_measure.sum_of_samples = 125.0;
00294   width_measure.sum_of_squares = 118.0;
00295 }
00296 
00297 
00298 /* ================== */
00302 #ifndef GRAPHICS_DISABLED
00303 void save_best_state(CHUNKS_RECORD *chunks_record) { 
00304   STATE state;
00305   SEARCH_STATE chunk_groups;
00306   int num_joints;
00307 
00308   if (save_priorities) {
00309     num_joints = matrix_dimension (chunks_record->ratings) - 1;
00310 
00311     state.part1 = 0xffffffff;
00312     state.part2 = 0xffffffff;
00313 
00314     chunk_groups = bin_to_chunks (&state, num_joints);
00315     display_segmentation (chunks_record->chunks, chunk_groups);
00316     memfree(chunk_groups);
00317 
00318     cprintf ("Enter the correct segmentation > ");
00319     fflush(stdout);
00320     state.part1 = 0;
00321     scanf ("%x", &state.part2);
00322 
00323     chunk_groups = bin_to_chunks (&state, num_joints);
00324     display_segmentation (chunks_record->chunks, chunk_groups);
00325     memfree(chunk_groups);
00326     window_wait(segm_window);  /* == 'n') */
00327 
00328     if (known_best_state)
00329       free_state(known_best_state);
00330     known_best_state = new_state (&state);
00331   }
00332 }
00333 #endif
00334 
00335 
00336 /* ================== */
00340 void start_recording() {
00341   if (save_priorities) {
00342     priority_file_1 = open_file ("Priorities1", "w");
00343     priority_file_2 = open_file ("Priorities2", "w");
00344     priority_file_3 = open_file ("Priorities3", "w");
00345   }
00346 }
00347 
00348 
00349 /* ================== */
00353 void stop_recording() {
00354   if (save_priorities) {
00355     fclose(priority_file_1);
00356     fclose(priority_file_2);
00357     fclose(priority_file_3);
00358   }
00359 }

Generated on Wed Feb 28 19:49:13 2007 for Tesseract by  doxygen 1.5.1