STATS Class Reference

#include <statistc.h>

List of all members.


Detailed Description

Statistics package.

Definition at line 30 of file statistc.h.

Public Member Functions

Private Attributes


Constructor & Destructor Documentation

STATS::STATS ( INT32  min,
INT32  max 
)

Construct a new stats element by allocating and zeroing the memory.

Definition at line 36 of file statistc.cpp.

References alloc_mem(), buckets, clear(), NULL, rangemax, and rangemin.

00039               {
00040 
00041   if (max <= min) {
00042     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00043             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00044             "Illegal range for stats, Min=%d, Max=%d",min,max);*/
00045     min = 0;
00046     max = 1;
00047   }
00048   rangemin = min;                //setup
00049   rangemax = max;
00050   buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00051   if (buckets != NULL)
00052     this->clear ();              //zero it
00053   /*   else
00054      err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00055      ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00056      "No memory for stats, Min=%d, Max=%d",min,max); */
00057 }

STATS::STATS (  ) 

Definition at line 60 of file statistc.cpp.

References buckets, NULL, rangemax, and rangemin.

00060              {  //constructor
00061   rangemax = 0;   //empty
00062   rangemin = 0;
00063   buckets = NULL;
00064 }

STATS::~STATS (  ) 

Destructor for a stats class.

Definition at line 106 of file statistc.cpp.

References buckets, free_mem(), and NULL.

00107   {
00108   if (buckets != NULL) {
00109     free_mem(buckets); 
00110     buckets = NULL;
00111   }
00112 }


Member Function Documentation

void STATS::add ( INT32  value,
INT32  count 
)

Add a set of samples to (or delete from) a pile.

Definition at line 118 of file statistc.cpp.

References buckets, NULL, rangemax, rangemin, and total_count.

Referenced by add_in_one_row(), CHAR_SAMPLES::assign_to_char(), block_space_stat(), block_spacing_stats(), cluster(), compute_block_xheight(), compute_row_descdrop(), compute_row_xheight(), count_pitch_stats(), est_ambigs(), filter_noise_blobs(), filter_noise_blobs2(), find_modal_font(), fix_row_pitch(), font_recognition_pass(), GAPMAP::GAPMAP(), get_blob_coords(), horizontal_coutline_projection(), improve_estimate(), isolated_row_stats(), make_first_xheight(), old_first_xheight(), re_estimate_x_ht(), row_pitch_stats(), row_space_stat(), row_spacing_stats(), row_words(), row_words2(), set_word_fonts(), smooth(), try_doc_fixed(), uniformly_spaced(), vertical_coutline_projection(), vertical_cunderline_projection(), vertical_outline_projection(), and vigorous_noise_removal().

00121                  {
00122   if (buckets == NULL) {
00123     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00124             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00125             "Empty stats");*/
00126     return;
00127   }
00128   if (value <= rangemin)
00129     buckets[0] += count;         //silently clip to range
00130   else if (value >= rangemax)
00131     buckets[rangemax - rangemin - 1] += count;
00132   else
00133                                  //add count to cell
00134     buckets[value - rangemin] += count;
00135   total_count += count;          //keep count of total
00136 }

void STATS::clear (  ) 

Clear out the STATS class by zeroing all the buckets.

Definition at line 95 of file statistc.cpp.

References buckets, NULL, rangemax, rangemin, and total_count.

Referenced by block_space_stat(), count_pitch_stats(), filter_noise_blobs2(), find_row_pitch(), font_recognition_pass(), row_space_stat(), row_words(), row_words2(), set_range(), and STATS().

00095                   {  //clear out buckets
00096   total_count = 0;
00097   if (buckets != NULL)
00098     memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32));
00099   //zero it
00100 }

INT32 STATS::cluster ( float  lower,
float  upper,
float  multiple,
INT32  max_clusters,
STATS *  clusters 
)

Cluster the samples into up to max_clusters clusters.

Parameters:
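lower lower distance threshold: piles nearer than this to a cluster centre are added to that cluster, as long as the pile counts do not rise moving away from the centre
upper upper distance threshold: a pile must be further than this from the nearest existing centre before it can start a new cluster
multiple ratio threshold: a new cluster must also lie above multiple times, or below 1/multiple times, the nearest existing centre
max_clusters maximum number of clusters to make
clusters array of clusters (max_clusters+1 entries)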
Returns:
Current number of clusters.
Each call runs one iteration. The array of clusters must be max_clusters+1 in size as cluster 0 is used to indicate which samples have been used.

Definition at line 337 of file statistc.cpp.

References add(), alloc_mem(), buckets, count(), FALSE, free_mem(), ile(), MAX_INT32, mode(), NULL, pile_count(), rangemax, rangemin, set_range(), total_count, and TRUE.

Referenced by row_pitch_stats(), row_words(), and row_words2().

00343                       {
00344   BOOL8 new_cluster;             // added one
00345   float *centres;                // cluster centres
00346   INT32 entry;                   // bucket index
00347   INT32 cluster;                 // cluster index
00348   INT32 best_cluster;            // one to assign to
00349   INT32 new_centre = 0;          // residual mode
00350   INT32 new_mode;                // pile count of new_centre
00351   INT32 count;                   // pile to place
00352   float dist;                    // from cluster
00353   float min_dist;                // from best_cluster
00354   INT32 cluster_count;           // no of clusters
00355 
00356   if (max_clusters < 1)
00357     return 0;
00358   if (buckets == NULL) {
00359     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00360             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00361             "Empty stats");*/
00362     return 0;
00363   }
00364   centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float));
00365   if (centres == NULL) {
00366     /*     err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00367        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00368        "No memory for centres"); */
00369     return 0;
00370   }
00371   for (cluster_count = 1; cluster_count <= max_clusters
00372     && clusters[cluster_count].buckets != NULL
00373   && clusters[cluster_count].total_count > 0; cluster_count++) {
00374     centres[cluster_count] =
00375       (float) clusters[cluster_count].ile ((float) 0.5);
00376     new_centre = clusters[cluster_count].mode ();
00377     for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00378       && entry >= rangemin
00379     && pile_count (entry) <= pile_count (entry + 1); entry--) {
00380       count = pile_count (entry) - clusters[0].pile_count (entry);
00381       if (count > 0) {
00382         clusters[cluster_count].add (entry, count);
00383         clusters[0].add (entry, count);
00384       }
00385     }
00386     for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00387       && entry < rangemax
00388     && pile_count (entry) <= pile_count (entry - 1); entry++) {
00389       count = pile_count (entry) - clusters[0].pile_count (entry);
00390       if (count > 0) {
00391         clusters[cluster_count].add (entry, count);
00392         clusters[0].add (entry, count);
00393       }
00394     }
00395   }
00396   cluster_count--;
00397 
00398   if (cluster_count == 0) {
00399     clusters[0].set_range (rangemin, rangemax);
00400   }
00401   do {
00402     new_cluster = FALSE;
00403     new_mode = 0;
00404     for (entry = 0; entry < rangemax - rangemin; entry++) {
00405       count = buckets[entry] - clusters[0].buckets[entry];
00406       // remaining pile
00407       if (count > 0) {           // any to handle
00408         min_dist = (float) MAX_INT32;
00409         best_cluster = 0;
00410         for (cluster = 1; cluster <= cluster_count; cluster++) {
00411           dist = entry + rangemin - centres[cluster];
00412           // find distance
00413           if (dist < 0)
00414             dist = -dist;
00415           if (dist < min_dist) {
00416             min_dist = dist;     // find least
00417             best_cluster = cluster;
00418           }
00419         }
00420         if (min_dist > upper     // far enough for new
00421           && (best_cluster == 0
00422           || entry + rangemin > centres[best_cluster] * multiple
00423         || entry + rangemin < centres[best_cluster] / multiple)) {
00424           if (count > new_mode) {
00425             new_mode = count;
00426             new_centre = entry + rangemin;
00427           }
00428         }
00429       }
00430     }
00431     if (new_mode > 0 && cluster_count < max_clusters) { // need new and room
00432       cluster_count++;
00433       new_cluster = TRUE;
00434       if (!clusters[cluster_count].set_range (rangemin, rangemax))
00435         return 0;
00436       centres[cluster_count] = (float) new_centre;
00437       clusters[cluster_count].add (new_centre, new_mode);
00438       clusters[0].add (new_centre, new_mode);
00439       for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00440         && entry >= rangemin
00441       && pile_count (entry) <= pile_count (entry + 1); entry--) {
00442         count = pile_count (entry) - clusters[0].pile_count (entry);
00443         if (count > 0) {
00444           clusters[cluster_count].add (entry, count);
00445           clusters[0].add (entry, count);
00446         }
00447       }
00448       for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00449         && entry < rangemax
00450       && pile_count (entry) <= pile_count (entry - 1); entry++) {
00451         count = pile_count (entry) - clusters[0].pile_count (entry);
00452         if (count > 0) {
00453           clusters[cluster_count].add (entry, count);
00454           clusters[0].add (entry, count);
00455         }
00456       }
00457       centres[cluster_count] =
00458         (float) clusters[cluster_count].ile ((float) 0.5);
00459     }
00460   }
00461   while (new_cluster && cluster_count < max_clusters);
00462   free_mem(centres); 
00463   return cluster_count;
00464 }

INT32 STATS::get_total (  )  [inline]

Definition at line 89 of file statistc.h.

Referenced by block_spacing_stats(), compute_row_xheight(), count_pitch_stats(), est_ambigs(), estimate_from_stats(), find_modal_font(), get_blob_coords(), improve_row_threshold(), isolated_row_stats(), old_first_xheight(), old_to_method(), re_estimate_x_ht(), row_pitch_stats(), row_spacing_stats(), row_words(), row_words2(), try_doc_fixed(), and uniformly_spaced().

00089                       {  //access function
00090       return total_count;        //total of all piles
00091     }

float STATS::ile ( float  frac  ) 

Definition at line 222 of file statistc.cpp.

References buckets, NULL, rangemax, rangemin, and total_count.

Referenced by block_spacing_stats(), cluster(), compute_block_xheight(), est_ambigs(), estimate_from_stats(), filter_noise_blobs(), filter_noise_blobs2(), find_row_pitch(), fix_row_pitch(), get_blob_coords(), median(), old_first_xheight(), print(), re_estimate_x_ht(), row_words(), short_print(), and try_doc_fixed().

00224                   {
00225   INT32 index;                   //current index
00226   INT32 sum;                     //sum of cells
00227   float target;                  //target value
00228 
00229   if (buckets == NULL) {
00230     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00231        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00232        "Empty stats"); */
00233     return (float) rangemin;
00234   }
00235   target = frac * total_count;
00236   if (target <= 0)
00237     target = (float) 1;
00238   if (target > total_count)
00239     target = (float) total_count;
00240   for (sum = 0, index = 0; index < rangemax - rangemin
00241     && sum < target; sum += buckets[index], index++);
00242   if (index > 0)
00243     return rangemin + index - (sum - target) / buckets[index - 1];
00244   //better than just ints
00245   else
00246     return (float) rangemin;
00247 }

BOOL8 STATS::local_min ( INT32  x  ) 

Test whether x is a local minimum within the range of buckets.

Parameters:
x input
Note:
Global:
See also:
rangemin,

rangemax,

buckets

Returns:
TRUE if this point is a local min.

Definition at line 474 of file statistc.cpp.

References buckets, FALSE, NULL, rangemax, rangemin, and TRUE.

Referenced by check_pitch_sync3().

00476                         {
00477   INT32 index;                   // table index
00478 
00479   if (buckets == NULL) {
00480     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00481             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00482             "Empty stats");*/
00483     return FALSE;
00484   }
00485   if (x < rangemin)
00486     x = rangemin;
00487   if (x >= rangemax)
00488     x = rangemax - 1;
00489   x -= rangemin;
00490   if (buckets[x] == 0)
00491     return TRUE;
00492   for (index = x - 1; index >= 0 && buckets[index] == buckets[x]; index--);
00493   if (index >= 0 && buckets[index] < buckets[x])
00494     return FALSE;
00495   for (index = x + 1; index < rangemax - rangemin
00496     && buckets[index] == buckets[x]; index++);
00497   if (index < rangemax - rangemin && buckets[index] < buckets[x])
00498     return FALSE;
00499   else
00500     return TRUE;
00501 }

INT32 STATS::max_bucket (  ) 

Find REAL maximum bucket - ile(1.0) isn't necessarily correct.

Definition at line 562 of file statistc.cpp.

References buckets, max, NULL, rangemax, and rangemin.

Referenced by short_print().

00562                         {  //Find max
00563   INT32 max;
00564 
00565   if (buckets == NULL) {
00566     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00567             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00568             "Empty stats");*/
00569     return rangemin;
00570   }
00571 
00572   for (max = rangemax - rangemin - 1;
00573     (max > 0) && (buckets[max] == 0); max--);
00574   return rangemin + max;
00575 }

float STATS::mean (  ) 

Find the mean of a stats class.

Definition at line 166 of file statistc.cpp.

References buckets, NULL, rangemax, rangemin, and total_count.

Referenced by est_ambigs(), estimate_from_stats(), isolated_row_stats(), old_to_method(), print(), short_print(), and uniformly_spaced().

00166                   {  //get mean of samples
00167   INT32 index;                   //current index
00168   INT32 sum;                     //sum of cells
00169 
00170   if (buckets == NULL) {
00171     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00172             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00173             "Empty stats");*/
00174     return (float) rangemin;
00175   }
00176   for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) {
00177                                  //sum all buckets
00178     sum += index * buckets[index];
00179   }
00180   if (total_count > 0)
00181                                  //mean value
00182     return (float) sum / total_count + rangemin;
00183   else
00184     return (float) rangemin;     //no mean
00185 }

float STATS::median (  ) 

Get median.

Finds a more useful estimate of the median than ile(0.5).

Overcomes a problem with ile(): if the samples are, for example, 6,6,13,14, then ile(0.5) returns 7.0, when a more useful value would be midway between 6 and 13 = 9.5.

Definition at line 258 of file statistc.cpp.

References buckets, ile(), NULL, pile_count(), rangemin, and total_count.

Referenced by block_spacing_stats(), GAPMAP::GAPMAP(), isolated_row_stats(), old_to_method(), row_spacing_stats(), uniformly_spaced(), and vigorous_noise_removal().

00258                     {  //get median
00259   float median;
00260   INT32 min_pile;
00261   INT32 median_pile;
00262   INT32 max_pile;
00263 
00264   if (buckets == NULL) {
00265     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00266             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00267             "Empty stats");*/
00268     return (float) rangemin;
00269   }
00270   median = (float) ile ((float) 0.5);
00271   median_pile = (INT32) floor (median);
00272   if ((total_count > 1) && (pile_count (median_pile) == 0)) {
00273     /* Find preceeding non zero pile */
00274     for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--);
00275     /* Find following non zero pile */
00276     for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++);
00277     median = (float) ((min_pile + max_pile) / 2.0);
00278   }
00279   return median;
00280 }

INT32 STATS::min_bucket (  ) 

Find REAL minimum bucket - ile(0.0) isn't necessarily correct.

Definition at line 544 of file statistc.cpp.

References buckets, min, NULL, rangemax, and rangemin.

Referenced by short_print().

00544                         {  //Find min
00545   INT32 min;
00546 
00547   if (buckets == NULL) {
00548     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00549             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00550             "Empty stats");*/
00551     return rangemin;
00552   }
00553 
00554   for (min = 0; (min < rangemax - rangemin) && (buckets[min] == 0); min++);
00555   return rangemin + min;
00556 }

INT32 STATS::mode (  ) 

Find the mode of a stats class.

Definition at line 141 of file statistc.cpp.

References buckets, max, NULL, rangemax, and rangemin.

Referenced by cluster(), compute_row_descdrop(), compute_row_xheight(), and find_modal_font().

00141                   {  //get mode of samples
00142   INT32 index;         // current index
00143   INT32 max;           // max cell count
00144   INT32 maxindex;      // index of max
00145 
00146   if (buckets == NULL) {
00147     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00148             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00149             "Empty stats");*/
00150     return rangemin;
00151   }
00152   for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0;
00153   index--) {
00154     if (buckets[index] > max) {
00155       max = buckets[index];      //find biggest
00156       maxindex = index;
00157     }
00158   }
00159   return maxindex + rangemin;    //index of biggest
00160 }

INT32 STATS::pile_count ( INT32  value  )  [inline]

Definition at line 79 of file statistc.h.

Referenced by CHAR_SAMPLES::assign_to_char(), check_pitch_sync(), check_pitch_sync2(), check_pitch_sync3(), cluster(), compute_height_modes(), compute_row_descdrop(), compute_row_xheight(), find_modal_font(), find_top_modes(), median(), pick_x_height(), row_spacing_stats(), stats_count_under(), test_underline(), try_doc_fixed(), and tune_row_pitch2().

00081                       {
00082       return value > rangemin ? (value < rangemax
00083         ? buckets[value -
00084         rangemin] : buckets[rangemax -
00085         rangemin -
00086         1]) : buckets[0];
00087     }

void STATS::plot ( WINDOW  window,
float  xorigin,
float  yorigin,
float  xscale,
float  yscale,
COLOUR  colour 
)

Draw a histogram of the stats table.

Definition at line 622 of file statistc.cpp.

References buckets, INT_HOLLOW, interior_style, NULL, perimeter_color_index, rangemax, rangemin, and rectangle.

Referenced by compute_pitch_sd(), compute_pitch_sd2(), and try_doc_fixed().

00629                   {
00630   INT32 index;                   //table index
00631 
00632   if (buckets == NULL) {
00633     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00634             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00635             "Empty stats");*/
00636     return;
00637   }
00638   interior_style (window, INT_HOLLOW, 1);
00639   perimeter_color_index(window, colour); 
00640 
00641   for (index = 0; index < rangemax - rangemin; index++) {
00642     rectangle (window, xorigin + xscale * index, yorigin,
00643       xorigin + xscale * (index + 1),
00644       yorigin + yscale * buckets[index]);
00645   }
00646 }

void STATS::plotline ( WINDOW  window,
float  xorigin,
float  yorigin,
float  xscale,
float  yscale,
COLOUR  colour 
)

Draw a histogram of the stats table. (Line only.)

Definition at line 654 of file statistc.cpp.

References buckets, draw2d, line_color_index, line_type, move2d, NULL, rangemax, rangemin, and SOLID.

00661                       {
00662   INT32 index;                   //table index
00663 
00664   if (buckets == NULL) {
00665     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00666        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00667        "Empty stats"); */
00668     return;
00669   }
00670   line_color_index(window, colour); 
00671   line_type(window, SOLID); 
00672 
00673   move2d (window, xorigin, yorigin + yscale * buckets[0]);
00674   for (index = 0; index < rangemax - rangemin; index++) {
00675     draw2d (window, xorigin + xscale * index,
00676       yorigin + yscale * buckets[index]);
00677   }
00678 }

void STATS::print ( FILE *  fp,
BOOL8  dump 
)

Print a summary of the stats and optionally a dump of the table.

Parameters:
dump If TRUE, dump full table

Definition at line 509 of file statistc.cpp.

References buckets, ile(), mean(), NULL, rangemax, rangemin, sd(), total_count, and tprintf().

Referenced by row_words(), row_words2(), and test_underline().

00512                    {
00513   INT32 index;                   // table index
00514 
00515   if (buckets == NULL) {
00516     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00517        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00518        "Empty stats"); */
00519     return;
00520   }
00521   if (dump) {
00522     for (index = 0; index < rangemax - rangemin; index++) {
00523       tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00524       if (index % 8 == 7)
00525         tprintf ("\n");
00526     }
00527     tprintf ("\n");
00528   }
00529 
00530   tprintf ("Total count=%d\n", total_count);
00531   tprintf ("Min=%d\n", (INT32) (ile ((float) 0.0)));
00532   tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00533   tprintf ("Median=%.2f\n", ile ((float) 0.5));
00534   tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00535   tprintf ("Max=%d\n", (INT32) (ile ((float) 0.99999)));
00536   tprintf ("Mean= %.2f\n", mean ());
00537   tprintf ("SD= %.2f\n", sd ());
00538 }

float STATS::sd (  ) 

Find the standard deviation of a stats class.

Definition at line 191 of file statistc.cpp.

References buckets, NULL, rangemax, rangemin, and total_count.

Referenced by print(), and short_print().

00191                 {  //standard deviation
00192   INT32 index;                   //current index
00193   INT32 sum;                     //sum of cells
00194   INT32 sqsum;                   //sum of squares
00195   float variance;
00196 
00197   if (buckets == NULL) {
00198     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00199        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00200        "Empty stats"); */
00201     return (float) 0.0;
00202   }
00203   for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0;
00204   index--) {
00205                                  //sum all buckets
00206     sum += index * buckets[index];
00207                                  //and squares
00208     sqsum += index * index * buckets[index];
00209   }
00210   if (total_count > 0) {
00211     variance = sum / ((float) total_count);
00212     variance = sqsum / ((float) total_count) - variance * variance;
00213     return (float) sqrt (variance);
00214   }
00215   else
00216     return (float) 0.0;
00217 }

bool STATS::set_range ( INT32  min,
INT32  max 
)

Alter the range on an existing stats element.

Definition at line 69 of file statistc.cpp.

References alloc_mem(), buckets, clear(), free_mem(), NULL, rangemax, and rangemin.

Referenced by cluster(), compute_block_xheight(), TO_ROW::compute_vertical_projection(), fix_row_pitch(), test_underline(), and try_doc_fixed().

00072                        {
00073 
00074   if (max <= min) {
00075     return false;
00076   }
00077   rangemin = min;                //setup
00078   rangemax = max;
00079   if (buckets != NULL)
00080     free_mem(buckets);  //no longer want it
00081   buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00082   /*  if (buckets==NULL)
00083       return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00084           ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00085           "No memory for stats, Min=%d, Max=%d",min,max);*/
00086 
00087   this->clear ();                //zero it
00088   return true;
00089 }

void STATS::short_print ( FILE *  fp,
BOOL8  dump 
)

Print a summary of the stats and optionally a dump of the table.

BUT ONLY THE PART OF THE TABLE BETWEEN MIN AND MAX

Definition at line 583 of file statistc.cpp.

References buckets, ile(), max, max_bucket(), mean(), min, min_bucket(), NULL, rangemin, sd(), total_count, and tprintf().

Referenced by block_space_stat(), and row_space_stat().

00586                          {
00587   INT32 index;                   //table index
00588   INT32 min = min_bucket ();
00589   INT32 max = max_bucket ();
00590 
00591   if (buckets == NULL) {
00592     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00593        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00594        "Empty stats"); */
00595     return;
00596   }
00597   if (dump) {
00598     for (index = min; index <= max; index++) {
00599       tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00600       if ((index - min) % 8 == 7)
00601         tprintf ("\n");
00602     }
00603     tprintf ("\n");
00604   }
00605 
00606   tprintf ("Total count=%d\n", total_count);
00607   tprintf ("Min=%d Really=%d\n", (INT32) (ile ((float) 0.0)), min);
00608   tprintf ("Max=%d Really=%d\n", (INT32) (ile ((float) 1.1)), max);
00609   tprintf ("Range=%d\n", max + 1 - min);
00610   tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00611   tprintf ("Median=%.2f\n", ile ((float) 0.5));
00612   tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00613   tprintf ("Mean= %.2f\n", mean ());
00614   tprintf ("SD= %.2f\n", sd ());
00615 }

void STATS::smooth ( INT32  factor  ) 

Apply a triangular smoothing filter to the stats.

This makes the modes a bit more useful. The factor gives the height of the triangle, i.e. the weight of the centre.

Definition at line 289 of file statistc.cpp.

References add(), buckets, NULL, rangemax, rangemin, and total_count.

Referenced by block_space_stat(), row_pitch_stats(), row_space_stat(), row_words(), and row_words2().

00291                     {
00292   INT32 entry;                   //bucket index
00293   INT32 offset;                  //from entry
00294   INT32 entrycount;              //no of entries
00295   INT32 bucket;                  //new smoothed pile
00296                                  //output stats
00297   STATS result(rangemin, rangemax); 
00298 
00299   if (buckets == NULL) {
00300     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00301        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00302        "Empty stats"); */
00303     return;
00304   }
00305   if (factor < 2)
00306     return;                      //is a no-op
00307   entrycount = rangemax - rangemin;
00308   for (entry = 0; entry < entrycount; entry++) {
00309                                  //centre weight
00310     bucket = buckets[entry] * factor;
00311     for (offset = 1; offset < factor; offset++) {
00312       if (entry - offset >= 0)
00313         bucket += buckets[entry - offset] * (factor - offset);
00314       if (entry + offset < entrycount)
00315         bucket += buckets[entry + offset] * (factor - offset);
00316     }
00317     result.add (entry + rangemin, bucket);
00318   }
00319   total_count = result.total_count;
00320   memcpy (buckets, result.buckets, entrycount * sizeof (INT32));
00321 }


Member Data Documentation

INT32* STATS::buckets [private]

Definition at line 35 of file statistc.h.

Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), median(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), short_print(), smooth(), STATS(), and ~STATS().

INT32 STATS::rangemax [private]

Definition at line 33 of file statistc.h.

Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), smooth(), and STATS().

INT32 STATS::rangemin [private]

Definition at line 32 of file statistc.h.

Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), median(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), short_print(), smooth(), and STATS().

INT32 STATS::total_count [private]

Definition at line 34 of file statistc.h.

Referenced by add(), clear(), cluster(), ile(), mean(), median(), print(), sd(), short_print(), and smooth().


The documentation for this class was generated from the following files: statistc.h and statistc.cpp
Generated on Wed Feb 28 19:49:34 2007 for Tesseract by  doxygen 1.5.1