#include <statistc.h>
Definition at line 30 of file statistc.h.
Construct a new stats element by allocating and zeroing the memory.
Definition at line 36 of file statistc.cpp.
References alloc_mem(), buckets, clear(), NULL, rangemax, and rangemin.
00039 { 00040 00041 if (max <= min) { 00042 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00043 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00044 "Illegal range for stats, Min=%d, Max=%d",min,max);*/ 00045 min = 0; 00046 max = 1; 00047 } 00048 rangemin = min; //setup 00049 rangemax = max; 00050 buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32)); 00051 if (buckets != NULL) 00052 this->clear (); //zero it 00053 /* else 00054 err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, 00055 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00056 "No memory for stats, Min=%d, Max=%d",min,max); */ 00057 }
STATS::STATS | ( | ) |
STATS::~STATS | ( | ) |
Add a set of samples to (or delete from) a pile.
Definition at line 118 of file statistc.cpp.
References buckets, NULL, rangemax, rangemin, and total_count.
Referenced by add_in_one_row(), CHAR_SAMPLES::assign_to_char(), block_space_stat(), block_spacing_stats(), cluster(), compute_block_xheight(), compute_row_descdrop(), compute_row_xheight(), count_pitch_stats(), est_ambigs(), filter_noise_blobs(), filter_noise_blobs2(), find_modal_font(), fix_row_pitch(), font_recognition_pass(), GAPMAP::GAPMAP(), get_blob_coords(), horizontal_coutline_projection(), improve_estimate(), isolated_row_stats(), make_first_xheight(), old_first_xheight(), re_estimate_x_ht(), row_pitch_stats(), row_space_stat(), row_spacing_stats(), row_words(), row_words2(), set_word_fonts(), smooth(), try_doc_fixed(), uniformly_spaced(), vertical_coutline_projection(), vertical_cunderline_projection(), vertical_outline_projection(), and vigorous_noise_removal().
00121 { 00122 if (buckets == NULL) { 00123 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00124 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00125 "Empty stats");*/ 00126 return; 00127 } 00128 if (value <= rangemin) 00129 buckets[0] += count; //silently clip to range 00130 else if (value >= rangemax) 00131 buckets[rangemax - rangemin - 1] += count; 00132 else 00133 //add count to cell 00134 buckets[value - rangemin] += count; 00135 total_count += count; //keep count of total 00136 }
void STATS::clear | ( | ) |
Clear out the STATS class by zeroing all the buckets.
Definition at line 95 of file statistc.cpp.
References buckets, NULL, rangemax, rangemin, and total_count.
Referenced by block_space_stat(), count_pitch_stats(), filter_noise_blobs2(), find_row_pitch(), font_recognition_pass(), row_space_stat(), row_words(), row_words2(), set_range(), and STATS().
00095 { //clear out buckets 00096 total_count = 0; 00097 if (buckets != NULL) 00098 memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32)); 00099 //zero it 00100 }
INT32 STATS::cluster | ( | float | lower, | |
float | upper, | |||
float | multiple, | |||
INT32 | max_clusters, | |||
STATS * | clusters | |||
) |
Cluster the samples into at most max_clusters clusters.
lower | threshold | |
upper | threshold | |
multiple | distance threshold | |
max_clusters | max no to make | |
clusters | array of clusters |
Definition at line 337 of file statistc.cpp.
References add(), alloc_mem(), buckets, count(), FALSE, free_mem(), ile(), MAX_INT32, mode(), NULL, pile_count(), rangemax, rangemin, set_range(), total_count, and TRUE.
Referenced by row_pitch_stats(), row_words(), and row_words2().
00343 { 00344 BOOL8 new_cluster; // added one 00345 float *centres; // cluster centres 00346 INT32 entry; // bucket index 00347 INT32 cluster; // cluster index 00348 INT32 best_cluster; // one to assign to 00349 INT32 new_centre = 0; // residual mode 00350 INT32 new_mode; // pile count of new_centre 00351 INT32 count; // pile to place 00352 float dist; // from cluster 00353 float min_dist; // from best_cluster 00354 INT32 cluster_count; // no of clusters 00355 00356 if (max_clusters < 1) 00357 return 0; 00358 if (buckets == NULL) { 00359 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00360 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00361 "Empty stats");*/ 00362 return 0; 00363 } 00364 centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float)); 00365 if (centres == NULL) { 00366 /* err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, 00367 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00368 "No memory for centres"); */ 00369 return 0; 00370 } 00371 for (cluster_count = 1; cluster_count <= max_clusters 00372 && clusters[cluster_count].buckets != NULL 00373 && clusters[cluster_count].total_count > 0; cluster_count++) { 00374 centres[cluster_count] = 00375 (float) clusters[cluster_count].ile ((float) 0.5); 00376 new_centre = clusters[cluster_count].mode (); 00377 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00378 && entry >= rangemin 00379 && pile_count (entry) <= pile_count (entry + 1); entry--) { 00380 count = pile_count (entry) - clusters[0].pile_count (entry); 00381 if (count > 0) { 00382 clusters[cluster_count].add (entry, count); 00383 clusters[0].add (entry, count); 00384 } 00385 } 00386 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00387 && entry < rangemax 00388 && pile_count (entry) <= pile_count (entry - 1); entry++) { 00389 count = pile_count (entry) - clusters[0].pile_count (entry); 00390 if (count > 0) { 00391 clusters[cluster_count].add (entry, count); 00392 clusters[0].add (entry, count); 00393 } 00394 } 00395 } 
00396 cluster_count--; 00397 00398 if (cluster_count == 0) { 00399 clusters[0].set_range (rangemin, rangemax); 00400 } 00401 do { 00402 new_cluster = FALSE; 00403 new_mode = 0; 00404 for (entry = 0; entry < rangemax - rangemin; entry++) { 00405 count = buckets[entry] - clusters[0].buckets[entry]; 00406 // remaining pile 00407 if (count > 0) { // any to handle 00408 min_dist = (float) MAX_INT32; 00409 best_cluster = 0; 00410 for (cluster = 1; cluster <= cluster_count; cluster++) { 00411 dist = entry + rangemin - centres[cluster]; 00412 // find distance 00413 if (dist < 0) 00414 dist = -dist; 00415 if (dist < min_dist) { 00416 min_dist = dist; // find least 00417 best_cluster = cluster; 00418 } 00419 } 00420 if (min_dist > upper // far enough for new 00421 && (best_cluster == 0 00422 || entry + rangemin > centres[best_cluster] * multiple 00423 || entry + rangemin < centres[best_cluster] / multiple)) { 00424 if (count > new_mode) { 00425 new_mode = count; 00426 new_centre = entry + rangemin; 00427 } 00428 } 00429 } 00430 } 00431 if (new_mode > 0 && cluster_count < max_clusters) { // need new and room 00432 cluster_count++; 00433 new_cluster = TRUE; 00434 if (!clusters[cluster_count].set_range (rangemin, rangemax)) 00435 return 0; 00436 centres[cluster_count] = (float) new_centre; 00437 clusters[cluster_count].add (new_centre, new_mode); 00438 clusters[0].add (new_centre, new_mode); 00439 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00440 && entry >= rangemin 00441 && pile_count (entry) <= pile_count (entry + 1); entry--) { 00442 count = pile_count (entry) - clusters[0].pile_count (entry); 00443 if (count > 0) { 00444 clusters[cluster_count].add (entry, count); 00445 clusters[0].add (entry, count); 00446 } 00447 } 00448 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00449 && entry < rangemax 00450 && pile_count (entry) <= pile_count (entry - 1); entry++) { 00451 count = pile_count (entry) - clusters[0].pile_count (entry); 
00452 if (count > 0) { 00453 clusters[cluster_count].add (entry, count); 00454 clusters[0].add (entry, count); 00455 } 00456 } 00457 centres[cluster_count] = 00458 (float) clusters[cluster_count].ile ((float) 0.5); 00459 } 00460 } 00461 while (new_cluster && cluster_count < max_clusters); 00462 free_mem(centres); 00463 return cluster_count; 00464 }
INT32 STATS::get_total | ( | ) | [inline] |
Definition at line 89 of file statistc.h.
Referenced by block_spacing_stats(), compute_row_xheight(), count_pitch_stats(), est_ambigs(), estimate_from_stats(), find_modal_font(), get_blob_coords(), improve_row_threshold(), isolated_row_stats(), old_first_xheight(), old_to_method(), re_estimate_x_ht(), row_pitch_stats(), row_spacing_stats(), row_words(), row_words2(), try_doc_fixed(), and uniformly_spaced().
00089 { //access function 00090 return total_count; //total of all piles 00091 }
float STATS::ile | ( | float | frac | ) |
Definition at line 222 of file statistc.cpp.
References buckets, NULL, rangemax, rangemin, and total_count.
Referenced by block_spacing_stats(), cluster(), compute_block_xheight(), est_ambigs(), estimate_from_stats(), filter_noise_blobs(), filter_noise_blobs2(), find_row_pitch(), fix_row_pitch(), get_blob_coords(), median(), old_first_xheight(), print(), re_estimate_x_ht(), row_words(), short_print(), and try_doc_fixed().
00224 { 00225 INT32 index; //current index 00226 INT32 sum; //sum of cells 00227 float target; //target value 00228 00229 if (buckets == NULL) { 00230 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00231 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00232 "Empty stats"); */ 00233 return (float) rangemin; 00234 } 00235 target = frac * total_count; 00236 if (target <= 0) 00237 target = (float) 1; 00238 if (target > total_count) 00239 target = (float) total_count; 00240 for (sum = 0, index = 0; index < rangemax - rangemin 00241 && sum < target; sum += buckets[index], index++); 00242 if (index > 0) 00243 return rangemin + index - (sum - target) / buckets[index - 1]; 00244 //better than just ints 00245 else 00246 return (float) rangemin; 00247 }
Test whether x is a local minimum within the range of buckets.
x | input |
Definition at line 474 of file statistc.cpp.
References buckets, FALSE, NULL, rangemax, rangemin, and TRUE.
Referenced by check_pitch_sync3().
00476 { 00477 INT32 index; // table index 00478 00479 if (buckets == NULL) { 00480 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00481 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00482 "Empty stats");*/ 00483 return FALSE; 00484 } 00485 if (x < rangemin) 00486 x = rangemin; 00487 if (x >= rangemax) 00488 x = rangemax - 1; 00489 x -= rangemin; 00490 if (buckets[x] == 0) 00491 return TRUE; 00492 for (index = x - 1; index >= 0 && buckets[index] == buckets[x]; index--); 00493 if (index >= 0 && buckets[index] < buckets[x]) 00494 return FALSE; 00495 for (index = x + 1; index < rangemax - rangemin 00496 && buckets[index] == buckets[x]; index++); 00497 if (index < rangemax - rangemin && buckets[index] < buckets[x]) 00498 return FALSE; 00499 else 00500 return TRUE; 00501 }
INT32 STATS::max_bucket | ( | ) |
Find REAL maximum bucket - ile(1.0) isn't necessarily correct.
Definition at line 562 of file statistc.cpp.
References buckets, max, NULL, rangemax, and rangemin.
Referenced by short_print().
00562 { //Find max 00563 INT32 max; 00564 00565 if (buckets == NULL) { 00566 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00567 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00568 "Empty stats");*/ 00569 return rangemin; 00570 } 00571 00572 for (max = rangemax - rangemin - 1; 00573 (max > 0) && (buckets[max] == 0); max--); 00574 return rangemin + max; 00575 }
float STATS::mean | ( | ) |
Find the mean of a stats class.
Definition at line 166 of file statistc.cpp.
References buckets, NULL, rangemax, rangemin, and total_count.
Referenced by est_ambigs(), estimate_from_stats(), isolated_row_stats(), old_to_method(), print(), short_print(), and uniformly_spaced().
00166 { //get mean of samples 00167 INT32 index; //current index 00168 INT32 sum; //sum of cells 00169 00170 if (buckets == NULL) { 00171 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00172 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00173 "Empty stats");*/ 00174 return (float) rangemin; 00175 } 00176 for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) { 00177 //sum all buckets 00178 sum += index * buckets[index]; 00179 } 00180 if (total_count > 0) 00181 //mean value 00182 return (float) sum / total_count + rangemin; 00183 else 00184 return (float) rangemin; //no mean 00185 }
float STATS::median | ( | ) |
Get median.
Finds a more useful estimate of the median than ile(0.5).
Overcomes a problem with ile(): if the samples are, for example, 6, 6, 13, 14, then ile(0.5) returns 7.0, whereas a more useful value would be midway between 6 and 13, i.e. 9.5.
Definition at line 258 of file statistc.cpp.
References buckets, ile(), NULL, pile_count(), rangemin, and total_count.
Referenced by block_spacing_stats(), GAPMAP::GAPMAP(), isolated_row_stats(), old_to_method(), row_spacing_stats(), uniformly_spaced(), and vigorous_noise_removal().
00258 { //get median 00259 float median; 00260 INT32 min_pile; 00261 INT32 median_pile; 00262 INT32 max_pile; 00263 00264 if (buckets == NULL) { 00265 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00266 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00267 "Empty stats");*/ 00268 return (float) rangemin; 00269 } 00270 median = (float) ile ((float) 0.5); 00271 median_pile = (INT32) floor (median); 00272 if ((total_count > 1) && (pile_count (median_pile) == 0)) { 00273 /* Find preceeding non zero pile */ 00274 for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--); 00275 /* Find following non zero pile */ 00276 for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++); 00277 median = (float) ((min_pile + max_pile) / 2.0); 00278 } 00279 return median; 00280 }
INT32 STATS::min_bucket | ( | ) |
Find REAL minimum bucket - ile(0.0) isn't necessarily correct.
Definition at line 544 of file statistc.cpp.
References buckets, min, NULL, rangemax, and rangemin.
Referenced by short_print().
00544 { //Find min 00545 INT32 min; 00546 00547 if (buckets == NULL) { 00548 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00549 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00550 "Empty stats");*/ 00551 return rangemin; 00552 } 00553 00554 for (min = 0; (min < rangemax - rangemin) && (buckets[min] == 0); min++); 00555 return rangemin + min; 00556 }
INT32 STATS::mode | ( | ) |
Find the mode of a stats class.
Definition at line 141 of file statistc.cpp.
References buckets, max, NULL, rangemax, and rangemin.
Referenced by cluster(), compute_row_descdrop(), compute_row_xheight(), and find_modal_font().
00141 { //get mode of samples 00142 INT32 index; // current index 00143 INT32 max; // max cell count 00144 INT32 maxindex; // index of max 00145 00146 if (buckets == NULL) { 00147 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00148 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00149 "Empty stats");*/ 00150 return rangemin; 00151 } 00152 for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0; 00153 index--) { 00154 if (buckets[index] > max) { 00155 max = buckets[index]; //find biggest 00156 maxindex = index; 00157 } 00158 } 00159 return maxindex + rangemin; //index of biggest 00160 }
Definition at line 79 of file statistc.h.
Referenced by CHAR_SAMPLES::assign_to_char(), check_pitch_sync(), check_pitch_sync2(), check_pitch_sync3(), cluster(), compute_height_modes(), compute_row_descdrop(), compute_row_xheight(), find_modal_font(), find_top_modes(), median(), pick_x_height(), row_spacing_stats(), stats_count_under(), test_underline(), try_doc_fixed(), and tune_row_pitch2().
00081 { 00082 return value > rangemin ? (value < rangemax 00083 ? buckets[value - 00084 rangemin] : buckets[rangemax - 00085 rangemin - 00086 1]) : buckets[0]; 00087 }
void STATS::plot | ( | WINDOW | window, | |
float | xorigin, | |||
float | yorigin, | |||
float | xscale, | |||
float | yscale, | |||
COLOUR | colour | |||
) |
Draw a histogram of the stats table.
Definition at line 622 of file statistc.cpp.
References buckets, INT_HOLLOW, interior_style, NULL, perimeter_color_index, rangemax, rangemin, and rectangle.
Referenced by compute_pitch_sd(), compute_pitch_sd2(), and try_doc_fixed().
00629 { 00630 INT32 index; //table index 00631 00632 if (buckets == NULL) { 00633 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00634 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00635 "Empty stats");*/ 00636 return; 00637 } 00638 interior_style (window, INT_HOLLOW, 1); 00639 perimeter_color_index(window, colour); 00640 00641 for (index = 0; index < rangemax - rangemin; index++) { 00642 rectangle (window, xorigin + xscale * index, yorigin, 00643 xorigin + xscale * (index + 1), 00644 yorigin + yscale * buckets[index]); 00645 } 00646 }
void STATS::plotline | ( | WINDOW | window, | |
float | xorigin, | |||
float | yorigin, | |||
float | xscale, | |||
float | yscale, | |||
COLOUR | colour | |||
) |
Draw a histogram of the stats table. (Line only.)
Definition at line 654 of file statistc.cpp.
References buckets, draw2d, line_color_index, line_type, move2d, NULL, rangemax, rangemin, and SOLID.
00661 { 00662 INT32 index; //table index 00663 00664 if (buckets == NULL) { 00665 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00666 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00667 "Empty stats"); */ 00668 return; 00669 } 00670 line_color_index(window, colour); 00671 line_type(window, SOLID); 00672 00673 move2d (window, xorigin, yorigin + yscale * buckets[0]); 00674 for (index = 0; index < rangemax - rangemin; index++) { 00675 draw2d (window, xorigin + xscale * index, 00676 yorigin + yscale * buckets[index]); 00677 } 00678 }
void STATS::print | ( | FILE * | fp, | |
BOOL8 | dump | |||
) |
Print a summary of the stats and optionally a dump of the table.
dump | If TRUE, dump full table |
Definition at line 509 of file statistc.cpp.
References buckets, ile(), mean(), NULL, rangemax, rangemin, sd(), total_count, and tprintf().
Referenced by row_words(), row_words2(), and test_underline().
00512 { 00513 INT32 index; // table index 00514 00515 if (buckets == NULL) { 00516 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00517 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00518 "Empty stats"); */ 00519 return; 00520 } 00521 if (dump) { 00522 for (index = 0; index < rangemax - rangemin; index++) { 00523 tprintf ("%4d:%-3d ", rangemin + index, buckets[index]); 00524 if (index % 8 == 7) 00525 tprintf ("\n"); 00526 } 00527 tprintf ("\n"); 00528 } 00529 00530 tprintf ("Total count=%d\n", total_count); 00531 tprintf ("Min=%d\n", (INT32) (ile ((float) 0.0))); 00532 tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25)); 00533 tprintf ("Median=%.2f\n", ile ((float) 0.5)); 00534 tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75)); 00535 tprintf ("Max=%d\n", (INT32) (ile ((float) 0.99999))); 00536 tprintf ("Mean= %.2f\n", mean ()); 00537 tprintf ("SD= %.2f\n", sd ()); 00538 }
float STATS::sd | ( | ) |
Find the standard deviation of a stats class.
Definition at line 191 of file statistc.cpp.
References buckets, NULL, rangemax, rangemin, and total_count.
Referenced by print(), and short_print().
00191 { //standard deviation 00192 INT32 index; //current index 00193 INT32 sum; //sum of cells 00194 INT32 sqsum; //sum of squares 00195 float variance; 00196 00197 if (buckets == NULL) { 00198 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00199 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00200 "Empty stats"); */ 00201 return (float) 0.0; 00202 } 00203 for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0; 00204 index--) { 00205 //sum all buckets 00206 sum += index * buckets[index]; 00207 //and squares 00208 sqsum += index * index * buckets[index]; 00209 } 00210 if (total_count > 0) { 00211 variance = sum / ((float) total_count); 00212 variance = sqsum / ((float) total_count) - variance * variance; 00213 return (float) sqrt (variance); 00214 } 00215 else 00216 return (float) 0.0; 00217 }
Alter the range on an existing stats element.
Definition at line 69 of file statistc.cpp.
References alloc_mem(), buckets, clear(), free_mem(), NULL, rangemax, and rangemin.
Referenced by cluster(), compute_block_xheight(), TO_ROW::compute_vertical_projection(), fix_row_pitch(), test_underline(), and try_doc_fixed().
00072 { 00073 00074 if (max <= min) { 00075 return false; 00076 } 00077 rangemin = min; //setup 00078 rangemax = max; 00079 if (buckets != NULL) 00080 free_mem(buckets); //no longer want it 00081 buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32)); 00082 /* if (buckets==NULL) 00083 return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, 00084 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00085 "No memory for stats, Min=%d, Max=%d",min,max);*/ 00086 00087 this->clear (); //zero it 00088 return true; 00089 }
void STATS::short_print | ( | FILE * | fp, | |
BOOL8 | dump | |||
) |
Print a summary of the stats and optionally a dump of the table.
Only the part of the table between the minimum and maximum occupied buckets is dumped.
Definition at line 583 of file statistc.cpp.
References buckets, ile(), max, max_bucket(), mean(), min, min_bucket(), NULL, rangemin, sd(), total_count, and tprintf().
Referenced by block_space_stat(), and row_space_stat().
00586 { 00587 INT32 index; //table index 00588 INT32 min = min_bucket (); 00589 INT32 max = max_bucket (); 00590 00591 if (buckets == NULL) { 00592 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00593 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00594 "Empty stats"); */ 00595 return; 00596 } 00597 if (dump) { 00598 for (index = min; index <= max; index++) { 00599 tprintf ("%4d:%-3d ", rangemin + index, buckets[index]); 00600 if ((index - min) % 8 == 7) 00601 tprintf ("\n"); 00602 } 00603 tprintf ("\n"); 00604 } 00605 00606 tprintf ("Total count=%d\n", total_count); 00607 tprintf ("Min=%d Really=%d\n", (INT32) (ile ((float) 0.0)), min); 00608 tprintf ("Max=%d Really=%d\n", (INT32) (ile ((float) 1.1)), max); 00609 tprintf ("Range=%d\n", max + 1 - min); 00610 tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25)); 00611 tprintf ("Median=%.2f\n", ile ((float) 0.5)); 00612 tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75)); 00613 tprintf ("Mean= %.2f\n", mean ()); 00614 tprintf ("SD= %.2f\n", sd ()); 00615 }
void STATS::smooth | ( | INT32 | factor | ) |
Apply a triangular smoothing filter to the stats.
This makes the modes a bit more useful. The factor gives the height of the triangle, i.e. the weight of the centre.
Definition at line 289 of file statistc.cpp.
References add(), buckets, NULL, rangemax, rangemin, and total_count.
Referenced by block_space_stat(), row_pitch_stats(), row_space_stat(), row_words(), and row_words2().
00291 { 00292 INT32 entry; //bucket index 00293 INT32 offset; //from entry 00294 INT32 entrycount; //no of entries 00295 INT32 bucket; //new smoothed pile 00296 //output stats 00297 STATS result(rangemin, rangemax); 00298 00299 if (buckets == NULL) { 00300 /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, 00301 ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, 00302 "Empty stats"); */ 00303 return; 00304 } 00305 if (factor < 2) 00306 return; //is a no-op 00307 entrycount = rangemax - rangemin; 00308 for (entry = 0; entry < entrycount; entry++) { 00309 //centre weight 00310 bucket = buckets[entry] * factor; 00311 for (offset = 1; offset < factor; offset++) { 00312 if (entry - offset >= 0) 00313 bucket += buckets[entry - offset] * (factor - offset); 00314 if (entry + offset < entrycount) 00315 bucket += buckets[entry + offset] * (factor - offset); 00316 } 00317 result.add (entry + rangemin, bucket); 00318 } 00319 total_count = result.total_count; 00320 memcpy (buckets, result.buckets, entrycount * sizeof (INT32)); 00321 }
INT32* STATS::buckets [private] |
Definition at line 35 of file statistc.h.
Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), median(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), short_print(), smooth(), STATS(), and ~STATS().
INT32 STATS::rangemax [private] |
Definition at line 33 of file statistc.h.
Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), smooth(), and STATS().
INT32 STATS::rangemin [private] |
Definition at line 32 of file statistc.h.
Referenced by add(), clear(), cluster(), ile(), local_min(), max_bucket(), mean(), median(), min_bucket(), mode(), plot(), plotline(), print(), sd(), set_range(), short_print(), smooth(), and STATS().
INT32 STATS::total_count [private] |