00001
00020 #include "mfcpch.h"
00021 #include <string.h>
00022 #include <math.h>
00023 #include <stdlib.h>
00024 #include "memry.h"
00025
00026 #include "tprintf.h"
00027 #include "statistc.h"
00028
00029 #define SEED1 0x1234 //default seeds
00030 #define SEED2 0x5678
00031 #define SEED3 0x9abc
00032
00036 STATS::STATS(
00037 INT32 min,
00038 INT32 max
00039 ) {
00040
00041 if (max <= min) {
00042
00043
00044
00045 min = 0;
00046 max = 1;
00047 }
00048 rangemin = min;
00049 rangemax = max;
00050 buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00051 if (buckets != NULL)
00052 this->clear ();
00053
00054
00055
00056
00057 }
00058
00059
00060 STATS::STATS() {
00061 rangemax = 0;
00062 rangemin = 0;
00063 buckets = NULL;
00064 }
00065
00069 bool STATS::set_range(
00070 INT32 min,
00071 INT32 max
00072 ) {
00073
00074 if (max <= min) {
00075 return false;
00076 }
00077 rangemin = min;
00078 rangemax = max;
00079 if (buckets != NULL)
00080 free_mem(buckets);
00081 buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00082
00083
00084
00085
00086
00087 this->clear ();
00088 return true;
00089 }
00090
00091
00095 void STATS::clear() {
00096 total_count = 0;
00097 if (buckets != NULL)
00098 memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32));
00099
00100 }
00101
00102
00106 STATS::~STATS (
00107 ) {
00108 if (buckets != NULL) {
00109 free_mem(buckets);
00110 buckets = NULL;
00111 }
00112 }
00113
00114
00118 void STATS::add(
00119 INT32 value,
00120 INT32 count
00121 ) {
00122 if (buckets == NULL) {
00123
00124
00125
00126 return;
00127 }
00128 if (value <= rangemin)
00129 buckets[0] += count;
00130 else if (value >= rangemax)
00131 buckets[rangemax - rangemin - 1] += count;
00132 else
00133
00134 buckets[value - rangemin] += count;
00135 total_count += count;
00136 }
00137
00141 INT32 STATS::mode() {
00142 INT32 index;
00143 INT32 max;
00144 INT32 maxindex;
00145
00146 if (buckets == NULL) {
00147
00148
00149
00150 return rangemin;
00151 }
00152 for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0;
00153 index--) {
00154 if (buckets[index] > max) {
00155 max = buckets[index];
00156 maxindex = index;
00157 }
00158 }
00159 return maxindex + rangemin;
00160 }
00161
00162
00166 float STATS::mean() {
00167 INT32 index;
00168 INT32 sum;
00169
00170 if (buckets == NULL) {
00171
00172
00173
00174 return (float) rangemin;
00175 }
00176 for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) {
00177
00178 sum += index * buckets[index];
00179 }
00180 if (total_count > 0)
00181
00182 return (float) sum / total_count + rangemin;
00183 else
00184 return (float) rangemin;
00185 }
00186
00187
00191 float STATS::sd() {
00192 INT32 index;
00193 INT32 sum;
00194 INT32 sqsum;
00195 float variance;
00196
00197 if (buckets == NULL) {
00198
00199
00200
00201 return (float) 0.0;
00202 }
00203 for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0;
00204 index--) {
00205
00206 sum += index * buckets[index];
00207
00208 sqsum += index * index * buckets[index];
00209 }
00210 if (total_count > 0) {
00211 variance = sum / ((float) total_count);
00212 variance = sqsum / ((float) total_count) - variance * variance;
00213 return (float) sqrt (variance);
00214 }
00215 else
00216 return (float) 0.0;
00217 }
00218
00219
00220
00221
00222 float STATS::ile(
00223 float frac
00224 ) {
00225 INT32 index;
00226 INT32 sum;
00227 float target;
00228
00229 if (buckets == NULL) {
00230
00231
00232
00233 return (float) rangemin;
00234 }
00235 target = frac * total_count;
00236 if (target <= 0)
00237 target = (float) 1;
00238 if (target > total_count)
00239 target = (float) total_count;
00240 for (sum = 0, index = 0; index < rangemax - rangemin
00241 && sum < target; sum += buckets[index], index++);
00242 if (index > 0)
00243 return rangemin + index - (sum - target) / buckets[index - 1];
00244
00245 else
00246 return (float) rangemin;
00247 }
00248
00258 float STATS::median() {
00259 float median;
00260 INT32 min_pile;
00261 INT32 median_pile;
00262 INT32 max_pile;
00263
00264 if (buckets == NULL) {
00265
00266
00267
00268 return (float) rangemin;
00269 }
00270 median = (float) ile ((float) 0.5);
00271 median_pile = (INT32) floor (median);
00272 if ((total_count > 1) && (pile_count (median_pile) == 0)) {
00273
00274 for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--);
00275
00276 for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++);
00277 median = (float) ((min_pile + max_pile) / 2.0);
00278 }
00279 return median;
00280 }
00281
00282
00289 void STATS::smooth(
00290 INT32 factor
00291 ) {
00292 INT32 entry;
00293 INT32 offset;
00294 INT32 entrycount;
00295 INT32 bucket;
00296
00297 STATS result(rangemin, rangemax);
00298
00299 if (buckets == NULL) {
00300
00301
00302
00303 return;
00304 }
00305 if (factor < 2)
00306 return;
00307 entrycount = rangemax - rangemin;
00308 for (entry = 0; entry < entrycount; entry++) {
00309
00310 bucket = buckets[entry] * factor;
00311 for (offset = 1; offset < factor; offset++) {
00312 if (entry - offset >= 0)
00313 bucket += buckets[entry - offset] * (factor - offset);
00314 if (entry + offset < entrycount)
00315 bucket += buckets[entry + offset] * (factor - offset);
00316 }
00317 result.add (entry + rangemin, bucket);
00318 }
00319 total_count = result.total_count;
00320 memcpy (buckets, result.buckets, entrycount * sizeof (INT32));
00321 }
00322
00323
00337 INT32 STATS::cluster(
00338 float lower,
00339 float upper,
00340 float multiple,
00341 INT32 max_clusters,
00342 STATS *clusters
00343 ) {
00344 BOOL8 new_cluster;
00345 float *centres;
00346 INT32 entry;
00347 INT32 cluster;
00348 INT32 best_cluster;
00349 INT32 new_centre = 0;
00350 INT32 new_mode;
00351 INT32 count;
00352 float dist;
00353 float min_dist;
00354 INT32 cluster_count;
00355
00356 if (max_clusters < 1)
00357 return 0;
00358 if (buckets == NULL) {
00359
00360
00361
00362 return 0;
00363 }
00364 centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float));
00365 if (centres == NULL) {
00366
00367
00368
00369 return 0;
00370 }
00371 for (cluster_count = 1; cluster_count <= max_clusters
00372 && clusters[cluster_count].buckets != NULL
00373 && clusters[cluster_count].total_count > 0; cluster_count++) {
00374 centres[cluster_count] =
00375 (float) clusters[cluster_count].ile ((float) 0.5);
00376 new_centre = clusters[cluster_count].mode ();
00377 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00378 && entry >= rangemin
00379 && pile_count (entry) <= pile_count (entry + 1); entry--) {
00380 count = pile_count (entry) - clusters[0].pile_count (entry);
00381 if (count > 0) {
00382 clusters[cluster_count].add (entry, count);
00383 clusters[0].add (entry, count);
00384 }
00385 }
00386 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00387 && entry < rangemax
00388 && pile_count (entry) <= pile_count (entry - 1); entry++) {
00389 count = pile_count (entry) - clusters[0].pile_count (entry);
00390 if (count > 0) {
00391 clusters[cluster_count].add (entry, count);
00392 clusters[0].add (entry, count);
00393 }
00394 }
00395 }
00396 cluster_count--;
00397
00398 if (cluster_count == 0) {
00399 clusters[0].set_range (rangemin, rangemax);
00400 }
00401 do {
00402 new_cluster = FALSE;
00403 new_mode = 0;
00404 for (entry = 0; entry < rangemax - rangemin; entry++) {
00405 count = buckets[entry] - clusters[0].buckets[entry];
00406
00407 if (count > 0) {
00408 min_dist = (float) MAX_INT32;
00409 best_cluster = 0;
00410 for (cluster = 1; cluster <= cluster_count; cluster++) {
00411 dist = entry + rangemin - centres[cluster];
00412
00413 if (dist < 0)
00414 dist = -dist;
00415 if (dist < min_dist) {
00416 min_dist = dist;
00417 best_cluster = cluster;
00418 }
00419 }
00420 if (min_dist > upper
00421 && (best_cluster == 0
00422 || entry + rangemin > centres[best_cluster] * multiple
00423 || entry + rangemin < centres[best_cluster] / multiple)) {
00424 if (count > new_mode) {
00425 new_mode = count;
00426 new_centre = entry + rangemin;
00427 }
00428 }
00429 }
00430 }
00431 if (new_mode > 0 && cluster_count < max_clusters) {
00432 cluster_count++;
00433 new_cluster = TRUE;
00434 if (!clusters[cluster_count].set_range (rangemin, rangemax))
00435 return 0;
00436 centres[cluster_count] = (float) new_centre;
00437 clusters[cluster_count].add (new_centre, new_mode);
00438 clusters[0].add (new_centre, new_mode);
00439 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00440 && entry >= rangemin
00441 && pile_count (entry) <= pile_count (entry + 1); entry--) {
00442 count = pile_count (entry) - clusters[0].pile_count (entry);
00443 if (count > 0) {
00444 clusters[cluster_count].add (entry, count);
00445 clusters[0].add (entry, count);
00446 }
00447 }
00448 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00449 && entry < rangemax
00450 && pile_count (entry) <= pile_count (entry - 1); entry++) {
00451 count = pile_count (entry) - clusters[0].pile_count (entry);
00452 if (count > 0) {
00453 clusters[cluster_count].add (entry, count);
00454 clusters[0].add (entry, count);
00455 }
00456 }
00457 centres[cluster_count] =
00458 (float) clusters[cluster_count].ile ((float) 0.5);
00459 }
00460 }
00461 while (new_cluster && cluster_count < max_clusters);
00462 free_mem(centres);
00463 return cluster_count;
00464 }
00465
00474 BOOL8 STATS::local_min(
00475 INT32 x
00476 ) {
00477 INT32 index;
00478
00479 if (buckets == NULL) {
00480
00481
00482
00483 return FALSE;
00484 }
00485 if (x < rangemin)
00486 x = rangemin;
00487 if (x >= rangemax)
00488 x = rangemax - 1;
00489 x -= rangemin;
00490 if (buckets[x] == 0)
00491 return TRUE;
00492 for (index = x - 1; index >= 0 && buckets[index] == buckets[x]; index--);
00493 if (index >= 0 && buckets[index] < buckets[x])
00494 return FALSE;
00495 for (index = x + 1; index < rangemax - rangemin
00496 && buckets[index] == buckets[x]; index++);
00497 if (index < rangemax - rangemin && buckets[index] < buckets[x])
00498 return FALSE;
00499 else
00500 return TRUE;
00501 }
00502
00503
00509 void STATS::print(
00510 FILE *,
00511 BOOL8 dump
00512 ) {
00513 INT32 index;
00514
00515 if (buckets == NULL) {
00516
00517
00518
00519 return;
00520 }
00521 if (dump) {
00522 for (index = 0; index < rangemax - rangemin; index++) {
00523 tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00524 if (index % 8 == 7)
00525 tprintf ("\n");
00526 }
00527 tprintf ("\n");
00528 }
00529
00530 tprintf ("Total count=%d\n", total_count);
00531 tprintf ("Min=%d\n", (INT32) (ile ((float) 0.0)));
00532 tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00533 tprintf ("Median=%.2f\n", ile ((float) 0.5));
00534 tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00535 tprintf ("Max=%d\n", (INT32) (ile ((float) 0.99999)));
00536 tprintf ("Mean= %.2f\n", mean ());
00537 tprintf ("SD= %.2f\n", sd ());
00538 }
00539
00540
00544 INT32 STATS::min_bucket() {
00545 INT32 min;
00546
00547 if (buckets == NULL) {
00548
00549
00550
00551 return rangemin;
00552 }
00553
00554 for (min = 0; (min < rangemax - rangemin) && (buckets[min] == 0); min++);
00555 return rangemin + min;
00556 }
00557
00558
00562 INT32 STATS::max_bucket() {
00563 INT32 max;
00564
00565 if (buckets == NULL) {
00566
00567
00568
00569 return rangemin;
00570 }
00571
00572 for (max = rangemax - rangemin - 1;
00573 (max > 0) && (buckets[max] == 0); max--);
00574 return rangemin + max;
00575 }
00576
00577
00583 void STATS::short_print(
00584 FILE *,
00585 BOOL8 dump
00586 ) {
00587 INT32 index;
00588 INT32 min = min_bucket ();
00589 INT32 max = max_bucket ();
00590
00591 if (buckets == NULL) {
00592
00593
00594
00595 return;
00596 }
00597 if (dump) {
00598 for (index = min; index <= max; index++) {
00599 tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00600 if ((index - min) % 8 == 7)
00601 tprintf ("\n");
00602 }
00603 tprintf ("\n");
00604 }
00605
00606 tprintf ("Total count=%d\n", total_count);
00607 tprintf ("Min=%d Really=%d\n", (INT32) (ile ((float) 0.0)), min);
00608 tprintf ("Max=%d Really=%d\n", (INT32) (ile ((float) 1.1)), max);
00609 tprintf ("Range=%d\n", max + 1 - min);
00610 tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00611 tprintf ("Median=%.2f\n", ile ((float) 0.5));
00612 tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00613 tprintf ("Mean= %.2f\n", mean ());
00614 tprintf ("SD= %.2f\n", sd ());
00615 }
00616
00617
00621 #ifndef GRAPHICS_DISABLED
00622 void STATS::plot(
00623 WINDOW window,
00624 float xorigin,
00625 float yorigin,
00626 float xscale,
00627 float yscale,
00628 COLOUR colour
00629 ) {
00630 INT32 index;
00631
00632 if (buckets == NULL) {
00633
00634
00635
00636 return;
00637 }
00638 interior_style (window, INT_HOLLOW, 1);
00639 perimeter_color_index(window, colour);
00640
00641 for (index = 0; index < rangemax - rangemin; index++) {
00642 rectangle (window, xorigin + xscale * index, yorigin,
00643 xorigin + xscale * (index + 1),
00644 yorigin + yscale * buckets[index]);
00645 }
00646 }
00647 #endif
00648
00649
00653 #ifndef GRAPHICS_DISABLED
00654 void STATS::plotline(
00655 WINDOW window,
00656 float xorigin,
00657 float yorigin,
00658 float xscale,
00659 float yscale,
00660 COLOUR colour
00661 ) {
00662 INT32 index;
00663
00664 if (buckets == NULL) {
00665
00666
00667
00668 return;
00669 }
00670 line_color_index(window, colour);
00671 line_type(window, SOLID);
00672
00673 move2d (window, xorigin, yorigin + yscale * buckets[0]);
00674 for (index = 0; index < rangemax - rangemin; index++) {
00675 draw2d (window, xorigin + xscale * index,
00676 yorigin + yscale * buckets[index]);
00677 }
00678 }
00679 #endif
00680
00681
00692 DLLSYM INT32 choose_nth_item(
00693 INT32 index,
00694 float *array,
00695 INT32 count
00696 ) {
00697 static UINT16 seeds[3] = { SEED1, SEED2, SEED3 };
00698
00699 INT32 next_sample;
00700 INT32 next_lesser;
00701 INT32 prev_greater;
00702 INT32 equal_count;
00703 float pivot;
00704 float sample;
00705
00706 if (count <= 1)
00707 return 0;
00708 if (count == 2) {
00709 if (array[0] < array[1]) {
00710 return index >= 1 ? 1 : 0;
00711 }
00712 else {
00713 return index >= 1 ? 0 : 1;
00714 }
00715 }
00716 else {
00717 if (index < 0)
00718 index = 0;
00719 else if (index >= count)
00720 index = count - 1;
00721 #ifdef __UNIX__
00722 equal_count = (INT32) (nrand48 (seeds) % count);
00723 #else
00724 equal_count = (INT32) (rand () % count);
00725 #endif
00726 pivot = array[equal_count];
00727 array[equal_count] = array[0];
00728 next_lesser = 0;
00729 prev_greater = count;
00730 equal_count = 1;
00731 for (next_sample = 1; next_sample < prev_greater;) {
00732 sample = array[next_sample];
00733 if (sample < pivot) {
00734 array[next_lesser++] = sample;
00735 next_sample++;
00736 }
00737 else if (sample > pivot) {
00738 prev_greater--;
00739 array[next_sample] = array[prev_greater];
00740 array[prev_greater] = sample;
00741 }
00742 else {
00743 equal_count++;
00744 next_sample++;
00745 }
00746 }
00747 for (next_sample = next_lesser; next_sample < prev_greater;)
00748 array[next_sample++] = pivot;
00749 if (index < next_lesser)
00750 return choose_nth_item (index, array, next_lesser);
00751 else if (index < prev_greater)
00752 return next_lesser;
00753 else
00754 return choose_nth_item (index - prev_greater,
00755 array + prev_greater,
00756 count - prev_greater) + prev_greater;
00757 }
00758 }
00759
00760
00765 DLLSYM INT32
00766 choose_nth_item (
00767 INT32 index,
00768 void *array,
00769 INT32 count,
00770 size_t size,
00771
00772 int (*compar) (const void *, const void *)
00773 ) {
00774 static UINT16 seeds[3] = { SEED1, SEED2, SEED3 };
00775
00776 int result;
00777 INT32 next_sample;
00778 INT32 next_lesser;
00779 INT32 prev_greater;
00780 INT32 equal_count;
00781 INT32 pivot;
00782
00783 if (count <= 1)
00784 return 0;
00785 if (count == 2) {
00786 if (compar (array, (char *) array + size) < 0) {
00787 return index >= 1 ? 1 : 0;
00788 }
00789 else {
00790 return index >= 1 ? 0 : 1;
00791 }
00792 }
00793 if (index < 0)
00794 index = 0;
00795 else if (index >= count)
00796 index = count - 1;
00797 #ifdef __UNIX__
00798 pivot = (INT32) (nrand48 (seeds) % count);
00799 #else
00800 pivot = (INT32) (rand () % count);
00801 #endif
00802 swap_entries (array, size, pivot, 0);
00803 next_lesser = 0;
00804 prev_greater = count;
00805 equal_count = 1;
00806 for (next_sample = 1; next_sample < prev_greater;) {
00807 result =
00808 compar ((char *) array + size * next_sample,
00809 (char *) array + size * next_lesser);
00810 if (result < 0) {
00811 swap_entries (array, size, next_lesser++, next_sample++);
00812
00813 }
00814 else if (result > 0) {
00815 prev_greater--;
00816 swap_entries(array, size, prev_greater, next_sample);
00817 }
00818 else {
00819 equal_count++;
00820 next_sample++;
00821 }
00822 }
00823 if (index < next_lesser)
00824 return choose_nth_item (index, array, next_lesser, size, compar);
00825 else if (index < prev_greater)
00826 return next_lesser;
00827 else
00828 return choose_nth_item (index - prev_greater,
00829 (char *) array + size * prev_greater,
00830 count - prev_greater, size,
00831 compar) + prev_greater;
00832 }
00833
00834
00838 void swap_entries(
00839 void *array,
00840 size_t size,
00841 INT32 index1,
00842 INT32 index2) {
00843 char tmp;
00844 char *ptr1;
00845 char *ptr2;
00846 size_t count;
00847
00848 ptr1 = (char *) array + index1 * size;
00849 ptr2 = (char *) array + index2 * size;
00850 for (count = 0; count < size; count++) {
00851 tmp = *ptr1;
00852 *ptr1++ = *ptr2;
00853 *ptr2++ = tmp;
00854 }
00855 }