ccstruct/statistc.cpp

Go to the documentation of this file.
00001 
00020 #include          "mfcpch.h"     //precompiled headers
00021 #include          <string.h>
00022 #include          <math.h>
00023 #include          <stdlib.h>
00024 #include          "memry.h"
00025 //#include                                      "ipeerr.h"
00026 #include          "tprintf.h"
00027 #include          "statistc.h"
00028 
00029 #define SEED1       0x1234       //default seeds
00030 #define SEED2       0x5678
00031 #define SEED3       0x9abc
00032 
00036 STATS::STATS(            //constructor
00037              INT32 min,  // min of range
00038              INT32 max   // max of range
00039             ) {
00040 
00041   if (max <= min) {
00042     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00043             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00044             "Illegal range for stats, Min=%d, Max=%d",min,max);*/
00045     min = 0;
00046     max = 1;
00047   }
00048   rangemin = min;                //setup
00049   rangemax = max;
00050   buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00051   if (buckets != NULL)
00052     this->clear ();              //zero it
00053   /*   else
00054      err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00055      ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00056      "No memory for stats, Min=%d, Max=%d",min,max); */
00057 }
00058 
00059 
00060 STATS::STATS() {  //constructor
00061   rangemax = 0;   //empty
00062   rangemin = 0;
00063   buckets = NULL;
00064 }
00065 
00069 bool STATS::set_range(            //constructor
00070                       INT32 min,  //min of range
00071                       INT32 max   //max of range
00072                      ) {
00073 
00074   if (max <= min) {
00075     return false;
00076   }
00077   rangemin = min;                //setup
00078   rangemax = max;
00079   if (buckets != NULL)
00080     free_mem(buckets);  //no longer want it
00081   buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));
00082   /*  if (buckets==NULL)
00083       return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00084           ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00085           "No memory for stats, Min=%d, Max=%d",min,max);*/
00086 
00087   this->clear ();                //zero it
00088   return true;
00089 }
00090 
00091 
00095 void STATS::clear() {  //clear out buckets
00096   total_count = 0;
00097   if (buckets != NULL)
00098     memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32));
00099   //zero it
00100 }
00101 
00102 
00106 STATS::~STATS (                  //destructor
00107 ) {
00108   if (buckets != NULL) {
00109     free_mem(buckets); 
00110     buckets = NULL;
00111   }
00112 }
00113 
00114 
00118 void STATS::add(              //add sample
00119                 INT32 value,  //bucket
00120                 INT32 count   //no to add
00121                ) {
00122   if (buckets == NULL) {
00123     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00124             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00125             "Empty stats");*/
00126     return;
00127   }
00128   if (value <= rangemin)
00129     buckets[0] += count;         //silently clip to range
00130   else if (value >= rangemax)
00131     buckets[rangemax - rangemin - 1] += count;
00132   else
00133                                  //add count to cell
00134     buckets[value - rangemin] += count;
00135   total_count += count;          //keep count of total
00136 }
00137 
00141 INT32 STATS::mode() {  //get mode of samples
00142   INT32 index;         // current index
00143   INT32 max;           // max cell count
00144   INT32 maxindex;      // index of max
00145 
00146   if (buckets == NULL) {
00147     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00148             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00149             "Empty stats");*/
00150     return rangemin;
00151   }
00152   for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0;
00153   index--) {
00154     if (buckets[index] > max) {
00155       max = buckets[index];      //find biggest
00156       maxindex = index;
00157     }
00158   }
00159   return maxindex + rangemin;    //index of biggest
00160 }
00161 
00162 
00166 float STATS::mean() {  //get mean of samples
00167   INT32 index;                   //current index
00168   INT32 sum;                     //sum of cells
00169 
00170   if (buckets == NULL) {
00171     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00172             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00173             "Empty stats");*/
00174     return (float) rangemin;
00175   }
00176   for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) {
00177                                  //sum all buckets
00178     sum += index * buckets[index];
00179   }
00180   if (total_count > 0)
00181                                  //mean value
00182     return (float) sum / total_count + rangemin;
00183   else
00184     return (float) rangemin;     //no mean
00185 }
00186 
00187 
00191 float STATS::sd() {  //standard deviation
00192   INT32 index;                   //current index
00193   INT32 sum;                     //sum of cells
00194   INT32 sqsum;                   //sum of squares
00195   float variance;
00196 
00197   if (buckets == NULL) {
00198     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00199        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00200        "Empty stats"); */
00201     return (float) 0.0;
00202   }
00203   for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0;
00204   index--) {
00205                                  //sum all buckets
00206     sum += index * buckets[index];
00207                                  //and squares
00208     sqsum += index * index * buckets[index];
00209   }
00210   if (total_count > 0) {
00211     variance = sum / ((float) total_count);
00212     variance = sqsum / ((float) total_count) - variance * variance;
00213     return (float) sqrt (variance);
00214   }
00215   else
00216     return (float) 0.0;
00217 }
00218 
00219 /*
00220 \brief Find an arbitrary %ile of a stats class.
00221 */
00222 float STATS::ile(            //percentile
00223                  float frac  //fraction to find
00224                 ) {
00225   INT32 index;                   //current index
00226   INT32 sum;                     //sum of cells
00227   float target;                  //target value
00228 
00229   if (buckets == NULL) {
00230     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00231        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00232        "Empty stats"); */
00233     return (float) rangemin;
00234   }
00235   target = frac * total_count;
00236   if (target <= 0)
00237     target = (float) 1;
00238   if (target > total_count)
00239     target = (float) total_count;
00240   for (sum = 0, index = 0; index < rangemax - rangemin
00241     && sum < target; sum += buckets[index], index++);
00242   if (index > 0)
00243     return rangemin + index - (sum - target) / buckets[index - 1];
00244   //better than just ints
00245   else
00246     return (float) rangemin;
00247 }
00248 
00258 float STATS::median() {  //get median
00259   float median;
00260   INT32 min_pile;
00261   INT32 median_pile;
00262   INT32 max_pile;
00263 
00264   if (buckets == NULL) {
00265     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00266             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00267             "Empty stats");*/
00268     return (float) rangemin;
00269   }
00270   median = (float) ile ((float) 0.5);
00271   median_pile = (INT32) floor (median);
00272   if ((total_count > 1) && (pile_count (median_pile) == 0)) {
00273     /* Find preceeding non zero pile */
00274     for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--);
00275     /* Find following non zero pile */
00276     for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++);
00277     median = (float) ((min_pile + max_pile) / 2.0);
00278   }
00279   return median;
00280 }
00281 
00282 
00289 void STATS::smooth(              //smooth samples
00290                    INT32 factor  //size of triangle
00291                   ) {
00292   INT32 entry;                   //bucket index
00293   INT32 offset;                  //from entry
00294   INT32 entrycount;              //no of entries
00295   INT32 bucket;                  //new smoothed pile
00296                                  //output stats
00297   STATS result(rangemin, rangemax); 
00298 
00299   if (buckets == NULL) {
00300     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00301        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00302        "Empty stats"); */
00303     return;
00304   }
00305   if (factor < 2)
00306     return;                      //is a no-op
00307   entrycount = rangemax - rangemin;
00308   for (entry = 0; entry < entrycount; entry++) {
00309                                  //centre weight
00310     bucket = buckets[entry] * factor;
00311     for (offset = 1; offset < factor; offset++) {
00312       if (entry - offset >= 0)
00313         bucket += buckets[entry - offset] * (factor - offset);
00314       if (entry + offset < entrycount)
00315         bucket += buckets[entry + offset] * (factor - offset);
00316     }
00317     result.add (entry + rangemin, bucket);
00318   }
00319   total_count = result.total_count;
00320   memcpy (buckets, result.buckets, entrycount * sizeof (INT32));
00321 }
00322 
00323 
00337 INT32 STATS::cluster(
00338                      float lower,
00339                      float upper,
00340                      float multiple,
00341                      INT32 max_clusters,
00342                      STATS *clusters
00343                     ) {
00344   BOOL8 new_cluster;             // added one
00345   float *centres;                // cluster centres
00346   INT32 entry;                   // bucket index
00347   INT32 cluster;                 // cluster index
00348   INT32 best_cluster;            // one to assign to
00349   INT32 new_centre = 0;          // residual mode
00350   INT32 new_mode;                // pile count of new_centre
00351   INT32 count;                   // pile to place
00352   float dist;                    // from cluster
00353   float min_dist;                // from best_cluster
00354   INT32 cluster_count;           // no of clusters
00355 
00356   if (max_clusters < 1)
00357     return 0;
00358   if (buckets == NULL) {
00359     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00360             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00361             "Empty stats");*/
00362     return 0;
00363   }
00364   centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float));
00365   if (centres == NULL) {
00366     /*     err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,
00367        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00368        "No memory for centres"); */
00369     return 0;
00370   }
00371   for (cluster_count = 1; cluster_count <= max_clusters
00372     && clusters[cluster_count].buckets != NULL
00373   && clusters[cluster_count].total_count > 0; cluster_count++) {
00374     centres[cluster_count] =
00375       (float) clusters[cluster_count].ile ((float) 0.5);
00376     new_centre = clusters[cluster_count].mode ();
00377     for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00378       && entry >= rangemin
00379     && pile_count (entry) <= pile_count (entry + 1); entry--) {
00380       count = pile_count (entry) - clusters[0].pile_count (entry);
00381       if (count > 0) {
00382         clusters[cluster_count].add (entry, count);
00383         clusters[0].add (entry, count);
00384       }
00385     }
00386     for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00387       && entry < rangemax
00388     && pile_count (entry) <= pile_count (entry - 1); entry++) {
00389       count = pile_count (entry) - clusters[0].pile_count (entry);
00390       if (count > 0) {
00391         clusters[cluster_count].add (entry, count);
00392         clusters[0].add (entry, count);
00393       }
00394     }
00395   }
00396   cluster_count--;
00397 
00398   if (cluster_count == 0) {
00399     clusters[0].set_range (rangemin, rangemax);
00400   }
00401   do {
00402     new_cluster = FALSE;
00403     new_mode = 0;
00404     for (entry = 0; entry < rangemax - rangemin; entry++) {
00405       count = buckets[entry] - clusters[0].buckets[entry];
00406       // remaining pile
00407       if (count > 0) {           // any to handle
00408         min_dist = (float) MAX_INT32;
00409         best_cluster = 0;
00410         for (cluster = 1; cluster <= cluster_count; cluster++) {
00411           dist = entry + rangemin - centres[cluster];
00412           // find distance
00413           if (dist < 0)
00414             dist = -dist;
00415           if (dist < min_dist) {
00416             min_dist = dist;     // find least
00417             best_cluster = cluster;
00418           }
00419         }
00420         if (min_dist > upper     // far enough for new
00421           && (best_cluster == 0
00422           || entry + rangemin > centres[best_cluster] * multiple
00423         || entry + rangemin < centres[best_cluster] / multiple)) {
00424           if (count > new_mode) {
00425             new_mode = count;
00426             new_centre = entry + rangemin;
00427           }
00428         }
00429       }
00430     }
00431     if (new_mode > 0 && cluster_count < max_clusters) { // need new and room
00432       cluster_count++;
00433       new_cluster = TRUE;
00434       if (!clusters[cluster_count].set_range (rangemin, rangemax))
00435         return 0;
00436       centres[cluster_count] = (float) new_centre;
00437       clusters[cluster_count].add (new_centre, new_mode);
00438       clusters[0].add (new_centre, new_mode);
00439       for (entry = new_centre - 1; centres[cluster_count] - entry < lower
00440         && entry >= rangemin
00441       && pile_count (entry) <= pile_count (entry + 1); entry--) {
00442         count = pile_count (entry) - clusters[0].pile_count (entry);
00443         if (count > 0) {
00444           clusters[cluster_count].add (entry, count);
00445           clusters[0].add (entry, count);
00446         }
00447       }
00448       for (entry = new_centre + 1; entry - centres[cluster_count] < lower
00449         && entry < rangemax
00450       && pile_count (entry) <= pile_count (entry - 1); entry++) {
00451         count = pile_count (entry) - clusters[0].pile_count (entry);
00452         if (count > 0) {
00453           clusters[cluster_count].add (entry, count);
00454           clusters[0].add (entry, count);
00455         }
00456       }
00457       centres[cluster_count] =
00458         (float) clusters[cluster_count].ile ((float) 0.5);
00459     }
00460   }
00461   while (new_cluster && cluster_count < max_clusters);
00462   free_mem(centres); 
00463   return cluster_count;
00464 }
00465 
00474 BOOL8 STATS::local_min(
00475                        INT32 x
00476                       ) {
00477   INT32 index;                   // table index
00478 
00479   if (buckets == NULL) {
00480     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00481             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00482             "Empty stats");*/
00483     return FALSE;
00484   }
00485   if (x < rangemin)
00486     x = rangemin;
00487   if (x >= rangemax)
00488     x = rangemax - 1;
00489   x -= rangemin;
00490   if (buckets[x] == 0)
00491     return TRUE;
00492   for (index = x - 1; index >= 0 && buckets[index] == buckets[x]; index--);
00493   if (index >= 0 && buckets[index] < buckets[x])
00494     return FALSE;
00495   for (index = x + 1; index < rangemax - rangemin
00496     && buckets[index] == buckets[x]; index++);
00497   if (index < rangemax - rangemin && buckets[index] < buckets[x])
00498     return FALSE;
00499   else
00500     return TRUE;
00501 }
00502 
00503 
00509 void STATS::print(
00510                   FILE *,
00511                   BOOL8 dump
00512                  ) {
00513   INT32 index;                   // table index
00514 
00515   if (buckets == NULL) {
00516     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00517        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00518        "Empty stats"); */
00519     return;
00520   }
00521   if (dump) {
00522     for (index = 0; index < rangemax - rangemin; index++) {
00523       tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00524       if (index % 8 == 7)
00525         tprintf ("\n");
00526     }
00527     tprintf ("\n");
00528   }
00529 
00530   tprintf ("Total count=%d\n", total_count);
00531   tprintf ("Min=%d\n", (INT32) (ile ((float) 0.0)));
00532   tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00533   tprintf ("Median=%.2f\n", ile ((float) 0.5));
00534   tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00535   tprintf ("Max=%d\n", (INT32) (ile ((float) 0.99999)));
00536   tprintf ("Mean= %.2f\n", mean ());
00537   tprintf ("SD= %.2f\n", sd ());
00538 }
00539 
00540 
00544 INT32 STATS::min_bucket() {  //Find min
00545   INT32 min;
00546 
00547   if (buckets == NULL) {
00548     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00549             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00550             "Empty stats");*/
00551     return rangemin;
00552   }
00553 
00554   for (min = 0; (min < rangemax - rangemin) && (buckets[min] == 0); min++);
00555   return rangemin + min;
00556 }
00557 
00558 
00562 INT32 STATS::max_bucket() {  //Find max
00563   INT32 max;
00564 
00565   if (buckets == NULL) {
00566     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00567             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00568             "Empty stats");*/
00569     return rangemin;
00570   }
00571 
00572   for (max = rangemax - rangemin - 1;
00573     (max > 0) && (buckets[max] == 0); max--);
00574   return rangemin + max;
00575 }
00576 
00577 
00583 void STATS::short_print(            //print stats table
00584                         FILE *,     //Now uses tprintf instead
00585                         BOOL8 dump  //dump full table
00586                        ) {
00587   INT32 index;                   //table index
00588   INT32 min = min_bucket ();
00589   INT32 max = max_bucket ();
00590 
00591   if (buckets == NULL) {
00592     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00593        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00594        "Empty stats"); */
00595     return;
00596   }
00597   if (dump) {
00598     for (index = min; index <= max; index++) {
00599       tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
00600       if ((index - min) % 8 == 7)
00601         tprintf ("\n");
00602     }
00603     tprintf ("\n");
00604   }
00605 
00606   tprintf ("Total count=%d\n", total_count);
00607   tprintf ("Min=%d Really=%d\n", (INT32) (ile ((float) 0.0)), min);
00608   tprintf ("Max=%d Really=%d\n", (INT32) (ile ((float) 1.1)), max);
00609   tprintf ("Range=%d\n", max + 1 - min);
00610   tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
00611   tprintf ("Median=%.2f\n", ile ((float) 0.5));
00612   tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
00613   tprintf ("Mean= %.2f\n", mean ());
00614   tprintf ("SD= %.2f\n", sd ());
00615 }
00616 
00617 
00621 #ifndef GRAPHICS_DISABLED
00622 void STATS::plot(                //plot stats table
00623                  WINDOW window,  //to draw in
00624                  float xorigin,  //bottom left
00625                  float yorigin,
00626                  float xscale,   //one x unit
00627                  float yscale,   //one y unit
00628                  COLOUR colour   //colour to draw in
00629                 ) {
00630   INT32 index;                   //table index
00631 
00632   if (buckets == NULL) {
00633     /*      err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00634             ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00635             "Empty stats");*/
00636     return;
00637   }
00638   interior_style (window, INT_HOLLOW, 1);
00639   perimeter_color_index(window, colour); 
00640 
00641   for (index = 0; index < rangemax - rangemin; index++) {
00642     rectangle (window, xorigin + xscale * index, yorigin,
00643       xorigin + xscale * (index + 1),
00644       yorigin + yscale * buckets[index]);
00645   }
00646 }
00647 #endif
00648 
00649 
00653 #ifndef GRAPHICS_DISABLED
00654 void STATS::plotline(                //plot stats table
00655                      WINDOW window,  //to draw in
00656                      float xorigin,  //bottom left
00657                      float yorigin,
00658                      float xscale,   //one x unit
00659                      float yscale,   //one y unit
00660                      COLOUR colour   //colour to draw in
00661                     ) {
00662   INT32 index;                   //table index
00663 
00664   if (buckets == NULL) {
00665     /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
00666        ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
00667        "Empty stats"); */
00668     return;
00669   }
00670   line_color_index(window, colour); 
00671   line_type(window, SOLID); 
00672 
00673   move2d (window, xorigin, yorigin + yscale * buckets[0]);
00674   for (index = 0; index < rangemax - rangemin; index++) {
00675     draw2d (window, xorigin + xscale * index,
00676       yorigin + yscale * buckets[index]);
00677   }
00678 }
00679 #endif
00680 
00681 
00692 DLLSYM INT32 choose_nth_item(
00693                              INT32 index,
00694                              float *array,
00695                              INT32 count
00696                             ) {
00697   static UINT16 seeds[3] = { SEED1, SEED2, SEED3 };
00698   //for nrand
00699   INT32 next_sample;             
00700   INT32 next_lesser;             
00701   INT32 prev_greater;            
00702   INT32 equal_count;             
00703   float pivot;                   
00704   float sample;                  
00705 
00706   if (count <= 1)
00707     return 0;
00708   if (count == 2) {
00709     if (array[0] < array[1]) {
00710       return index >= 1 ? 1 : 0;
00711     }
00712     else {
00713       return index >= 1 ? 0 : 1;
00714     }
00715   }
00716   else {
00717     if (index < 0)
00718       index = 0;                 // ensure lergal
00719     else if (index >= count)
00720       index = count - 1;
00721     #ifdef __UNIX__
00722     equal_count = (INT32) (nrand48 (seeds) % count);
00723     #else
00724     equal_count = (INT32) (rand () % count);
00725     #endif
00726     pivot = array[equal_count];
00727     array[equal_count] = array[0]; // fill gap
00728     next_lesser = 0;
00729     prev_greater = count;
00730     equal_count = 1;
00731     for (next_sample = 1; next_sample < prev_greater;) {
00732       sample = array[next_sample];
00733       if (sample < pivot) {
00734         array[next_lesser++] = sample; // shuffle
00735         next_sample++;
00736       }
00737       else if (sample > pivot) {
00738         prev_greater--;
00739         array[next_sample] = array[prev_greater]; // juggle
00740         array[prev_greater] = sample;
00741       }
00742       else {
00743         equal_count++;
00744         next_sample++;
00745       }
00746     }
00747     for (next_sample = next_lesser; next_sample < prev_greater;)
00748       array[next_sample++] = pivot;
00749     if (index < next_lesser)
00750       return choose_nth_item (index, array, next_lesser);
00751     else if (index < prev_greater)
00752       return next_lesser;        
00753     else
00754       return choose_nth_item (index - prev_greater,
00755         array + prev_greater,
00756         count - prev_greater) + prev_greater;
00757   }
00758 }
00759 
00760 
00765 DLLSYM INT32
00766 choose_nth_item (                //fast median
00767 INT32 index,                     //index to choose
00768 void *array,                     //array of items
00769 INT32 count,                     //no of items
00770 size_t size,                     //element size
00771                                  //comparator
00772 int (*compar) (const void *, const void *)
00773 ) {
00774   static UINT16 seeds[3] = { SEED1, SEED2, SEED3 };
00775   //for nrand
00776   int result;                    //of compar
00777   INT32 next_sample;             //next one to do
00778   INT32 next_lesser;             //space for new
00779   INT32 prev_greater;            //last one saved
00780   INT32 equal_count;             //no of equal ones
00781   INT32 pivot;                   //proposed median
00782 
00783   if (count <= 1)
00784     return 0;
00785   if (count == 2) {
00786     if (compar (array, (char *) array + size) < 0) {
00787       return index >= 1 ? 1 : 0;
00788     }
00789     else {
00790       return index >= 1 ? 0 : 1;
00791     }
00792   }
00793   if (index < 0)
00794     index = 0;                   //ensure lergal
00795   else if (index >= count)
00796     index = count - 1;
00797   #ifdef __UNIX__
00798   pivot = (INT32) (nrand48 (seeds) % count);
00799   #else
00800   pivot = (INT32) (rand () % count);
00801   #endif
00802   swap_entries (array, size, pivot, 0);
00803   next_lesser = 0;
00804   prev_greater = count;
00805   equal_count = 1;
00806   for (next_sample = 1; next_sample < prev_greater;) {
00807     result =
00808       compar ((char *) array + size * next_sample,
00809       (char *) array + size * next_lesser);
00810     if (result < 0) {
00811       swap_entries (array, size, next_lesser++, next_sample++);
00812       //shuffle
00813     }
00814     else if (result > 0) {
00815       prev_greater--;
00816       swap_entries(array, size, prev_greater, next_sample); 
00817     }
00818     else {
00819       equal_count++;
00820       next_sample++;
00821     }
00822   }
00823   if (index < next_lesser)
00824     return choose_nth_item (index, array, next_lesser, size, compar);
00825   else if (index < prev_greater)
00826     return next_lesser;          //in equal bracket
00827   else
00828     return choose_nth_item (index - prev_greater,
00829       (char *) array + size * prev_greater,
00830       count - prev_greater, size,
00831       compar) + prev_greater;
00832 }
00833 
00834 
00838 void swap_entries(               //swap in place
00839                   void *array,   //array of entries
00840                   size_t size,   //size of entry
00841                   INT32 index1,  //entries to swap
00842                   INT32 index2) {
00843   char tmp;
00844   char *ptr1;                    //to entries
00845   char *ptr2;
00846   size_t count;                  //of bytes
00847 
00848   ptr1 = (char *) array + index1 * size;
00849   ptr2 = (char *) array + index2 * size;
00850   for (count = 0; count < size; count++) {
00851     tmp = *ptr1;
00852     *ptr1++ = *ptr2;
00853     *ptr2++ = tmp;               //tedious!
00854   }
00855 }

Generated on Wed Feb 28 19:49:08 2007 for Tesseract by  doxygen 1.5.1