00001
00019 #include "oldheap.h"
00020 #include "const.h"
00021 #include "cluster.h"
00022 #include "emalloc.h"
00023 #include "danerror.h"
00024 #include "freelist.h"
00025 #include <math.h>
00026
00027 #define HOTELLING 1 // If true use Hotelling's test to decide where to split.
00028 #define FTABLE_X 10 // Size of FTable.
00029 #define FTABLE_Y 100 // Size of FTable.
00030
00035 double FTable[FTABLE_Y][FTABLE_X] = {
00036 {4052.19,4999.52,5403.34,5624.62,5763.65,5858.97, 5928.33, 5981.10, 6022.50, 6055.85,},
00037 {98.502,99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,},
00038 {34.116,30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,},
00039 {21.198,18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,},
00040 {16.258,13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,},
00041 {13.745,10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,},
00042 {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,},
00043 {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,},
00044 {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,},
00045 {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,},
00046 { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,},
00047 { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,},
00048 { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,},
00049 { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,},
00050 { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,},
00051 { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,},
00052 { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,},
00053 { 8.285, 6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,},
00054 { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,},
00055 { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,},
00056 { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,},
00057 { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,},
00058 { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,},
00059 { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,},
00060 { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 3.129,},
00061 { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,},
00062 { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,},
00063 { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,},
00064 { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,},
00065 { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,},
00066 { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,},
00067 { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,},
00068 { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,},
00069 { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,},
00070 { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,},
00071 { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,},
00072 { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,},
00073 { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,},
00074 { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,},
00075 { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,},
00076 { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,},
00077 { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,},
00078 { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,},
00079 { 7.248, 5.123, 4.261, 3.778, 3.465, 3.243, 3.076, 2.946, 2.840, 2.754,},
00080 { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,},
00081 { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,},
00082 { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,},
00083 { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,},
00084 { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,},
00085 { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,},
00086 { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,},
00087 { 7.149, 5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,},
00088 { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,},
00089 { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,},
00090 { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,},
00091 { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,},
00092 { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,},
00093 { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,},
00094 { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,},
00095 { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,},
00096 { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,},
00097 { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,},
00098 { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,},
00099 { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,},
00100 { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,},
00101 { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,},
00102 { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,},
00103 { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,},
00104 { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,},
00105 { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 2.777, 2.672, 2.585,},
00106 { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,},
00107 { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,},
00108 { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,},
00109 { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,},
00110 { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,},
00111 { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,},
00112 { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,},
00113 { 6.971, 4.888, 4.043, 3.570, 3.261, 3.042, 2.877, 2.748, 2.644, 2.557,},
00114 { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,},
00115 { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,},
00116 { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,},
00117 { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,},
00118 { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,},
00119 { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,},
00120 { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,},
00121 { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,},
00122 { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,},
00123 { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,},
00124 { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,},
00125 { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,},
00126 { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,},
00127 { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,},
00128 { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,},
00129 { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,},
00130 { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,},
00131 { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,},
00132 { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,},
00133 { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,},
00134 { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,},
00135 { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503}
00136 };
00137
00146 #define MINVARIANCE 0.0001
00147
00158 #define MINBUCKETS 5
00159 #define MINSAMPLESPERBUCKET 5
00160 #define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET)
00161 #define MINSAMPLESNEEDED 1
00162
00176 #define BUCKETTABLESIZE 1024
00177 #define NORMALEXTENT 3.0
00178
00183 typedef struct
00184 {
00185 CLUSTER *Cluster;
00186 CLUSTER *Neighbor;
00187 } TEMPCLUSTER;
00188
00193 typedef struct
00194 {
00195 FLOAT32 AvgVariance;
00196 FLOAT32 *CoVariance;
00197 FLOAT32 *Min;
00198 FLOAT32 *Max;
00199 } STATISTICS;
00200
00205 typedef struct
00206 {
00207 DISTRIBUTION Distribution;
00208 UINT32 SampleCount;
00209 FLOAT64 Confidence;
00210 FLOAT64 ChiSquared;
00211 UINT16 NumberOfBuckets;
00212 UINT16 Bucket[BUCKETTABLESIZE];
00213 UINT32 *Count;
00214 FLOAT32 *ExpectedCount;
00215 } BUCKETS;
00216
00221 typedef struct
00222 {
00223 UINT16 DegreesOfFreedom;
00224 FLOAT64 Alpha;
00225 FLOAT64 ChiSquared;
00226 } CHISTRUCT;
00227
00228 typedef FLOAT64 (*DENSITYFUNC) (INT32);
00229 typedef FLOAT64 (*SOLVEFUNC) (CHISTRUCT *, double);
00230
00231 #define Odd(N) ((N)%2)
00232 #define Mirror(N,R) ((R) - (N) - 1)
00233 #define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) )
00234
00235
00236
00241 static HEAP *Heap;
00242 static TEMPCLUSTER *TempCluster;
00243 static KDTREE *Tree;
00244 static INT32 CurrentTemp;
00245
00256 #define SqrtOf2Pi 2.506628275
00257 static FLOAT64 NormalStdDev = BUCKETTABLESIZE / (2.0 * NORMALEXTENT);
00258 static FLOAT64 NormalVariance =
00259 (BUCKETTABLESIZE * BUCKETTABLESIZE) / (4.0 * NORMALEXTENT * NORMALEXTENT);
00260 static FLOAT64 NormalMagnitude =
00261 (2.0 * NORMALEXTENT) / (SqrtOf2Pi * BUCKETTABLESIZE);
00262 static FLOAT64 NormalMean = BUCKETTABLESIZE / 2;
00263
00265 static LIST OldBuckets[] = { NIL, NIL, NIL };
00266
00274 #define LOOKUPTABLESIZE 8
00275 #define MAXBUCKETS 39
00276 #define MAXDEGREESOFFREEDOM MAXBUCKETS
00277 static UINT32 CountTable[LOOKUPTABLESIZE] = {
00278 MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000
00279 };
00280 static UINT16 BucketsTable[LOOKUPTABLESIZE] = {
00281 MINBUCKETS, 16, 20, 24, 27, 30, 35, MAXBUCKETS
00282 };
00283
00284
00285
00286
00287 void CreateClusterTree(CLUSTERER *Clusterer);
00288
00289 void MakePotentialClusters(CLUSTER *Cluster, VISIT Order, INT32 Level);
00290
00291 CLUSTER *FindNearestNeighbor(KDTREE *Tree,
00292 CLUSTER *Cluster,
00293 FLOAT32 *Distance);
00294
00295 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster);
00296
00297 INT32 MergeClusters (INT16 N,
00298 register PARAM_DESC ParamDesc[],
00299 register INT32 n1,
00300 register INT32 n2,
00301 register FLOAT32 m[],
00302 register FLOAT32 m1[], register FLOAT32 m2[]);
00303
00304 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
00305
00306 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
00307 CLUSTERCONFIG *Config,
00308 CLUSTER *Cluster);
00309
00310 PROTOTYPE *MakeDegenerateProto(UINT16 N,
00311 CLUSTER *Cluster,
00312 STATISTICS *Statistics,
00313 PROTOSTYLE Style,
00314 INT32 MinSamples);
00315
00316 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
00317 CLUSTER *Cluster,
00318 STATISTICS *Statistics);
00319
00320 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
00321 CLUSTER *Cluster,
00322 STATISTICS *Statistics,
00323 BUCKETS *Buckets);
00324
00325 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
00326 CLUSTER *Cluster,
00327 STATISTICS *Statistics,
00328 BUCKETS *Buckets);
00329
00330 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
00331 CLUSTER *Cluster,
00332 STATISTICS *Statistics,
00333 BUCKETS *NormalBuckets,
00334 FLOAT64 Confidence);
00335
00336 void MakeDimRandom(UINT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc);
00337
00338 void MakeDimUniform(UINT16 i, PROTOTYPE *Proto, STATISTICS *Statistics);
00339
00340 STATISTICS *ComputeStatistics (INT16 N,
00341 PARAM_DESC ParamDesc[], CLUSTER * Cluster);
00342
00343 PROTOTYPE *NewSphericalProto(UINT16 N,
00344 CLUSTER *Cluster,
00345 STATISTICS *Statistics);
00346
00347 PROTOTYPE *NewEllipticalProto(INT16 N,
00348 CLUSTER *Cluster,
00349 STATISTICS *Statistics);
00350
00351 PROTOTYPE *NewMixedProto(INT16 N, CLUSTER *Cluster, STATISTICS *Statistics);
00352
00353 PROTOTYPE *NewSimpleProto(INT16 N, CLUSTER *Cluster);
00354
00355 BOOL8 Independent (PARAM_DESC ParamDesc[],
00356 INT16 N, FLOAT32 * CoVariance, FLOAT32 Independence);
00357
00358 BUCKETS *GetBuckets(DISTRIBUTION Distribution,
00359 UINT32 SampleCount,
00360 FLOAT64 Confidence);
00361
00362 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
00363 UINT32 SampleCount,
00364 FLOAT64 Confidence);
00365
00366 UINT16 OptimumNumberOfBuckets(UINT32 SampleCount);
00367
00368 FLOAT64 ComputeChiSquared(UINT16 DegreesOfFreedom, FLOAT64 Alpha);
00369
00370 FLOAT64 NormalDensity(INT32 x);
00371
00372 FLOAT64 UniformDensity(INT32 x);
00373
00374 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx);
00375
00376 void FillBuckets(BUCKETS *Buckets,
00377 CLUSTER *Cluster,
00378 UINT16 Dim,
00379 PARAM_DESC *ParamDesc,
00380 FLOAT32 Mean,
00381 FLOAT32 StdDev);
00382
00383 UINT16 NormalBucket(PARAM_DESC *ParamDesc,
00384 FLOAT32 x,
00385 FLOAT32 Mean,
00386 FLOAT32 StdDev);
00387
00388 UINT16 UniformBucket(PARAM_DESC *ParamDesc,
00389 FLOAT32 x,
00390 FLOAT32 Mean,
00391 FLOAT32 StdDev);
00392
00393 BOOL8 DistributionOK(BUCKETS *Buckets);
00394
00395 void FreeStatistics(STATISTICS *Statistics);
00396
00397 void FreeBuckets(BUCKETS *Buckets);
00398
00399 void FreeCluster(CLUSTER *Cluster);
00400
00401 UINT16 DegreesOfFreedom(DISTRIBUTION Distribution, UINT16 HistogramBuckets);
00402
00403 int NumBucketsMatch(void *arg1,
00404 void *arg2);
00405
00406 int ListEntryMatch(void *arg1, void *arg2);
00407
00408 void AdjustBuckets(BUCKETS *Buckets, UINT32 NewSampleCount);
00409
00410 void InitBuckets(BUCKETS *Buckets);
00411
00412 int AlphaMatch(void *arg1,
00413 void *arg2);
00414
00415 CHISTRUCT *NewChiStruct(UINT16 DegreesOfFreedom, FLOAT64 Alpha);
00416
00417 FLOAT64 Solve(SOLVEFUNC Function,
00418 void *FunctionParams,
00419 FLOAT64 InitialGuess,
00420 FLOAT64 Accuracy);
00421
00422 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x);
00423
00424 BOOL8 MultipleCharSamples(CLUSTERER *Clusterer,
00425 CLUSTER *Cluster,
00426 FLOAT32 MaxIllegal);
00427
00428 double InvertMatrix(const float* input, int size, float* inv);
00429
00430
00441 CLUSTERER *
00442 MakeClusterer (INT16 SampleSize, PARAM_DESC ParamDesc[]) {
00443 CLUSTERER *Clusterer;
00444 int i;
00445
00446
00447 Clusterer = (CLUSTERER *) Emalloc (sizeof (CLUSTERER));
00448 Clusterer->SampleSize = SampleSize;
00449 Clusterer->NumberOfSamples = 0;
00450 Clusterer->NumChar = 0;
00451
00452
00453 Clusterer->Root = NULL;
00454 Clusterer->ProtoList = NIL;
00455
00456
00457 Clusterer->ParamDesc =
00458 (PARAM_DESC *) Emalloc (SampleSize * sizeof (PARAM_DESC));
00459 for (i = 0; i < SampleSize; i++) {
00460 Clusterer->ParamDesc[i].Circular = ParamDesc[i].Circular;
00461 Clusterer->ParamDesc[i].NonEssential = ParamDesc[i].NonEssential;
00462 Clusterer->ParamDesc[i].Min = ParamDesc[i].Min;
00463 Clusterer->ParamDesc[i].Max = ParamDesc[i].Max;
00464 Clusterer->ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
00465 Clusterer->ParamDesc[i].HalfRange = Clusterer->ParamDesc[i].Range / 2;
00466 Clusterer->ParamDesc[i].MidRange =
00467 (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
00468 }
00469
00470
00471 Clusterer->KDTree = MakeKDTree (SampleSize, ParamDesc);
00472
00473
00474
00475
00476 return (Clusterer);
00477 }
00478
00479
00497 SAMPLE *
00498 MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], INT32 CharID) {
00499 SAMPLE *Sample;
00500 int i;
00501
00502
00503 if (Clusterer->Root != NULL)
00504 DoError (ALREADYCLUSTERED,
00505 "Can't add samples after they have been clustered");
00506
00507
00508 Sample = (SAMPLE *) Emalloc (sizeof (SAMPLE) +
00509 (Clusterer->SampleSize -
00510 1) * sizeof (FLOAT32));
00511 Sample->Clustered = FALSE;
00512 Sample->Prototype = FALSE;
00513 Sample->SampleCount = 1;
00514 Sample->Left = NULL;
00515 Sample->Right = NULL;
00516 Sample->CharID = CharID;
00517
00518 for (i = 0; i < Clusterer->SampleSize; i++)
00519 Sample->Mean[i] = Feature[i];
00520
00521
00522 Clusterer->NumberOfSamples++;
00523 KDStore (Clusterer->KDTree, Sample->Mean, (char *) Sample);
00524 if (CharID >= Clusterer->NumChar)
00525 Clusterer->NumChar = CharID + 1;
00526
00527
00528
00529
00530 return (Sample);
00531 }
00532
00533
00557 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
00558
00559 if (Clusterer->Root == NULL)
00560 CreateClusterTree(Clusterer);
00561
00562
00563 FreeProtoList (&Clusterer->ProtoList);
00564 Clusterer->ProtoList = NIL;
00565
00566
00567 ComputePrototypes(Clusterer, Config);
00568 return (Clusterer->ProtoList);
00569 }
00570
00571
00588 void FreeClusterer(CLUSTERER *Clusterer) {
00589 if (Clusterer != NULL) {
00590 memfree (Clusterer->ParamDesc);
00591 if (Clusterer->KDTree != NULL)
00592 FreeKDTree (Clusterer->KDTree);
00593 if (Clusterer->Root != NULL)
00594 FreeCluster (Clusterer->Root);
00595 iterate (Clusterer->ProtoList) {
00596 ((PROTOTYPE *) (first (Clusterer->ProtoList)))->Cluster = NULL;
00597 }
00598 memfree(Clusterer);
00599 }
00600 }
00601
00602
00614 void FreeProtoList(LIST *ProtoList) {
00615 destroy_nodes(*ProtoList, FreePrototype);
00616 }
00617
00618
00631 void FreePrototype(void *arg) {
00632 PROTOTYPE *Prototype = (PROTOTYPE *) arg;
00633
00634
00635 if (Prototype->Cluster != NULL)
00636 Prototype->Cluster->Prototype = FALSE;
00637
00638
00639 if (Prototype->Distrib != NULL)
00640 memfree (Prototype->Distrib);
00641 if (Prototype->Mean != NULL)
00642 memfree (Prototype->Mean);
00643 if (Prototype->Style != spherical) {
00644 if (Prototype->Variance.Elliptical != NULL)
00645 memfree (Prototype->Variance.Elliptical);
00646 if (Prototype->Magnitude.Elliptical != NULL)
00647 memfree (Prototype->Magnitude.Elliptical);
00648 if (Prototype->Weight.Elliptical != NULL)
00649 memfree (Prototype->Weight.Elliptical);
00650 }
00651 memfree(Prototype);
00652 }
00653
00654
00672 CLUSTER *NextSample(LIST *SearchState) {
00673 CLUSTER *Cluster;
00674
00675 if (*SearchState == NIL)
00676 return (NULL);
00677 Cluster = (CLUSTER *) first (*SearchState);
00678 *SearchState = pop (*SearchState);
00679 while (TRUE) {
00680 if (Cluster->Left == NULL)
00681 return (Cluster);
00682 *SearchState = push (*SearchState, Cluster->Right);
00683 Cluster = Cluster->Left;
00684 }
00685 }
00686
00687
00698 FLOAT32 Mean(PROTOTYPE *Proto, UINT16 Dimension) {
00699 return (Proto->Mean[Dimension]);
00700 }
00701
00702
00712 FLOAT32 StandardDeviation(PROTOTYPE *Proto, UINT16 Dimension) {
00713 switch (Proto->Style) {
00714 case spherical:
00715 return ((FLOAT32) sqrt ((double) Proto->Variance.Spherical));
00716 case elliptical:
00717 return ((FLOAT32)
00718 sqrt ((double) Proto->Variance.Elliptical[Dimension]));
00719 case mixed:
00720 switch (Proto->Distrib[Dimension]) {
00721 case normal:
00722 return ((FLOAT32)
00723 sqrt ((double) Proto->Variance.Elliptical[Dimension]));
00724 case uniform:
00725 case D_random:
00726 return (Proto->Variance.Elliptical[Dimension]);
00727 }
00728 }
00729 return 0.0f;
00730 }
00731
00732
00733
00734
00735
00736
00756 void CreateClusterTree(CLUSTERER *Clusterer) {
00757 HEAPENTRY HeapEntry;
00758 TEMPCLUSTER *PotentialCluster;
00759
00760
00761 Tree = Clusterer->KDTree;
00762
00763
00764 TempCluster = (TEMPCLUSTER *)
00765 Emalloc (Clusterer->NumberOfSamples * sizeof (TEMPCLUSTER));
00766 CurrentTemp = 0;
00767
00768
00769
00770 Heap = MakeHeap (Clusterer->NumberOfSamples);
00771 KDWalk (Tree, (void_proc) MakePotentialClusters);
00772
00773
00774 while (GetTopOfHeap (Heap, &HeapEntry) != EMPTY) {
00775 PotentialCluster = (TEMPCLUSTER *) (HeapEntry.Data);
00776
00777
00778
00779 if (PotentialCluster->Cluster->Clustered) {
00780 continue;
00781 }
00782
00783
00784
00785 else if (PotentialCluster->Neighbor->Clustered) {
00786 PotentialCluster->Neighbor =
00787 FindNearestNeighbor (Tree, PotentialCluster->Cluster,
00788 &(HeapEntry.Key));
00789 if (PotentialCluster->Neighbor != NULL) {
00790 HeapStore(Heap, &HeapEntry);
00791 }
00792 }
00793
00794
00795 else {
00796 PotentialCluster->Cluster =
00797 MakeNewCluster(Clusterer, PotentialCluster);
00798 PotentialCluster->Neighbor =
00799 FindNearestNeighbor (Tree, PotentialCluster->Cluster,
00800 &(HeapEntry.Key));
00801 if (PotentialCluster->Neighbor != NULL) {
00802 HeapStore(Heap, &HeapEntry);
00803 }
00804 }
00805 }
00806
00807
00808 Clusterer->Root = (CLUSTER *) RootOf (Clusterer->KDTree);
00809
00810
00811 FreeKDTree(Tree);
00812 Clusterer->KDTree = NULL;
00813 FreeHeap(Heap);
00814 memfree(TempCluster);
00815 }
00816
00817
00838 void MakePotentialClusters(CLUSTER *Cluster, VISIT Order, INT32 Level) {
00839 HEAPENTRY HeapEntry;
00840
00841 if ((Order == preorder) || (Order == leaf)) {
00842 TempCluster[CurrentTemp].Cluster = Cluster;
00843 HeapEntry.Data = (char *) &(TempCluster[CurrentTemp]);
00844 TempCluster[CurrentTemp].Neighbor =
00845 FindNearestNeighbor (Tree, TempCluster[CurrentTemp].Cluster,
00846 &(HeapEntry.Key));
00847 if (TempCluster[CurrentTemp].Neighbor != NULL) {
00848 HeapStore(Heap, &HeapEntry);
00849 CurrentTemp++;
00850 }
00851 }
00852 }
00853
00854
00874 CLUSTER *
00875 FindNearestNeighbor (KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance)
00876 #define MAXNEIGHBORS 2
00877 #define MAXDISTANCE MAX_FLOAT32
00878 {
00879 CLUSTER *Neighbor[MAXNEIGHBORS];
00880 FLOAT32 Dist[MAXNEIGHBORS];
00881 INT32 NumberOfNeighbors;
00882 INT32 i;
00883 CLUSTER *BestNeighbor;
00884
00885
00886 NumberOfNeighbors = KDNearestNeighborSearch
00887 (Tree, Cluster->Mean, MAXNEIGHBORS, MAXDISTANCE, Neighbor, Dist);
00888
00889
00890 *Distance = MAXDISTANCE;
00891 BestNeighbor = NULL;
00892 for (i = 0; i < NumberOfNeighbors; i++) {
00893 if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) {
00894 *Distance = Dist[i];
00895 BestNeighbor = Neighbor[i];
00896 }
00897 }
00898 return (BestNeighbor);
00899 }
00900
00901
00916 CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) {
00917 CLUSTER *Cluster;
00918
00919
00920 Cluster = (CLUSTER *) Emalloc (sizeof (CLUSTER) +
00921 (Clusterer->SampleSize -
00922 1) * sizeof (FLOAT32));
00923 Cluster->Clustered = FALSE;
00924 Cluster->Prototype = FALSE;
00925 Cluster->Left = TempCluster->Cluster;
00926 Cluster->Right = TempCluster->Neighbor;
00927 Cluster->CharID = -1;
00928
00929
00930 Cluster->Left->Clustered = TRUE;
00931 Cluster->Right->Clustered = TRUE;
00932 KDDelete (Clusterer->KDTree, Cluster->Left->Mean, Cluster->Left);
00933 KDDelete (Clusterer->KDTree, Cluster->Right->Mean, Cluster->Right);
00934
00935
00936 Cluster->SampleCount =
00937 MergeClusters (Clusterer->SampleSize, Clusterer->ParamDesc,
00938 Cluster->Left->SampleCount, Cluster->Right->SampleCount,
00939 Cluster->Mean, Cluster->Left->Mean, Cluster->Right->Mean);
00940
00941
00942 KDStore (Clusterer->KDTree, Cluster->Mean, Cluster);
00943 return (Cluster);
00944 }
00945
00946
00964 INT32 MergeClusters (INT16 N,
00965 register PARAM_DESC ParamDesc[],
00966 register INT32 n1,
00967 register INT32 n2,
00968 register FLOAT32 m[],
00969 register FLOAT32 m1[], register FLOAT32 m2[]) {
00970 register INT32 i, n;
00971
00972 n = n1 + n2;
00973 for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) {
00974 if (ParamDesc->Circular) {
00975
00976
00977
00978
00979
00980 if ((*m2 - *m1) > ParamDesc->HalfRange) {
00981 *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;
00982 if (*m < ParamDesc->Min)
00983 *m += ParamDesc->Range;
00984 }
00985 else if ((*m1 - *m2) > ParamDesc->HalfRange) {
00986 *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;
00987 if (*m < ParamDesc->Min)
00988 *m += ParamDesc->Range;
00989 }
00990 else
00991 *m = (n1 * *m1 + n2 * *m2) / n;
00992 }
00993 else
00994 *m = (n1 * *m1 + n2 * *m2) / n;
00995 }
00996 return (n);
00997 }
00998
00999
01011 void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
01012 LIST ClusterStack = NIL;
01013 CLUSTER *Cluster;
01014 PROTOTYPE *Prototype;
01015
01016
01017
01018 if (Clusterer->Root != NULL)
01019 ClusterStack = push (NIL, Clusterer->Root);
01020
01021
01022 while (ClusterStack != NIL) {
01023
01024
01025
01026 Cluster = (CLUSTER *) first (ClusterStack);
01027 ClusterStack = pop (ClusterStack);
01028 Prototype = MakePrototype (Clusterer, Config, Cluster);
01029 if (Prototype != NULL) {
01030 Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype);
01031 }
01032 else {
01033 ClusterStack = push (ClusterStack, Cluster->Right);
01034 ClusterStack = push (ClusterStack, Cluster->Left);
01035 }
01036 }
01037 }
01038
01039
01060 PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
01061 CLUSTERCONFIG *Config,
01062 CLUSTER *Cluster) {
01063 STATISTICS *Statistics;
01064 PROTOTYPE *Proto;
01065 BUCKETS *Buckets;
01066
01067
01068 if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal))
01069 return (NULL);
01070
01071
01072 Statistics =
01073 ComputeStatistics (Clusterer->SampleSize, Clusterer->ParamDesc, Cluster);
01074
01075
01076
01077
01078 Proto = MakeDegenerateProto (Clusterer->SampleSize, Cluster, Statistics,
01079 Config->ProtoStyle,
01080 (INT32) (Config->MinSamples *
01081 Clusterer->NumChar));
01082 if (Proto != NULL) {
01083 FreeStatistics(Statistics);
01084 return (Proto);
01085 }
01086
01087 if (!Independent (Clusterer->ParamDesc, Clusterer->SampleSize,
01088 Statistics->CoVariance, Config->Independence)) {
01089 FreeStatistics(Statistics);
01090 return (NULL);
01091 }
01092
01093 if (HOTELLING && Config->ProtoStyle == elliptical) {
01094 Proto = TestEllipticalProto(Clusterer, Cluster, Statistics);
01095 if (Proto != NULL) {
01096 FreeStatistics(Statistics);
01097 return Proto;
01098 }
01099 }
01100
01101
01102 Buckets = GetBuckets (normal, Cluster->SampleCount, Config->Confidence);
01103
01104
01105 switch (Config->ProtoStyle) {
01106 case spherical:
01107 Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets);
01108 break;
01109 case elliptical:
01110 Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets);
01111 break;
01112 case mixed:
01113 Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets,
01114 Config->Confidence);
01115 break;
01116 case automatic:
01117 Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets);
01118 if (Proto != NULL)
01119 break;
01120 Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets);
01121 if (Proto != NULL)
01122 break;
01123 Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets,
01124 Config->Confidence);
01125 break;
01126 }
01127 FreeBuckets(Buckets);
01128 FreeStatistics(Statistics);
01129 return (Proto);
01130 }
01131
01132
01155 PROTOTYPE *MakeDegenerateProto(
01156 UINT16 N,
01157 CLUSTER *Cluster,
01158 STATISTICS *Statistics,
01159 PROTOSTYLE Style,
01160 INT32 MinSamples) {
01161 PROTOTYPE *Proto = NULL;
01162
01163 if (MinSamples < MINSAMPLESNEEDED)
01164 MinSamples = MINSAMPLESNEEDED;
01165
01166 if (Cluster->SampleCount < MinSamples) {
01167 switch (Style) {
01168 case spherical:
01169 Proto = NewSphericalProto (N, Cluster, Statistics);
01170 break;
01171 case elliptical:
01172 case automatic:
01173 Proto = NewEllipticalProto (N, Cluster, Statistics);
01174 break;
01175 case mixed:
01176 Proto = NewMixedProto (N, Cluster, Statistics);
01177 break;
01178 }
01179 Proto->Significant = FALSE;
01180 }
01181 return (Proto);
01182 }
01183
01197 PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
01198 CLUSTER *Cluster,
01199 STATISTICS *Statistics) {
01200 int N = Clusterer->SampleSize;
01201 CLUSTER* Left = Cluster->Left;
01202 CLUSTER* Right = Cluster->Right;
01203 if (Left == NULL || Right == NULL)
01204 return NULL;
01205 int TotalDims = Left->SampleCount + Right->SampleCount;
01206 if (TotalDims < N + 1)
01207 return NULL;
01208 FLOAT32* Inverse = (FLOAT32 *) Emalloc(N * N * sizeof(FLOAT32));
01209 FLOAT32* Delta = (FLOAT32*) Emalloc(N * sizeof(FLOAT32));
01210 double err = InvertMatrix(Statistics->CoVariance, N, Inverse);
01211 if (err > 1) {
01212 cprintf("Clustering error: Matrix inverse failed with error %g\n", err);
01213 }
01214 for (int dim = 0; dim < N; ++dim) {
01215 Delta[dim] = Left->Mean[dim] - Right->Mean[dim];
01216 }
01217
01218 double Tsq = 0.0;
01219 for (int x = 0; x < N; ++x) {
01220 double temp = 0.0;
01221 for (int y = 0; y < N; ++y) {
01222 temp += Inverse[y + N*x] * Delta[y];
01223 }
01224 Tsq += Delta[x] * temp;
01225 }
01226 memfree(Inverse);
01227 memfree(Delta);
01228 Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;
01229 double F = Tsq * (TotalDims - N - 1) / ((TotalDims - N) * 2);
01230 int Fx = N;
01231 if (Fx > FTABLE_X)
01232 Fx = FTABLE_X;
01233 --Fx;
01234 int Fy = TotalDims - N - 1;
01235 if (Fy > FTABLE_Y)
01236 Fy = FTABLE_Y;
01237 --Fy;
01238 if (F < FTable[Fy][Fx]) {
01239 return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);
01240 }
01241 return NULL;
01242 }
01243
01259 PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,
01260 CLUSTER *Cluster,
01261 STATISTICS *Statistics,
01262 BUCKETS *Buckets) {
01263 PROTOTYPE *Proto = NULL;
01264 int i;
01265
01266
01267 for (i = 0; i < Clusterer->SampleSize; i++) {
01268 if (Clusterer->ParamDesc[i].NonEssential)
01269 continue;
01270
01271 FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),
01272 Cluster->Mean[i],
01273 sqrt ((FLOAT64) (Statistics->AvgVariance)));
01274 if (!DistributionOK (Buckets))
01275 break;
01276 }
01277
01278 if (i >= Clusterer->SampleSize)
01279 Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics);
01280 return (Proto);
01281 }
01282
01283
01299 PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,
01300 CLUSTER *Cluster,
01301 STATISTICS *Statistics,
01302 BUCKETS *Buckets) {
01303 PROTOTYPE *Proto = NULL;
01304 int i;
01305
01306
01307 for (i = 0; i < Clusterer->SampleSize; i++) {
01308 if (Clusterer->ParamDesc[i].NonEssential)
01309 continue;
01310
01311 FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),
01312 Cluster->Mean[i],
01313 sqrt ((FLOAT64) Statistics->
01314 CoVariance[i * (Clusterer->SampleSize + 1)]));
01315 if (!DistributionOK (Buckets))
01316 break;
01317 }
01318
01319 if (i >= Clusterer->SampleSize)
01320 Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);
01321 return (Proto);
01322 }
01323
01324
01344 PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,
01345 CLUSTER *Cluster,
01346 STATISTICS *Statistics,
01347 BUCKETS *NormalBuckets,
01348 FLOAT64 Confidence) {
01349 PROTOTYPE *Proto;
01350 int i;
01351 BUCKETS *UniformBuckets = NULL;
01352 BUCKETS *RandomBuckets = NULL;
01353
01354
01355 Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics);
01356
01357
01358 for (i = 0; i < Clusterer->SampleSize; i++) {
01359 if (Clusterer->ParamDesc[i].NonEssential)
01360 continue;
01361
01362 FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),
01363 Proto->Mean[i],
01364 sqrt ((FLOAT64) Proto->Variance.Elliptical[i]));
01365 if (DistributionOK (NormalBuckets))
01366 continue;
01367
01368 if (RandomBuckets == NULL)
01369 RandomBuckets =
01370 GetBuckets (D_random, Cluster->SampleCount, Confidence);
01371 MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i]));
01372 FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),
01373 Proto->Mean[i], Proto->Variance.Elliptical[i]);
01374 if (DistributionOK (RandomBuckets))
01375 continue;
01376
01377 if (UniformBuckets == NULL)
01378 UniformBuckets =
01379 GetBuckets (uniform, Cluster->SampleCount, Confidence);
01380 MakeDimUniform(i, Proto, Statistics);
01381 FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),
01382 Proto->Mean[i], Proto->Variance.Elliptical[i]);
01383 if (DistributionOK (UniformBuckets))
01384 continue;
01385 break;
01386 }
01387
01388 if (i < Clusterer->SampleSize) {
01389 FreePrototype(Proto);
01390 Proto = NULL;
01391 }
01392 if (UniformBuckets != NULL)
01393 FreeBuckets(UniformBuckets);
01394 if (RandomBuckets != NULL)
01395 FreeBuckets(RandomBuckets);
01396 return (Proto);
01397 }
01398
01399
01410 void MakeDimRandom(UINT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {
01411 Proto->Distrib[i] = D_random;
01412 Proto->Mean[i] = ParamDesc->MidRange;
01413 Proto->Variance.Elliptical[i] = ParamDesc->HalfRange;
01414
01415
01416 Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];
01417 Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range;
01418 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
01419 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
01420
01421
01422 }
01423
01424
01436 void MakeDimUniform(UINT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {
01437 Proto->Distrib[i] = uniform;
01438 Proto->Mean[i] = Proto->Cluster->Mean[i] +
01439 (Statistics->Min[i] + Statistics->Max[i]) / 2;
01440 Proto->Variance.Elliptical[i] =
01441 (Statistics->Max[i] - Statistics->Min[i]) / 2;
01442 if (Proto->Variance.Elliptical[i] < MINVARIANCE)
01443 Proto->Variance.Elliptical[i] = MINVARIANCE;
01444
01445
01446 Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];
01447 Proto->Magnitude.Elliptical[i] =
01448 1.0 / (2.0 * Proto->Variance.Elliptical[i]);
01449 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
01450 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
01451
01452
01453 }
01454
01455
01473 STATISTICS *
01474 ComputeStatistics (INT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {
01475 STATISTICS *Statistics;
01476 int i, j;
01477 FLOAT32 *CoVariance;
01478 FLOAT32 *Distance;
01479 LIST SearchState;
01480 SAMPLE *Sample;
01481 UINT32 SampleCountAdjustedForBias;
01482
01483
01484 Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS));
01485 Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32));
01486 Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01487 Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01488
01489
01490 Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01491
01492
01493 Statistics->AvgVariance = 1.0;
01494 CoVariance = Statistics->CoVariance;
01495 for (i = 0; i < N; i++) {
01496 Statistics->Min[i] = 0.0;
01497 Statistics->Max[i] = 0.0;
01498 for (j = 0; j < N; j++, CoVariance++)
01499 *CoVariance = 0;
01500 }
01501
01502 InitSampleSearch(SearchState, Cluster);
01503 while ((Sample = NextSample (&SearchState)) != NULL) {
01504 for (i = 0; i < N; i++) {
01505 Distance[i] = Sample->Mean[i] - Cluster->Mean[i];
01506 if (ParamDesc[i].Circular) {
01507 if (Distance[i] > ParamDesc[i].HalfRange)
01508 Distance[i] -= ParamDesc[i].Range;
01509 if (Distance[i] < -ParamDesc[i].HalfRange)
01510 Distance[i] += ParamDesc[i].Range;
01511 }
01512 if (Distance[i] < Statistics->Min[i])
01513 Statistics->Min[i] = Distance[i];
01514 if (Distance[i] > Statistics->Max[i])
01515 Statistics->Max[i] = Distance[i];
01516 }
01517 CoVariance = Statistics->CoVariance;
01518 for (i = 0; i < N; i++)
01519 for (j = 0; j < N; j++, CoVariance++)
01520 *CoVariance += Distance[i] * Distance[j];
01521 }
01522
01523
01524
01525
01526 if (Cluster->SampleCount > 1)
01527 SampleCountAdjustedForBias = Cluster->SampleCount - 1;
01528 else
01529 SampleCountAdjustedForBias = 1;
01530 CoVariance = Statistics->CoVariance;
01531 for (i = 0; i < N; i++)
01532 for (j = 0; j < N; j++, CoVariance++) {
01533 *CoVariance /= SampleCountAdjustedForBias;
01534 if (j == i) {
01535 if (*CoVariance < MINVARIANCE)
01536 *CoVariance = MINVARIANCE;
01537 Statistics->AvgVariance *= *CoVariance;
01538 }
01539 }
01540 Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance,
01541 1.0 / N);
01542
01543
01544 memfree(Distance);
01545 return (Statistics);
01546 }
01547
01548
01564 PROTOTYPE *NewSphericalProto(UINT16 N,
01565 CLUSTER *Cluster,
01566 STATISTICS *Statistics) {
01567 PROTOTYPE *Proto;
01568
01569 Proto = NewSimpleProto (N, Cluster);
01570
01571 Proto->Variance.Spherical = Statistics->AvgVariance;
01572 if (Proto->Variance.Spherical < MINVARIANCE)
01573 Proto->Variance.Spherical = MINVARIANCE;
01574
01575 Proto->Magnitude.Spherical =
01576 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
01577 Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical,
01578 (double) N);
01579 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
01580 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
01581
01582 return (Proto);
01583 }
01584
01585
01600 PROTOTYPE *NewEllipticalProto(INT16 N,
01601 CLUSTER *Cluster,
01602 STATISTICS *Statistics) {
01603 PROTOTYPE *Proto;
01604 FLOAT32 *CoVariance;
01605 int i;
01606
01607 Proto = NewSimpleProto (N, Cluster);
01608 Proto->Variance.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01609 Proto->Magnitude.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01610 Proto->Weight.Elliptical = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01611
01612 CoVariance = Statistics->CoVariance;
01613 Proto->TotalMagnitude = 1.0;
01614 for (i = 0; i < N; i++, CoVariance += N + 1) {
01615 Proto->Variance.Elliptical[i] = *CoVariance;
01616 if (Proto->Variance.Elliptical[i] < MINVARIANCE)
01617 Proto->Variance.Elliptical[i] = MINVARIANCE;
01618
01619 Proto->Magnitude.Elliptical[i] =
01620 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
01621 Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
01622 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
01623 }
01624 Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
01625 Proto->Style = elliptical;
01626 return (Proto);
01627 }
01628
01629
01647 PROTOTYPE *NewMixedProto(INT16 N, CLUSTER *Cluster, STATISTICS *Statistics) {
01648 PROTOTYPE *Proto;
01649 int i;
01650
01651 Proto = NewEllipticalProto (N, Cluster, Statistics);
01652 Proto->Distrib = (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
01653
01654 for (i = 0; i < N; i++) {
01655 Proto->Distrib[i] = normal;
01656 }
01657 Proto->Style = mixed;
01658 return (Proto);
01659 }
01660
01661
01673 PROTOTYPE *NewSimpleProto(INT16 N, CLUSTER *Cluster) {
01674 PROTOTYPE *Proto;
01675 int i;
01676
01677 Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
01678 Proto->Mean = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
01679
01680 for (i = 0; i < N; i++)
01681 Proto->Mean[i] = Cluster->Mean[i];
01682 Proto->Distrib = NULL;
01683
01684 Proto->Significant = TRUE;
01685 Proto->Style = spherical;
01686 Proto->NumSamples = Cluster->SampleCount;
01687 Proto->Cluster = Cluster;
01688 Proto->Cluster->Prototype = TRUE;
01689 return (Proto);
01690 }
01691
01692
01714 BOOL8
01715 Independent (PARAM_DESC ParamDesc[],
01716 INT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) {
01717 int i, j;
01718 FLOAT32 *VARii;
01719 FLOAT32 *VARjj;
01720 FLOAT32 CorrelationCoeff;
01721
01722 VARii = CoVariance;
01723 for (i = 0; i < N; i++, VARii += N + 1) {
01724 if (ParamDesc[i].NonEssential)
01725 continue;
01726
01727 VARjj = VARii + N + 1;
01728 CoVariance = VARii + 1;
01729 for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) {
01730 if (ParamDesc[j].NonEssential)
01731 continue;
01732
01733 if ((*VARii == 0.0) || (*VARjj == 0.0))
01734 CorrelationCoeff = 0.0;
01735 else
01736 CorrelationCoeff =
01737 sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj)));
01738 if (CorrelationCoeff > Independence)
01739 return (FALSE);
01740 }
01741 }
01742 return (TRUE);
01743 }
01744
01745
01765 BUCKETS *GetBuckets(DISTRIBUTION Distribution,
01766 UINT32 SampleCount,
01767 FLOAT64 Confidence) {
01768 UINT16 NumberOfBuckets;
01769 BUCKETS *Buckets;
01770
01771
01772 NumberOfBuckets = OptimumNumberOfBuckets (SampleCount);
01773 Buckets = (BUCKETS *) first (search (OldBuckets[(int) Distribution],
01774 &NumberOfBuckets, NumBucketsMatch));
01775
01776
01777 if (Buckets != NULL) {
01778 OldBuckets[(int) Distribution] =
01779 delete_d (OldBuckets[(int) Distribution], Buckets, ListEntryMatch);
01780 if (SampleCount != Buckets->SampleCount)
01781 AdjustBuckets(Buckets, SampleCount);
01782 if (Confidence != Buckets->Confidence) {
01783 Buckets->Confidence = Confidence;
01784 Buckets->ChiSquared = ComputeChiSquared
01785 (DegreesOfFreedom (Distribution, Buckets->NumberOfBuckets),
01786 Confidence);
01787 }
01788 InitBuckets(Buckets);
01789 }
01790 else
01791 Buckets = MakeBuckets (Distribution, SampleCount, Confidence);
01792 return (Buckets);
01793 }
01794
01795
01817 BUCKETS *MakeBuckets(DISTRIBUTION Distribution,
01818 UINT32 SampleCount,
01819 FLOAT64 Confidence) {
01820 static DENSITYFUNC DensityFunction[] =
01821 { NormalDensity, UniformDensity, UniformDensity };
01822 int i, j;
01823 BUCKETS *Buckets;
01824 FLOAT64 BucketProbability;
01825 FLOAT64 NextBucketBoundary;
01826 FLOAT64 Probability;
01827 FLOAT64 ProbabilityDelta;
01828 FLOAT64 LastProbDensity;
01829 FLOAT64 ProbDensity;
01830 UINT16 CurrentBucket;
01831 BOOL8 Symmetrical;
01832
01833
01834 Buckets = (BUCKETS *) Emalloc (sizeof (BUCKETS));
01835 Buckets->NumberOfBuckets = OptimumNumberOfBuckets (SampleCount);
01836 Buckets->SampleCount = SampleCount;
01837 Buckets->Confidence = Confidence;
01838 Buckets->Count =
01839 (UINT32 *) Emalloc (Buckets->NumberOfBuckets * sizeof (UINT32));
01840 Buckets->ExpectedCount =
01841 (FLOAT32 *) Emalloc (Buckets->NumberOfBuckets * sizeof (FLOAT32));
01842
01843
01844 Buckets->Distribution = Distribution;
01845 for (i = 0; i < Buckets->NumberOfBuckets; i++) {
01846 Buckets->Count[i] = 0;
01847 Buckets->ExpectedCount[i] = 0.0;
01848 }
01849
01850
01851 Symmetrical = TRUE;
01852 Buckets->ChiSquared = ComputeChiSquared
01853 (DegreesOfFreedom (Distribution, Buckets->NumberOfBuckets), Confidence);
01854
01855 if (Symmetrical) {
01856
01857 BucketProbability = 1.0 / (FLOAT64) (Buckets->NumberOfBuckets);
01858
01859
01860 CurrentBucket = Buckets->NumberOfBuckets / 2;
01861 if (Odd (Buckets->NumberOfBuckets))
01862 NextBucketBoundary = BucketProbability / 2;
01863 else
01864 NextBucketBoundary = BucketProbability;
01865
01866 Probability = 0.0;
01867 LastProbDensity =
01868 (*DensityFunction[(int) Distribution]) (BUCKETTABLESIZE / 2);
01869 for (i = BUCKETTABLESIZE / 2; i < BUCKETTABLESIZE; i++) {
01870 ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1);
01871 ProbabilityDelta = Integral (LastProbDensity, ProbDensity, 1.0);
01872 Probability += ProbabilityDelta;
01873 if (Probability > NextBucketBoundary) {
01874 if (CurrentBucket < Buckets->NumberOfBuckets - 1)
01875 CurrentBucket++;
01876 NextBucketBoundary += BucketProbability;
01877 }
01878 Buckets->Bucket[i] = CurrentBucket;
01879 Buckets->ExpectedCount[CurrentBucket] +=
01880 (FLOAT32) (ProbabilityDelta * SampleCount);
01881 LastProbDensity = ProbDensity;
01882 }
01883
01884 Buckets->ExpectedCount[CurrentBucket] +=
01885 (FLOAT32) ((0.5 - Probability) * SampleCount);
01886
01887
01888 for (i = 0, j = BUCKETTABLESIZE - 1; i < j; i++, j--)
01889 Buckets->Bucket[i] =
01890 Mirror (Buckets->Bucket[j], Buckets->NumberOfBuckets);
01891
01892
01893 for (i = 0, j = Buckets->NumberOfBuckets - 1; i <= j; i++, j--)
01894 Buckets->ExpectedCount[i] += Buckets->ExpectedCount[j];
01895 }
01896 return (Buckets);
01897 }
01898
01899
01920 UINT16 OptimumNumberOfBuckets(UINT32 SampleCount) {
01921 UINT8 Last, Next;
01922 FLOAT32 Slope;
01923
01924 if (SampleCount < CountTable[0])
01925 return (BucketsTable[0]);
01926
01927 for (Last = 0, Next = 1; Next < LOOKUPTABLESIZE; Last++, Next++) {
01928 if (SampleCount <= CountTable[Next]) {
01929 Slope = (FLOAT32) (BucketsTable[Next] - BucketsTable[Last]) /
01930 (FLOAT32) (CountTable[Next] - CountTable[Last]);
01931 return ((UINT16) (BucketsTable[Last] +
01932 Slope * (SampleCount - CountTable[Last])));
01933 }
01934 }
01935 return (BucketsTable[Last]);
01936 }
01937
01938
01960 FLOAT64
01961 ComputeChiSquared (UINT16 DegreesOfFreedom, FLOAT64 Alpha)
01962 #define CHIACCURACY 0.01
01963 #define MINALPHA (1e-200)
01964 {
01965 static LIST ChiWith[MAXDEGREESOFFREEDOM + 1];
01966
01967 CHISTRUCT *OldChiSquared;
01968 CHISTRUCT SearchKey;
01969
01970
01971
01972
01973
01974 if (Alpha < MINALPHA)
01975 Alpha = MINALPHA;
01976 if (Alpha > 1.0)
01977 Alpha = 1.0;
01978 if (Odd (DegreesOfFreedom))
01979 DegreesOfFreedom++;
01980
01981
01982
01983
01984
01985
01986 SearchKey.Alpha = Alpha;
01987 OldChiSquared = (CHISTRUCT *) first (search (ChiWith[DegreesOfFreedom],
01988 &SearchKey, AlphaMatch));
01989
01990 if (OldChiSquared == NULL) {
01991 OldChiSquared = NewChiStruct (DegreesOfFreedom, Alpha);
01992 OldChiSquared->ChiSquared = Solve (ChiArea, OldChiSquared,
01993 (FLOAT64) DegreesOfFreedom,
01994 (FLOAT64) CHIACCURACY);
01995 ChiWith[DegreesOfFreedom] = push (ChiWith[DegreesOfFreedom],
01996 OldChiSquared);
01997 }
01998 else {
01999
02000 }
02001
02002 return (OldChiSquared->ChiSquared);
02003
02004 }
02005
02006
02025 FLOAT64 NormalDensity(INT32 x) {
02026 FLOAT64 Distance;
02027
02028 Distance = x - NormalMean;
02029 return (NormalMagnitude *
02030 exp (-0.5 * Distance * Distance / NormalVariance));
02031 }
02032
02033
02046 FLOAT64 UniformDensity(INT32 x) {
02047 static FLOAT64 UniformDistributionDensity = (FLOAT64) 1.0 / BUCKETTABLESIZE;
02048
02049 if ((x >= 0.0) && (x <= BUCKETTABLESIZE))
02050 return (UniformDistributionDensity);
02051 else
02052 return ((FLOAT64) 0.0);
02053 }
02054
02055
02067 FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) {
02068 return ((f1 + f2) * Dx / 2.0);
02069 }
02070
02071
02097 void FillBuckets(BUCKETS *Buckets,
02098 CLUSTER *Cluster,
02099 UINT16 Dim,
02100 PARAM_DESC *ParamDesc,
02101 FLOAT32 Mean,
02102 FLOAT32 StdDev) {
02103 UINT16 BucketID;
02104 int i;
02105 LIST SearchState;
02106 SAMPLE *Sample;
02107
02108
02109 for (i = 0; i < Buckets->NumberOfBuckets; i++)
02110 Buckets->Count[i] = 0;
02111
02112 if (StdDev == 0.0) {
02123 InitSampleSearch(SearchState, Cluster);
02124 i = 0;
02125 while ((Sample = NextSample (&SearchState)) != NULL) {
02126 if (Sample->Mean[Dim] > Mean)
02127 BucketID = Buckets->NumberOfBuckets - 1;
02128 else if (Sample->Mean[Dim] < Mean)
02129 BucketID = 0;
02130 else
02131 BucketID = i;
02132 Buckets->Count[BucketID] += 1;
02133 i++;
02134 if (i >= Buckets->NumberOfBuckets)
02135 i = 0;
02136 }
02137 }
02138 else {
02139
02140 InitSampleSearch(SearchState, Cluster);
02141 while ((Sample = NextSample (&SearchState)) != NULL) {
02142 switch (Buckets->Distribution) {
02143 case normal:
02144 BucketID = NormalBucket (ParamDesc, Sample->Mean[Dim],
02145 Mean, StdDev);
02146 break;
02147 case D_random:
02148 case uniform:
02149 BucketID = UniformBucket (ParamDesc, Sample->Mean[Dim],
02150 Mean, StdDev);
02151 break;
02152 default:
02153 BucketID = 0;
02154 }
02155 Buckets->Count[Buckets->Bucket[BucketID]] += 1;
02156 }
02157 }
02158 }
02159
02160
02180 UINT16 NormalBucket(PARAM_DESC *ParamDesc,
02181 FLOAT32 x,
02182 FLOAT32 Mean,
02183 FLOAT32 StdDev) {
02184 FLOAT32 X;
02185
02186
02187 if (ParamDesc->Circular) {
02188 if (x - Mean > ParamDesc->HalfRange)
02189 x -= ParamDesc->Range;
02190 else if (x - Mean < -ParamDesc->HalfRange)
02191 x += ParamDesc->Range;
02192 }
02193
02194 X = ((x - Mean) / StdDev) * NormalStdDev + NormalMean;
02195 if (X < 0)
02196 return ((UINT16) 0);
02197 if (X > BUCKETTABLESIZE - 1)
02198 return ((UINT16) (BUCKETTABLESIZE - 1));
02199 return ((UINT16) floor ((FLOAT64) X));
02200 }
02201
02202
02220 UINT16 UniformBucket(PARAM_DESC *ParamDesc,
02221 FLOAT32 x,
02222 FLOAT32 Mean,
02223 FLOAT32 StdDev) {
02224 FLOAT32 X;
02225
02226
02227 if (ParamDesc->Circular) {
02228 if (x - Mean > ParamDesc->HalfRange)
02229 x -= ParamDesc->Range;
02230 else if (x - Mean < -ParamDesc->HalfRange)
02231 x += ParamDesc->Range;
02232 }
02233
02234 X = ((x - Mean) / (2 * StdDev) * BUCKETTABLESIZE + BUCKETTABLESIZE / 2.0);
02235 if (X < 0)
02236 return ((UINT16) 0);
02237 if (X > BUCKETTABLESIZE - 1)
02238 return ((UINT16) (BUCKETTABLESIZE - 1));
02239 return ((UINT16) floor ((FLOAT64) X));
02240 }
02241
02242
02258 BOOL8 DistributionOK(BUCKETS *Buckets) {
02259 FLOAT32 FrequencyDifference;
02260 FLOAT32 TotalDifference;
02261 int i;
02262
02263
02264 TotalDifference = 0.0;
02265 for (i = 0; i < Buckets->NumberOfBuckets; i++) {
02266 FrequencyDifference = Buckets->Count[i] - Buckets->ExpectedCount[i];
02267 TotalDifference += (FrequencyDifference * FrequencyDifference) /
02268 Buckets->ExpectedCount[i];
02269 }
02270
02271
02272 if (TotalDifference > Buckets->ChiSquared)
02273 return (FALSE);
02274 else
02275 return (TRUE);
02276 }
02277
02278
02288 void FreeStatistics(STATISTICS *Statistics) {
02289 memfree (Statistics->CoVariance);
02290 memfree (Statistics->Min);
02291 memfree (Statistics->Max);
02292 memfree(Statistics);
02293 }
02294
02295
02308 void FreeBuckets(BUCKETS *Buckets) {
02309 int Dist;
02310
02311 if (Buckets != NULL) {
02312 Dist = (int) Buckets->Distribution;
02313 OldBuckets[Dist] = (LIST) push (OldBuckets[Dist], Buckets);
02314 }
02315
02316 }
02317
02318
02330 void FreeCluster(CLUSTER *Cluster) {
02331 if (Cluster != NULL) {
02332 FreeCluster (Cluster->Left);
02333 FreeCluster (Cluster->Right);
02334 memfree(Cluster);
02335 }
02336 }
02337
02338
02356 UINT16 DegreesOfFreedom(DISTRIBUTION Distribution, UINT16 HistogramBuckets) {
02357 static UINT8 DegreeOffsets[] = { 3, 3, 1 };
02358
02359 UINT16 AdjustedNumBuckets;
02360
02361 AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int) Distribution];
02362 if (Odd (AdjustedNumBuckets))
02363 AdjustedNumBuckets++;
02364 return (AdjustedNumBuckets);
02365
02366 }
02367
02368
02381 int NumBucketsMatch(void *arg1,
02382 void *arg2) {
02383 BUCKETS *Histogram = (BUCKETS *) arg1;
02384 UINT16 *DesiredNumberOfBuckets = (UINT16 *) arg2;
02385
02386 return (*DesiredNumberOfBuckets == Histogram->NumberOfBuckets);
02387
02388 }
02389
02390
02401 int ListEntryMatch(void *arg1,
02402 void *arg2) {
02403 return (arg1 == arg2);
02404
02405 }
02406
02407
02419 void AdjustBuckets(BUCKETS *Buckets, UINT32 NewSampleCount) {
02420 int i;
02421 FLOAT64 AdjustFactor;
02422
02423 AdjustFactor = (((FLOAT64) NewSampleCount) /
02424 ((FLOAT64) Buckets->SampleCount));
02425
02426 for (i = 0; i < Buckets->NumberOfBuckets; i++) {
02427 Buckets->ExpectedCount[i] *= AdjustFactor;
02428 }
02429
02430 Buckets->SampleCount = NewSampleCount;
02431
02432 }
02433
02434
02443 void InitBuckets(BUCKETS *Buckets) {
02444 int i;
02445
02446 for (i = 0; i < Buckets->NumberOfBuckets; i++) {
02447 Buckets->Count[i] = 0;
02448 }
02449
02450 }
02451
02452
02466 int AlphaMatch(void *arg1,
02467 void *arg2) {
02468 CHISTRUCT *ChiStruct = (CHISTRUCT *) arg1;
02469 CHISTRUCT *SearchKey = (CHISTRUCT *) arg2;
02470
02471 return (ChiStruct->Alpha == SearchKey->Alpha);
02472
02473 }
02474
02475
02487 CHISTRUCT *NewChiStruct(UINT16 DegreesOfFreedom, FLOAT64 Alpha) {
02488 CHISTRUCT *NewChiStruct;
02489
02490 NewChiStruct = (CHISTRUCT *) Emalloc (sizeof (CHISTRUCT));
02491 NewChiStruct->DegreesOfFreedom = DegreesOfFreedom;
02492 NewChiStruct->Alpha = Alpha;
02493 return (NewChiStruct);
02494
02495 }
02496
02497
02516 FLOAT64 Solve (SOLVEFUNC Function,
02517 void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
02518 #define INITIALDELTA 0.1
02519 #define DELTARATIO 0.1
02520 {
02521 FLOAT64 x;
02522 FLOAT64 f;
02523 FLOAT64 Slope;
02524 FLOAT64 Delta;
02525 FLOAT64 NewDelta;
02526 FLOAT64 xDelta;
02527 FLOAT64 LastPosX, LastNegX;
02528
02529 x = InitialGuess;
02530 Delta = INITIALDELTA;
02531 LastPosX = MAX_FLOAT32;
02532 LastNegX = -MAX_FLOAT32;
02533 f = (*Function) ((CHISTRUCT *) FunctionParams, x);
02534 while (Abs (LastPosX - LastNegX) > Accuracy) {
02535
02536 if (f < 0)
02537 LastNegX = x;
02538 else
02539 LastPosX = x;
02540
02541
02542 Slope =
02543 ((*Function) ((CHISTRUCT *) FunctionParams, x + Delta) - f) / Delta;
02544
02545
02546 xDelta = f / Slope;
02547 x -= xDelta;
02548
02549
02550
02551 NewDelta = Abs (xDelta) * DELTARATIO;
02552 if (NewDelta < Delta)
02553 Delta = NewDelta;
02554
02555
02556 f = (*Function) ((CHISTRUCT *) FunctionParams, x);
02557 }
02558 return (x);
02559
02560 }
02561
02562
02590 FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) {
02591 int i, N;
02592 FLOAT64 SeriesTotal;
02593 FLOAT64 Denominator;
02594 FLOAT64 PowerOfx;
02595
02596 N = ChiParams->DegreesOfFreedom / 2 - 1;
02597 SeriesTotal = 1;
02598 Denominator = 1;
02599 PowerOfx = 1;
02600 for (i = 1; i <= N; i++) {
02601 Denominator *= 2 * i;
02602 PowerOfx *= x;
02603 SeriesTotal += PowerOfx / Denominator;
02604 }
02605 return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->Alpha);
02606
02607 }
02608
02609
02641 BOOL8
02642 MultipleCharSamples (CLUSTERER * Clusterer,
02643 CLUSTER * Cluster, FLOAT32 MaxIllegal)
02644 #define ILLEGAL_CHAR 2
02645 {
02646 static BOOL8 *CharFlags = NULL;
02647 static INT32 NumFlags = 0;
02648 int i;
02649 LIST SearchState;
02650 SAMPLE *Sample;
02651 INT32 CharID;
02652 INT32 NumCharInCluster;
02653 INT32 NumIllegalInCluster;
02654 FLOAT32 PercentIllegal;
02655
02656
02657 NumCharInCluster = Cluster->SampleCount;
02658 NumIllegalInCluster = 0;
02659
02660 if (Clusterer->NumChar > NumFlags) {
02661 if (CharFlags != NULL)
02662 memfree(CharFlags);
02663 NumFlags = Clusterer->NumChar;
02664 CharFlags = (BOOL8 *) Emalloc (NumFlags * sizeof (BOOL8));
02665 }
02666
02667 for (i = 0; i < NumFlags; i++)
02668 CharFlags[i] = FALSE;
02669
02670
02671 InitSampleSearch(SearchState, Cluster);
02672 while ((Sample = NextSample (&SearchState)) != NULL) {
02673 CharID = Sample->CharID;
02674 if (CharFlags[CharID] == FALSE) {
02675 CharFlags[CharID] = TRUE;
02676 }
02677 else {
02678 if (CharFlags[CharID] == TRUE) {
02679 NumIllegalInCluster++;
02680 CharFlags[CharID] = ILLEGAL_CHAR;
02681 }
02682 NumCharInCluster--;
02683 PercentIllegal = (FLOAT32) NumIllegalInCluster / NumCharInCluster;
02684 if (PercentIllegal > MaxIllegal)
02685 return (TRUE);
02686 }
02687 }
02688 return (FALSE);
02689
02690 }
02691
02704 double InvertMatrix(const float* input, int size, float* inv) {
02705 double** U;
02706 double* Umem;
02707 double** U_inv;
02708 double* U_invmem;
02709 double** L;
02710 double* Lmem;
02711
02712
02713 ALLOC_2D_ARRAY(size, size, Umem, U, double);
02714 ALLOC_2D_ARRAY(size, size, U_invmem, U_inv, double);
02715 ALLOC_2D_ARRAY(size, size, Lmem, L, double);
02716
02717
02718 int row;
02719 int col;
02720 for (row = 0; row < size; row++) {
02721 for (col = 0; col < size; col++) {
02722 U[row][col] = input[row*size + col];
02723 L[row][col] = row == col ? 1.0 : 0.0;
02724 U_inv[row][col] = 0.0;
02725 }
02726 }
02727
02728
02729 for (col = 0; col < size; ++col) {
02730
02731 int best_row = 0;
02732 double best_pivot = -1.0;
02733 for (row = col; row < size; ++row) {
02734 if (Abs(U[row][col]) > best_pivot) {
02735 best_pivot = Abs(U[row][col]);
02736 best_row = row;
02737 }
02738 }
02739
02740 if (best_row != col) {
02741 for (int k = 0; k < size; ++k) {
02742 double tmp = U[best_row][k];
02743 U[best_row][k] = U[col][k];
02744 U[col][k] = tmp;
02745 tmp = L[best_row][k];
02746 L[best_row][k] = L[col][k];
02747 L[col][k] = tmp;
02748 }
02749 }
02750
02751 for (row = col + 1; row < size; ++row) {
02752 double ratio = -U[row][col] / U[col][col];
02753 for (int j = col; j < size; ++j) {
02754 U[row][j] += U[col][j] * ratio;
02755 }
02756 for (int k = 0; k < size; ++k) {
02757 L[row][k] += L[col][k] * ratio;
02758 }
02759 }
02760 }
02761
02762 for (col = 0; col < size; ++col) {
02763 U_inv[col][col] = 1.0 / U[col][col];
02764 for (row = col - 1; row >= 0; --row) {
02765 double total = 0.0;
02766 for (int k = col; k > row; --k) {
02767 total += U[row][k] * U_inv[k][col];
02768 }
02769 U_inv[row][col] = -total / U[row][row];
02770 }
02771 }
02772
02773 for (row = 0; row < size; row++) {
02774 for (col = 0; col < size; col++) {
02775 double sum = 0.0;
02776 for (int k = row; k < size; ++k) {
02777 sum += U_inv[row][k] * L[k][col];
02778 }
02779 inv[row*size + col] = sum;
02780 }
02781 }
02782
02783 double error_sum = 0.0;
02784 for (row = 0; row < size; row++) {
02785 for (col = 0; col < size; col++) {
02786 double sum = 0.0;
02787 for (int k = 0; k < size; ++k) {
02788 sum += input[row*size + k] * inv[k *size + col];
02789 }
02790 if (row != col) {
02791 error_sum += Abs(sum);
02792 }
02793 }
02794 }
02795 return error_sum;
02796 }
02797
02798