classify/clusttool.cpp File Reference

#include "clusttool.h"
#include "const.h"
#include "danerror.h"
#include "emalloc.h"
#include "scanutils.h"
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Defines

Functions


Define Documentation

#define MAXSAMPLESIZE   65535

max num of dimensions in feature space

Definition at line 33 of file clusttool.cpp.

Referenced by ReadSampleSize().

#define TOKENSIZE   80

max size of tokens read from an input file

Definition at line 31 of file clusttool.cpp.

Referenced by ReadParamDesc(), ReadProtoStyle(), and ReadPrototype().


Function Documentation

double fake_drand48 (  ) 

Cheap replacement for drand48 which is not available on the PC.

Parameters:
none 

Definition at line 504 of file clusttool.cpp.

Referenced by UniformRandomNumber().

00504                       {  // Stand-in for drand48(): returns a pseudo-random double in the range [0, 1).
00505   return rand () / (RAND_MAX + 1.0);  // divisor is the double RAND_MAX + 1.0, so the quotient stays strictly below 1.0
00506 }

FLOAT32* ReadNFloats ( FILE *  File,
UINT16  N,
FLOAT32  Buffer[] 
)

Reads N floats from the specified text file and places them into Buffer.

Parameters:
File Open text file to read floats from
N Number of floats to read
Buffer Pointer to buffer to place floats into
Returns:
Pointer to buffer holding floats or NULL if EOF
If Buffer is NULL, a buffer is created and passed back to the caller. If EOF is encountered before any floats can be read, NULL is returned.
Note:
Exceptions: ILLEGALFLOAT
Date:
6/6/89, DSJ, Created.

Definition at line 291 of file clusttool.cpp.

References DoError(), Emalloc(), fscanf(), and ILLEGALFLOAT.

Referenced by ReadPrototype().

00291                                                       {  // Reads N floats from File into Buffer; returns Buffer, or NULL if EOF is hit before the first float.
00292   int i;
00293   int NumFloatsRead;
00294 
00295   if (Buffer == NULL)  // caller supplied no storage: allocate room for N floats
00296     Buffer = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00297 
00298   for (i = 0; i < N; i++) {
00299     NumFloatsRead = fscanf (File, "%f", &(Buffer[i]));  // "%f" stores a float -- assumes FLOAT32 is float; TODO confirm
00300     if (NumFloatsRead != 1) {
00301       if ((NumFloatsRead == EOF) && (i == 0))  // EOF before anything was read is reported as NULL, not as an error
00302         return (NULL);  // NOTE(review): a buffer allocated at line 296 is not released on this path -- confirm whether that leak matters
00303       else
00304         DoError (ILLEGALFLOAT, "Illegal float specification");  // malformed token, or EOF after a partial read; whether DoError returns is not visible here
00305     }
00306   }
00307   return (Buffer);
00308 }                                // ReadNFloats

PARAM_DESC* ReadParamDesc ( FILE *  File,
UINT16  N 
)

Reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters:
File open text file to read N parameter descriptions from
N number of parameter descriptions to read
Returns:
Pointer to an array of parameter descriptors.
Note:
Exceptions:
  • ILLEGALCIRCULARSPEC
  • ILLEGALESSENTIALSPEC
  • ILLEGALMINMAXSPEC
Date:
6/6/89, DSJ, Created.

Definition at line 71 of file clusttool.cpp.

References PARAM_DESC::Circular, DoError(), Emalloc(), FALSE, fscanf(), PARAM_DESC::HalfRange, ILLEGALCIRCULARSPEC, ILLEGALESSENTIALSPEC, ILLEGALMINMAXSPEC, PARAM_DESC::Max, PARAM_DESC::MidRange, PARAM_DESC::Min, PARAM_DESC::NonEssential, PARAM_DESC::Range, TOKENSIZE, and TRUE.

Referenced by ReadNormProtos().

00071                                                 {  // Reads N parameter descriptors from File into a newly Emalloc'ed array and returns it.
00072   int i;
00073   PARAM_DESC *ParamDesc;
00074   char Token[TOKENSIZE];
00075 
00076   ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
00077   for (i = 0; i < N; i++) {  // each descriptor is: <circular|linear token> <essential token> <min> <max>
00078     if (fscanf (File, "%s", Token) != 1)  // NOTE(review): "%s" has no field width, so a long token can overrun Token[TOKENSIZE]
00079       DoError (ILLEGALCIRCULARSPEC,
00080         "Illegal circular/linear specification");
00081     if (Token[0] == 'c')  // only the first character is examined: 'c' means circular, anything else linear
00082       ParamDesc[i].Circular = TRUE;
00083     else
00084       ParamDesc[i].Circular = FALSE;
00085 
00086     if (fscanf (File, "%s", Token) != 1)  // NOTE(review): same unbounded "%s" as above
00087       DoError (ILLEGALESSENTIALSPEC,
00088         "Illegal essential/non-essential spec");
00089     if (Token[0] == 'e')  // 'e' means essential; every other token is treated as non-essential
00090       ParamDesc[i].NonEssential = FALSE;
00091     else
00092       ParamDesc[i].NonEssential = TRUE;
00093     if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
00094       2)  // both Min and Max must convert
00095       DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
00096     ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;  // the three fields below are derived from Min/Max and stored with them
00097     ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
00098     ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
00099   }
00100   return (ParamDesc);
00101 }                                // ReadParamDesc

PROTOSTYLE ReadProtoStyle ( FILE *  File  ) 

Reads a single token from the specified text file and interprets it as a prototype style specification.

Parameters:
File open text file to read prototype style from
Returns:
Prototype style read from text file
Note:
Exceptions: ILLEGALSTYLESPEC Illegal prototype style specification
Date:
6/8/89, DSJ, Created.

Definition at line 248 of file clusttool.cpp.

References automatic, DoError(), elliptical, fscanf(), ILLEGALSTYLESPEC, mixed, spherical, and TOKENSIZE.

Referenced by ReadPrototype().

00248                                       {  // Reads one token from File and maps its first character to a PROTOSTYLE value.
00249   char Token[TOKENSIZE];
00250   PROTOSTYLE Style;
00251 
00252   if (fscanf (File, "%s", Token) != 1)  // NOTE(review): "%s" has no field width, so a long token can overrun Token[TOKENSIZE]
00253     DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
00254   switch (Token[0]) {  // only the first character decides the style
00255     case 's':
00256       Style = spherical;
00257       break;
00258     case 'e':
00259       Style = elliptical;
00260       break;
00261     case 'm':
00262       Style = mixed;
00263       break;
00264     case 'a':
00265       Style = automatic;
00266       break;
00267     default:
00268       Style = elliptical;  // gives Style a defined value should DoError return -- presumably only a fallback; TODO confirm
00269       DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
00270   }
00271   return (Style);
00272 }                                // ReadProtoStyle

PROTOTYPE* ReadPrototype ( FILE *  File,
UINT16  N 
)

Reads a textual description of a prototype from File.

Parameters:
File Open text file to read prototype from
N Number of dimensions used in prototype
Returns:
List of prototypes
File is usually 'tessdata/normproto'

Note:
Exceptions:
  • ILLEGALSIGNIFICANCESPEC
  • ILLEGALSAMPLECOUNT
  • ILLEGALMEANSPEC
  • ILLEGALVARIANCESPEC
  • ILLEGALDISTRIBUTION
Date:
6/6/89, DSJ, Created.

Definition at line 121 of file clusttool.cpp.

References proto::Cluster, D_random, proto::Distrib, DoError(), elliptical, FLOATUNION::Elliptical, Emalloc(), FALSE, fscanf(), ILLEGALDISTRIBUTION, ILLEGALMEANSPEC, ILLEGALSAMPLECOUNT, ILLEGALVARIANCESPEC, proto::LogMagnitude, proto::Magnitude, proto::Mean, mixed, normal, NULL, proto::NumSamples, PI, ReadNFloats(), ReadProtoStyle(), proto::Significant, spherical, FLOATUNION::Spherical, proto::Style, TOKENSIZE, proto::TotalMagnitude, TRUE, uniform, proto::Variance, and proto::Weight.

Referenced by ReadNormProtos().

00121                                                {  // Reads one prototype description from File; returns a newly allocated PROTOTYPE, or NULL at EOF.
00122   char Token[TOKENSIZE];
00123   int Status;
00124   PROTOTYPE *Proto;
00125   int SampleCount;
00126   int i;
00127 
00128   if ((Status = fscanf (File, "%s", Token)) == 1) {  // NOTE(review): "%s" has no field width, so a long token can overrun Token[TOKENSIZE]
00129     Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
00130     Proto->Cluster = NULL;
00131     if (Token[0] == 's')  // first character 's' marks the prototype significant; any other token means insignificant
00132       Proto->Significant = TRUE;
00133     else
00134       Proto->Significant = FALSE;
00135 
00136     Proto->Style = ReadProtoStyle (File);
00137 
00138     if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0))  // a negative count is rejected as well as a malformed one
00139       DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
00140     Proto->NumSamples = SampleCount;
00141 
00142     Proto->Mean = ReadNFloats (File, N, NULL);  // NULL buffer argument makes ReadNFloats allocate the N-float array
00143     if (Proto->Mean == NULL)
00144       DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
00145 
00146     switch (Proto->Style) {  // NOTE(review): no case for 'automatic', which ReadProtoStyle can return; Variance, Magnitude, Weight and Distrib stay unset then
00147       case spherical:
00148         if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)  // one variance value, read directly into the union member
00149           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00150         Proto->Magnitude.Spherical =
00151           1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));  // 1 / sqrt(2 * PI * variance)
00152         Proto->TotalMagnitude =
00153           pow (Proto->Magnitude.Spherical, (double) N);  // the same magnitude applies in every dimension, so the total is Magnitude^N
00154         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00155         Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
00156         Proto->Distrib = NULL;
00157         break;
00158       case elliptical:
00159         Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);  // one variance per dimension
00160         if (Proto->Variance.Elliptical == NULL)
00161           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00162         Proto->Magnitude.Elliptical =
00163           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00164         Proto->Weight.Elliptical =
00165           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00166         Proto->TotalMagnitude = 1.0;
00167         for (i = 0; i < N; i++) {
00168           Proto->Magnitude.Elliptical[i] =
00169             1.0 /
00170             sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
00171           Proto->Weight.Elliptical[i] =
00172             1.0 / Proto->Variance.Elliptical[i];
00173           Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];  // total is the product of the per-dimension magnitudes
00174         }
00175         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00176         Proto->Distrib = NULL;
00177         break;
00178       case mixed:
00179         Proto->Distrib =
00180           (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
00181         for (i = 0; i < N; i++) {  // first pass: one distribution keyword per dimension
00182           if (fscanf (File, "%s", Token) != 1)  // NOTE(review): same unbounded "%s" as at line 128
00183             DoError (ILLEGALDISTRIBUTION,
00184               "Illegal prototype distribution");
00185           switch (Token[0]) {  // only the first character of the keyword is examined
00186             case 'n':
00187               Proto->Distrib[i] = normal;
00188               break;
00189             case 'u':
00190               Proto->Distrib[i] = uniform;
00191               break;
00192             case 'r':
00193               Proto->Distrib[i] = D_random;
00194               break;
00195             default:
00196               DoError (ILLEGALDISTRIBUTION,
00197                 "Illegal prototype distribution");  // NOTE(review): Distrib[i] is left unset if DoError returns
00198           }
00199         }
00200         Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
00201         if (Proto->Variance.Elliptical == NULL)
00202           DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
00203         Proto->Magnitude.Elliptical =
00204           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00205         Proto->Weight.Elliptical =
00206           (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
00207         Proto->TotalMagnitude = 1.0;
00208         for (i = 0; i < N; i++) {  // second pass: magnitude (and weight) per dimension, by distribution
00209           switch (Proto->Distrib[i]) {
00210             case normal:
00211               Proto->Magnitude.Elliptical[i] = 1.0 /
00212                 sqrt ((double)
00213                 (2.0 * PI * Proto->Variance.Elliptical[i]));
00214               Proto->Weight.Elliptical[i] =
00215                 1.0 / Proto->Variance.Elliptical[i];
00216               break;
00217             case uniform:
00218             case D_random:  // NOTE(review): Weight.Elliptical[i] is never assigned for uniform/random dimensions
00219               Proto->Magnitude.Elliptical[i] = 1.0 /
00220                 (2.0 * Proto->Variance.Elliptical[i]);  // presumably Variance holds the half-width of the range here -- TODO confirm
00221               break;
00222           }
00223           Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
00224         }
00225         Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
00226         break;
00227     }
00228     return (Proto);
00229   }
00230   else if (Status == EOF)  // EOF before any token: nothing left to read, which is not an error
00231     return (NULL);
00232   else {
00233     DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
00234     return (NULL);  // reached only if DoError returns
00235   }
00236 }                                // ReadPrototype

UINT16 ReadSampleSize ( FILE *  File  ) 

Reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters:
File open text file to read sample size from
Returns:
Sample size
Note:
Exceptions: ILLEGALSAMPLESIZE illegal format or range
Date:
6/6/89, DSJ, Created.

Definition at line 48 of file clusttool.cpp.

References DoError(), fscanf(), ILLEGALSAMPLESIZE, and MAXSAMPLESIZE.

Referenced by ReadNormProtos().

00048                                   {  // Reads one integer from File and validates it against 0..MAXSAMPLESIZE.
00049   int SampleSize;
00050 
00051   if ((fscanf (File, "%d", &SampleSize) != 1) ||  // a token that is not an integer is rejected like an out-of-range value
00052     (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
00053     DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
00054   return (SampleSize);  // int converted to the UINT16 return type; NOTE(review): if DoError returns, an invalid or unread value is returned
00055 }                                // ReadSampleSize

FLOAT32 UniformRandomNumber ( FLOAT32  MMin,
FLOAT32  MMax 
)

Computes a random number which comes from a uniform distribution over the range from MMin to MMax.

Parameters:
MMin lower range of uniform distribution
MMax upper range of uniform distribution
Returns:
Uniform random number
Note:
Exceptions: None
Date:
6/6/89, DSJ, Created.

Definition at line 491 of file clusttool.cpp.

References fake_drand48().

Referenced by ReadTrainingSamples(), SmearBulges(), and SmearExtremities().

00491                                                         {  // Returns MMin plus a pseudo-random fraction of (MMax - MMin).
00492   double fake_drand48();  // block-scope declaration: the helper is defined after this function, at line 504
00493   FLOAT32 RandomNumber;
00494 
00495   RandomNumber = fake_drand48 ();  // double result converted to FLOAT32
00496   return (MMin + (RandomNumber * (MMax - MMin)));  // scales the fraction onto the MMin..MMax range
00497 }                                // UniformRandomNumber

void WriteNFloats ( FILE *  File,
UINT16  N,
FLOAT32  Array[] 
)

Writes a text representation of N floats from an array to a file.

Parameters:
File Open text file to write N floats to
N Number of floats to write
Array Array of floats to write
Returns:
None
All of the floats are placed on one line.
Note:
Exceptions: None
Date:
6/6/89, DSJ, Created.

Definition at line 404 of file clusttool.cpp.

Referenced by ComputeNormMatch(), and WritePrototype().

00404                                                       {  // Writes the first N elements of Array to File on a single line.
00405   int i;
00406 
00407   for (i = 0; i < N; i++)
00408     fprintf (File, " %9.6f", Array[i]);  // leading space separates values; minimum width 9 with 6 decimals
00409   fprintf (File, "\n");  // the newline is written even when N is 0
00410 }                                // WriteNFloats

void WriteParamDesc ( FILE *  File,
UINT16  N,
PARAM_DESC  ParamDesc[] 
)

Writes an array of dimension descriptors to the specified text file.

Parameters:
File Open text file to write param descriptors to
N Number of param descriptors to write
ParamDesc Array of param descriptors to write
Returns:
None
Note:
Exceptions: None
Date:
6/6/89, DSJ, Created.

Definition at line 322 of file clusttool.cpp.

Referenced by WriteNormProtos(), and WriteProtoList().

00322                                                                {  // Writes N parameter descriptors to File, one per line.
00323   int i;
00324 
00325   for (i = 0; i < N; i++) {
00326     if (ParamDesc[i].Circular)
00327       fprintf (File, "circular ");
00328     else
00329       fprintf (File, "linear   ");  // padded to the same width as "circular "
00330 
00331     if (ParamDesc[i].NonEssential)
00332       fprintf (File, "non-essential ");
00333     else
00334       fprintf (File, "essential     ");  // padded to the same width as "non-essential "
00335 
00336     fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);  // only Min and Max are written; Range, HalfRange and MidRange are not
00337   }
00338 }                                // WriteParamDesc

void WriteProtoList ( FILE *  File,
UINT16  N,
PARAM_DESC  ParamDesc[],
LIST  ProtoList,
BOOL8  WriteSigProtos,
BOOL8  WriteInsigProtos 
)

Write textual descriptions of prototypes to a file.

Parameters:
File Open text file to write prototypes to
N Number of dimensions in feature space
ParamDesc Descriptions for each dimension
ProtoList List of prototypes to be written
WriteSigProtos TRUE to write out significant prototypes
WriteInsigProtos TRUE to write out insignificant prototypes
Writes a textual description of each prototype in the prototype list to the specified file. It also writes a file header which includes the number of dimensions in feature space and the descriptions for each dimension.
Date:
6/12/89, DSJ, Created.
Note:
New in v1.03, needed by training code

Definition at line 457 of file clusttool.cpp.

References first, iterate, proto::Significant, WriteParamDesc(), and WritePrototype().

Referenced by WriteClusteredTrainingSamples().

00464 {  /* Writes a header (N plus the parameter descriptors) and then the selected prototypes to File. */
00465   PROTOTYPE *Proto;
00466   
00467   /* write file header */
00468   fprintf(File,"%0d\n",N);  /* number of dimensions on its own line; the 0 flag has no effect without a field width */
00469   WriteParamDesc(File,N,ParamDesc);
00470 
00471   /* write prototypes */
00472   iterate(ProtoList)  /* presumably a list-walking macro that advances ProtoList itself; TODO confirm against its definition */
00473     {
00474       Proto = (PROTOTYPE *) first ( ProtoList );  /* payload of the current list node, cast to a prototype */
00475       if (( Proto->Significant && WriteSigProtos ) ||  /* write only if the flag matching this prototype's significance is set */
00476      ( ! Proto->Significant && WriteInsigProtos ) )
00477    WritePrototype( File, N, Proto );
00478     }
00479 }  /* WriteProtoList */

void WriteProtoStyle ( FILE *  File,
PROTOSTYLE  ProtoStyle 
)

Writes to the specified text file a word which represents the ProtoStyle, without appending a newline to the end.

Parameters:
File open text file to write prototype style to
ProtoStyle prototype style to write
Returns:
None
Note:
Exceptions: None
Date:
6/8/89, DSJ, Created.

Definition at line 423 of file clusttool.cpp.

References automatic, elliptical, mixed, and spherical.

Referenced by WritePrototype().

00423                                                         {  // Writes the name of ProtoStyle to File, with no trailing space or newline.
00424   switch (ProtoStyle) {  // there is no default case: a value outside these four writes nothing
00425     case spherical:
00426       fprintf (File, "spherical");
00427       break;
00428     case elliptical:
00429       fprintf (File, "elliptical");
00430       break;
00431     case mixed:
00432       fprintf (File, "mixed");
00433       break;
00434     case automatic:
00435       fprintf (File, "automatic");
00436       break;
00437   }
00438 }                                // WriteProtoStyle

void WritePrototype ( FILE *  File,
UINT16  N,
PROTOTYPE *  Proto 
)

Writes a textual description of a prototype to the specified text file.

Parameters:
File open text file to write prototype to
N number of dimensions in feature space
Proto prototype to write out
Returns:
None
Note:
Exceptions: None
Date:
6/12/89, DSJ, Created.

Definition at line 352 of file clusttool.cpp.

References D_random, proto::Distrib, elliptical, FLOATUNION::Elliptical, proto::Mean, mixed, normal, proto::NumSamples, proto::Significant, spherical, FLOATUNION::Spherical, proto::Style, uniform, proto::Variance, WriteNFloats(), and WriteProtoStyle().

Referenced by WriteProtoList(), and WriteProtos().

00352                                                             {  // Writes one prototype to File: significance, style, sample count, mean, then variance data.
00353   int i;
00354 
00355   if (Proto->Significant)
00356     fprintf (File, "significant   ");  // padded to the same width as "insignificant "
00357   else
00358     fprintf (File, "insignificant ");
00359   WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
00360   fprintf (File, "%6d\n\t", Proto->NumSamples);  // the count ends the first line; the tab indents the mean line
00361   WriteNFloats (File, N, Proto->Mean);
00362   fprintf (File, "\t");  // indents the line that follows the mean
00363 
00364   switch (Proto->Style) {  // NOTE(review): no case for 'automatic', so nothing follows the tab for that style
00365     case spherical:
00366       WriteNFloats (File, 1, &(Proto->Variance.Spherical));  // a single variance value
00367       break;
00368     case elliptical:
00369       WriteNFloats (File, N, Proto->Variance.Elliptical);  // one variance per dimension
00370       break;
00371     case mixed:
00372       for (i = 0; i < N; i++)  // the loop body is the switch below: one distribution keyword per dimension
00373       switch (Proto->Distrib[i]) {
00374         case normal:
00375           fprintf (File, " %9s", "normal");
00376           break;
00377         case uniform:
00378           fprintf (File, " %9s", "uniform");
00379           break;
00380         case D_random:
00381           fprintf (File, " %9s", "random");
00382           break;
00383       }
00384       fprintf (File, "\n\t");  // ends the keyword line and indents the variance line
00385       WriteNFloats (File, N, Proto->Variance.Elliptical);
00386   }
00387 }                                // WritePrototype


Generated on Wed Feb 28 19:49:19 2007 for Tesseract by  doxygen 1.5.1