training/mfTraining.cpp

Go to the documentation of this file.
00001 
00025 /*----------------------------------------------------------------------------
00026                Include Files and Type Defines
00027 ----------------------------------------------------------------------------**/
00028 #include "oldlist.h"
00029 #include "efio.h"
00030 #include "emalloc.h"
00031 #include "featdefs.h"
00032 #include "getopt.h"
00033 #include "ocrfeatures.h"
00034 #include "general.h"
00035 #include "clusttool.h"
00036 #include "cluster.h"
00037 #include "protos.h"
00038 #include "minmax.h"
00039 #include "debug.h"
00040 #include "const.h"
00041 #include "mergenf.h"
00042 #include "name2char.h"
00043 #include "intproto.h"
00044 #include "variables.h"
00045 #include "freelist.h"
00046 
00047 #include <string.h>
00048 #include <stdio.h>
00049 #include <math.h>
00050 
/* Size in bytes (terminating NUL included) of the fixed name buffers used in
   this file: FontName, CharName and every Filename path buffer. */
#define MAXNAMESIZE          80

/* Initial value of MaxNumSamples, the per-character sample cap. */
#define MAX_NUM_SAMPLES      10000

/* Short feature-type name: handed to ShortNameToFeatureType() and also used
   as the suffix of the per-character sample files. */
#define PROGRAM_FEATURE_TYPE "mf"

/* ReadTrainingSamples adds UniformRandomNumber(-MINSD, MINSD) to every
   feature parameter it reads. */
#define MINSD                (1.0f / 128.0f)

int row_number;   /* cjn: fixes link problem */
00057 
00066 typedef struct
00067 {
00068   char      *Label;
00069   LIST      List;
00070 }
00071 LABELEDLISTNODE, *LABELEDLIST;
00072 
00077 typedef struct
00078 {
00079    char* Label;
00080    int   NumMerged[MAX_NUM_PROTOS];
00081    CLASS_TYPE Class;
00082 }MERGE_CLASS_NODE;
00083 
00088 typedef MERGE_CLASS_NODE* MERGE_CLASS;
00089 
/* Round x to the nearest multiple of frag.
   Both arguments and the whole expansion are parenthesized so that compound
   expressions such as round(a + b, step) expand correctly.  frag is still
   evaluated twice, so it must not have side effects.
   NOTE(review): this two-argument macro hides the one-argument round()
   declared by <math.h>; the name is kept because callers in this file use it. */
#define round(x,frag) (floor((x) / (frag) + .5) * (frag))
00091 
00092 
00093 /*----------------------------------------------------------------------------
00094                Public Function Prototypes
00095 ----------------------------------------------------------------------------**/
00096 int main (
00097      int argc,
00098      char   **argv);
00099 
00100 /*----------------------------------------------------------------------------
00101                Private Function Prototypes
00102 ----------------------------------------------------------------------------**/
00103 void ParseArguments(
00104 int   argc,
00105 char  **argv);
00106 
00107 char *GetNextFilename ();
00108 
00109 LIST ReadTrainingSamples (
00110      FILE   *File);
00111 
00112 LABELEDLIST FindList (
00113      LIST   List,
00114      char   *Label);
00115 
00116 MERGE_CLASS FindClass (
00117      LIST   List,
00118      char   *Label);
00119 
00120 LABELEDLIST NewLabeledList (
00121      char   *Label);
00122 
00123 MERGE_CLASS NewLabeledClass (
00124      char   *Label);
00125 
00126 void WriteTrainingSamples (
00127      char   *Directory,
00128      LIST   CharList);
00129 
00130 void WriteClusteredTrainingSamples (
00131      char   *Directory,
00132      LIST   ProtoList,
00133     CLUSTERER *Clusterer,
00134     LABELEDLIST CharSample);
00135 
00136 void WriteMergedTrainingSamples(
00137     char *Directory,
00138    LIST ClassList);
00139 
00140 void WriteMicrofeat(
00141     char *Directory,
00142    LIST  ClassList);
00143 
00144 void WriteProtos(
00145    FILE* File,
00146    MERGE_CLASS MergeClass);
00147 
00148 void WriteConfigs(
00149    FILE* File,
00150    CLASS_TYPE Class);
00151 
00152 void FreeTrainingSamples (
00153      LIST   CharList);
00154 
00155 void FreeLabeledClassList (
00156      LIST   ClassList);
00157 
00158 void FreeLabeledList (
00159      LABELEDLIST  LabeledList);
00160 
00161 CLUSTERER *SetUpForClustering(
00162      LABELEDLIST  CharSample);
00163 /*
00164 PARAMDESC *ConvertToPARAMDESC(
00165    PARAM_DESC* Param_Desc,
00166    int N);
00167 */
00168 LIST RemoveInsignificantProtos(
00169    LIST ProtoList,
00170    BOOL8 KeepSigProtos,
00171    BOOL8 KeepInsigProtos,
00172    int N);
00173 
00174 void CleanUpUnusedData(
00175    LIST ProtoList);
00176 
00177 void Normalize (
00178    float  *Values);
00179 
00180 void SetUpForFloat2Int(
00181    LIST LabeledClassList);
00182 
00183 void WritePFFMTable(
00184    INT_TEMPLATES Templates,
00185    const char* filename);
00186 
00187 //--------------Global Data Definitions and Declarations--------------
00188 static char FontName[MAXNAMESIZE];
00189 // globals used for parsing command line arguments
00190 static char *Directory = NULL;
00191 static int  MaxNumSamples = MAX_NUM_SAMPLES;
00192 static int  Argc;
00193 static char **Argv;
00194 
00195 // globals used to control what information is saved in the output file
00196 static BOOL8      ShowAllSamples = FALSE;
00197 static BOOL8      ShowSignificantProtos = TRUE;
00198 static BOOL8      ShowInsignificantProtos = FALSE;
00199 
00207 static CLUSTERCONFIG Config =
00208 {
00209    elliptical, 0.40, 0.05, 1.0, 1e-6
00210 };
00211 
00212 static FLOAT32 RoundingAccuracy = 0.0;
00213 
00214 /*----------------------------------------------------------------------------
00215                   Public Code
00216 -----------------------------------------------------------------------------*/
00217 /*---------------------------------------------------------------------------*/
00218 
00252 int main (
00253      int argc,
00254      char   **argv)
00255 {
00256    char  *PageName;
00257    FILE  *TrainingPage;
00258    FILE  *OutFile;
00259    LIST  CharList;
00260    CLUSTERER   *Clusterer = NULL;
00261    LIST     ProtoList = NIL;
00262    LABELEDLIST CharSample;
00263    PROTOTYPE   *Prototype;
00264    LIST     ClassList = NIL;
00265    int      Cid, Pid;
00266    PROTO    Proto;
00267    PROTO_STRUCT   DummyProto;
00268    BIT_VECTOR  Config2;
00269    MERGE_CLASS MergeClass;
00270    INT_TEMPLATES  IntTemplates;
00271    LIST pCharList, pProtoList;
00272    char Filename[MAXNAMESIZE];
00273 
00274    ParseArguments (argc, argv);
00275    InitFastTrainerVars ();
00276    InitSubfeatureVars ();
00277    while ((PageName = GetNextFilename()) != NULL)
00278    {
00279       printf ("\nReading %s ...", PageName);
00280       TrainingPage = Efopen (PageName, "r");
00281       CharList = ReadTrainingSamples (TrainingPage);
00282       fclose (TrainingPage);
00283       //WriteTrainingSamples (Directory, CharList);
00284       pCharList = CharList;
00285       iterate(pCharList)
00286       {
00287          //Cluster
00288          CharSample = (LABELEDLIST) first (pCharList);
00289          printf ("\nClustering %s ...", CharSample->Label);
00290          Clusterer = SetUpForClustering(CharSample);
00291          ProtoList = ClusterSamples(Clusterer, &Config);
00292          //WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
00293          CleanUpUnusedData(ProtoList);
00294 
00295          //Merge
00296          ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
00297             ShowInsignificantProtos, Clusterer->SampleSize);
00298          FreeClusterer(Clusterer);
00299          MergeClass = FindClass (ClassList, CharSample->Label);
00300          if (MergeClass == NULL)
00301          {
00302             MergeClass = NewLabeledClass (CharSample->Label);
00303             ClassList = push (ClassList, MergeClass);
00304          }
00305          Cid = AddConfigToClass(MergeClass->Class);
00306          pProtoList = ProtoList;
00307          iterate (pProtoList)
00308          {
00309             Prototype = (PROTOTYPE *) first (pProtoList);
00310 
00311             // see if proto can be approximated by existing proto
00312             Pid = FindClosestExistingProto (MergeClass->Class, MergeClass->NumMerged, Prototype);
00313             if (Pid == NO_PROTO)
00314             {
00315                Pid = AddProtoToClass (MergeClass->Class);
00316                Proto = ProtoIn (MergeClass->Class, Pid);
00317                MakeNewFromOld (Proto, Prototype);
00318                MergeClass->NumMerged[Pid] = 1;
00319             }
00320             else
00321             {
00322                MakeNewFromOld (&DummyProto, Prototype);
00323                ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
00324                   (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
00325                   ProtoIn (MergeClass->Class, Pid));
00326                MergeClass->NumMerged[Pid] ++;
00327             }
00328             Config2 = ConfigIn (MergeClass->Class, Cid);
00329             AddProtoToConfig (Pid, Config2);
00330          }//iterate (pProtoList)
00331          FreeProtoList (&ProtoList);
00332       }//iterate (pCharList)
00333       FreeTrainingSamples (CharList);
00334       printf ("\n");
00335    }//while PageName
00336 
00337    //WriteMergedTrainingSamples(Directory,ClassList);
00338    WriteMicrofeat(Directory, ClassList);
00339    InitIntProtoVars ();
00340    InitPrototypes ();
00341    SetUpForFloat2Int(ClassList);
00342    IntTemplates = CreateIntTemplates(TrainingData);
00343    strcpy (Filename, "");
00344    if (Directory != NULL)
00345    {
00346       strcat (Filename, Directory);
00347       strcat (Filename, "/");
00348    }
00349    strcat (Filename, "inttemp");
00350 #ifdef __UNIX__
00351    OutFile = Efopen (Filename, "w");
00352 #else
00353    OutFile = Efopen (Filename, "wb");  // BINARY file
00354 #endif
00355    WriteIntTemplates(OutFile, IntTemplates);
00356    fclose (OutFile);
00357 
00358    // Now create pffmtable.
00359    WritePFFMTable(IntTemplates, "pffmtable");
00360    printf ("\nDone!\n"); 
00361    FreeLabeledClassList (ClassList);
00362    return 0;
00363 }// main
00364 
00365 
00366 /*----------------------------------------------------------------------------
00367                      Private Code
00368 ----------------------------------------------------------------------------**/
00369 /*---------------------------------------------------------------------------*/
00370 
00404 void ParseArguments(
00405    int   argc,
00406    char  **argv)
00407 {
00408    int      Option;
00409    int      ParametersRead;
00410    BOOL8    Error;
00411    extern char *optarg;
00412 
00413    Error = FALSE;
00414    Argc = argc;
00415    Argv = argv;
00416    while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
00417    {
00418       switch ( Option )
00419       {
00420       case 'n':
00421          // sscanf(optarg,"%d", &ParametersRead);
00422          ShowInsignificantProtos = FALSE;
00423          break;
00424       case 'p':
00425          // sscanf(optarg,"%d", &ParametersRead);
00426          ShowSignificantProtos = FALSE;
00427          break;
00428       case 'd':
00429          ShowAllSamples = FALSE;
00430          break;
00431       case 'C':
00432          ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
00433          if ( ParametersRead != 1 ) Error = TRUE;
00434          else if ( Config.Confidence > 1 ) Config.Confidence = 1;
00435          else if ( Config.Confidence < 0 ) Config.Confidence = 0;
00436          break;
00437       case 'I':
00438          ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
00439          if ( ParametersRead != 1 ) Error = TRUE;
00440          else if ( Config.Independence > 1 ) Config.Independence = 1;
00441          else if ( Config.Independence < 0 ) Config.Independence = 0;
00442          break;
00443       case 'M':
00444          ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
00445          if ( ParametersRead != 1 ) Error = TRUE;
00446          else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
00447          else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
00448          break;
00449       case 'B':
00450          ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
00451          if ( ParametersRead != 1 ) Error = TRUE;
00452          else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
00453          else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
00454          break;
00455       case 'R':
00456          ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
00457          if ( ParametersRead != 1 ) Error = TRUE;
00458          else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
00459          else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
00460          break;
00461       case 'S':
00462          switch ( optarg[0] )
00463          {
00464          case 's': Config.ProtoStyle = spherical; break;
00465          case 'e': Config.ProtoStyle = elliptical; break;
00466          case 'm': Config.ProtoStyle = mixed; break;
00467          case 'a': Config.ProtoStyle = automatic; break;
00468          default: Error = TRUE;
00469          }
00470          break;
00471          case 'D':
00472             Directory = optarg;
00473             break;
00474          case 'N':
00475             if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
00476                MaxNumSamples <= 0)
00477                Error = TRUE;
00478             break;
00479          case '?':
00480             Error = TRUE;
00481             break;
00482       }
00483       if ( Error )
00484       {
00485          fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
00486          fprintf (stderr, "\t[-S ProtoStyle]\n");
00487          fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
00488          fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
00489          exit (2);
00490       }
00491    }//while
00492 }// ParseArguments
00493 
00494 /*---------------------------------------------------------------------------*/
00509 char *GetNextFilename ()
00510 {
00511    if (optind < Argc)
00512       return (Argv [optind++]);
00513    else
00514       return (NULL);
00515 
00516 }// GetNextFilename
00517 
00518 /*---------------------------------------------------------------------------*/
00519 
00530 LIST ReadTrainingSamples (
00531      FILE   *File)
00532 {
00533    char        CharName[MAXNAMESIZE];
00534    LABELEDLIST CharSample;
00535   FEATURE_SET FeatureSamples;
00536    LIST        TrainingSamples = NIL;
00537    CHAR_DESC      CharDesc;
00538    int         Type, i;
00539 
00540    while (fscanf (File, "%s %s", FontName, CharName) == 2) {
00541       CharSample = FindList (TrainingSamples, CharName);
00542       if (CharSample == NULL) {
00543          CharSample = NewLabeledList (CharName);
00544          TrainingSamples = push (TrainingSamples, CharSample);
00545       }
00546       CharDesc = ReadCharDescription (File);
00547       Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
00548       FeatureSamples = FeaturesOfType(CharDesc, Type);
00549 
00550       for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
00551          FEATURE f = FeatureSamples->Features[feature];
00552          for (int dim =0; dim < f->Type->NumParams; ++dim)
00553             f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
00554       }//for feature
00555       CharSample->List = push (CharSample->List, FeatureSamples);
00556       for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
00557          if (Type != i)
00558             FreeFeatureSet (FeaturesOfType (CharDesc, i));
00559       free (CharDesc);
00560     }//while
00561    return (TrainingSamples);
00562 
00563 }// ReadTrainingSamples
00564 
00565 /*---------------------------------------------------------------------------*/
00566 
00578 LABELEDLIST FindList (
00579      LIST   List,
00580      char   *Label)
00581 {
00582    LABELEDLIST LabeledList;
00583 
00584    iterate (List)
00585     {
00586       LabeledList = (LABELEDLIST) first (List);
00587       if (strcmp (LabeledList->Label, Label) == 0)
00588          return (LabeledList);
00589     }
00590    return (NULL);
00591 
00592 }// FindList
00593 
00594 /*----------------------------------------------------------------------------*/
00605 MERGE_CLASS FindClass (
00606      LIST   List,
00607      char   *Label)
00608 {
00609    MERGE_CLASS MergeClass;
00610 
00611    iterate (List)
00612     {
00613       MergeClass = (MERGE_CLASS) first (List);
00614       if (strcmp (MergeClass->Label, Label) == 0)
00615          return (MergeClass);
00616     }
00617    return (NULL);
00618 
00619 }// FindClass
00620 
00621 /*---------------------------------------------------------------------------*/
00622 
00633 LABELEDLIST NewLabeledList (
00634      char   *Label)
00635 {
00636    LABELEDLIST LabeledList;
00637 
00638    LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
00639    LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
00640    strcpy (LabeledList->Label, Label);
00641    LabeledList->List = NIL;
00642    return (LabeledList);
00643 
00644 }  /* NewLabeledList */
00645 
00646 /*---------------------------------------------------------------------------*/
00647 
00654 void WritePFFMTable(
00655       INT_TEMPLATES Templates, const char* filename) {
00656 
00657   FILE* fp = Efopen(filename, "wb");
00658 
00659   // Write out each class
00660   for (int i = 0; i < NumClassesIn (Templates); i++) {
00661     int MaxLength = 0;
00662     INT_CLASS Class = ClassForIndex (Templates, i);
00663     for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
00664       if (LengthForConfigId (Class, ConfigId) > MaxLength)
00665         MaxLength = LengthForConfigId (Class, ConfigId);
00666     }
00667     fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
00668   }
00669   fclose(fp);
00670 }// WritePFFMTable
00671 
00678 MERGE_CLASS NewLabeledClass (
00679      char   *Label)
00680 {
00681    MERGE_CLASS MergeClass;
00682 
00683    MergeClass = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
00684    MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
00685    strcpy (MergeClass->Label, Label);
00686    MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
00687    return (MergeClass);
00688 
00689 }  /* NewLabeledClass */
00690 
00691 /*---------------------------------------------------------------------------*/
00692 
00693 /*
00694 ** Parameters:
00695 **    Directory   directory to place sample files into
00696 **    FontList list of fonts used in the training samples
00697 ** Globals:
00698 **    MaxNumSamples  max number of samples per class to write
00699 ** Operation:
00700 **    This routine writes the specified samples into files which
00701 **    are organized according to the font name and character name
00702 **    of the samples.
00703 ** Return: none
00704 ** Exceptions: none
00705 ** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
00706 */
00707 void WriteTrainingSamples (
00708      char   *Directory,
00709      LIST   CharList)
00710 {
00711    LABELEDLIST CharSample;
00712    FEATURE_SET FeatureSet;
00713    LIST     FeatureList;
00714    FILE     *File;
00715    char     Filename[MAXNAMESIZE];
00716    int      NumSamples;
00717 
00718    iterate (CharList)      // iterate thru all of the fonts
00719    {
00720       CharSample = (LABELEDLIST) first (CharList);
00721 
00722       // construct the full pathname for the current samples file
00723       strcpy (Filename, "");
00724       if (Directory != NULL)
00725       {
00726          strcat (Filename, Directory);
00727          strcat (Filename, "/");
00728       }
00729       strcat (Filename, FontName);
00730       strcat (Filename, "/");
00731       strcat (Filename, CharSample->Label);
00732       strcat (Filename, ".");
00733       strcat (Filename, PROGRAM_FEATURE_TYPE);
00734       printf ("\nWriting %s ...", Filename);
00735 
00736       /* if file does not exist, create a new one with an appropriate
00737       header; otherwise append samples to the existing file */
00738       File = fopen (Filename, "r");
00739       if (File == NULL)
00740       {
00741          File = Efopen (Filename, "w");
00742          WriteOldParamDesc
00743             (File, DefinitionOf (ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)));
00744       }
00745       else
00746       {
00747          fclose (File);
00748          File = Efopen (Filename, "a");
00749       }
00750 
00751       // append samples onto the file
00752       FeatureList = CharSample->List;
00753       NumSamples = 0;
00754       iterate (FeatureList)
00755       {
00756          if (NumSamples >= MaxNumSamples) break;
00757 
00758          FeatureSet = (FEATURE_SET) first (FeatureList);
00759          WriteFeatureSet (File, FeatureSet);
00760          NumSamples++;
00761       }
00762       fclose (File);
00763    }
00764 }// WriteTrainingSamples
00765 
00766 
00767 /*----------------------------------------------------------------------------*/
00768 
00784 void WriteClusteredTrainingSamples (
00785      char   *Directory,
00786      LIST   ProtoList,
00787     CLUSTERER *Clusterer,
00788     LABELEDLIST CharSample)
00789 {
00790    FILE     *File;
00791    char     Filename[MAXNAMESIZE];
00792 
00793    strcpy (Filename, "");
00794    if (Directory != NULL)
00795    {
00796       strcat (Filename, Directory);
00797       strcat (Filename, "/");
00798    }
00799    strcat (Filename, FontName);
00800    strcat (Filename, "/");
00801    strcat (Filename, CharSample->Label);
00802    strcat (Filename, ".");
00803    strcat (Filename, PROGRAM_FEATURE_TYPE);
00804    strcat (Filename, ".p");
00805    printf ("\nWriting %s ...", Filename);
00806 #ifdef __UNIX__
00807    File = Efopen (Filename, "w");
00808 #else
00809    File = Efopen (Filename, "wb"); // BINARY file
00810 #endif
00811    WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
00812       ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
00813    fclose (File);
00814 
00815 }  /* WriteClusteredTrainingSamples */
00816 
00817 /*---------------------------------------------------------------------------*/
00824 void WriteMergedTrainingSamples(
00825     char *Directory,
00826    LIST ClassList)
00827 
00828 {
00829    FILE     *File;
00830    char     Filename[MAXNAMESIZE];
00831    MERGE_CLASS MergeClass;
00832 
00833    iterate (ClassList)
00834    {
00835       MergeClass = (MERGE_CLASS) first (ClassList);
00836       strcpy (Filename, "");
00837       if (Directory != NULL)
00838       {
00839          strcat (Filename, Directory);
00840          strcat (Filename, "/");
00841       }
00842       strcat (Filename, "Merged/");
00843       strcat (Filename, MergeClass->Label);
00844       strcat (Filename, PROTO_SUFFIX);
00845       printf ("\nWriting Merged %s ...", Filename);
00846       File = Efopen (Filename, "w");
00847       WriteOldProtoFile (File, MergeClass->Class);
00848       fclose (File);
00849 
00850       strcpy (Filename, "");
00851       if (Directory != NULL)
00852       {
00853          strcat (Filename, Directory);
00854          strcat (Filename, "/");
00855       }
00856       strcat (Filename, "Merged/");
00857       strcat (Filename, MergeClass->Label);
00858       strcat (Filename, CONFIG_SUFFIX);
00859       printf ("\nWriting Merged %s ...", Filename);
00860       File = Efopen (Filename, "w");
00861       WriteOldConfigFile (File, MergeClass->Class);
00862       fclose (File);
00863    }//iterate (ClassList)
00864 
00865 }// WriteMergedTrainingSamples
00866 
00867 /*--------------------------------------------------------------------------*/
00874 void WriteMicrofeat(
00875     char *Directory,
00876    LIST  ClassList)
00877 
00878 {
00879    FILE     *File;
00880    char     Filename[MAXNAMESIZE];
00881    MERGE_CLASS MergeClass;
00882 
00883    strcpy (Filename, "");
00884    if (Directory != NULL)
00885    {
00886       strcat (Filename, Directory);
00887       strcat (Filename, "/");
00888    }
00889    strcat (Filename, "Microfeat");
00890    File = Efopen (Filename, "w");
00891    printf ("\nWriting Merged %s ...", Filename);
00892    iterate(ClassList)
00893    {
00894       MergeClass = (MERGE_CLASS) first (ClassList);
00895       WriteProtos(File, MergeClass);
00896       WriteConfigs(File, MergeClass->Class);
00897    }
00898    fclose (File);
00899 } // WriteMicrofeat
00900 
00901 /*---------------------------------------------------------------------------*/
00908 void WriteProtos(
00909    FILE* File,
00910    MERGE_CLASS MergeClass)
00911 {
00912    float Values[3];
00913    int i;
00914    PROTO Proto;
00915 
00916    fprintf(File, "%c\n", NameToChar(MergeClass->Label));
00917    fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));
00918    for(i=0; i < NumProtosIn(MergeClass->Class); i++)
00919    {
00920       Proto = ProtoIn(MergeClass->Class,i);
00921       fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", ProtoX(Proto), ProtoY(Proto),
00922          ProtoLength(Proto), ProtoAngle(Proto));
00923       Values[0] = ProtoX(Proto);
00924       Values[1] = ProtoY(Proto);
00925       Values[2] = ProtoAngle(Proto);
00926       Normalize(Values);
00927       fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
00928    }
00929 } // WriteProtos
00930 
00931 /*----------------------------------------------------------------------------*/
00938 void WriteConfigs(
00939    FILE* File,
00940    CLASS_TYPE Class)
00941 {
00942    BIT_VECTOR Config;
00943    int i, j, WordsPerConfig;
00944 
00945    WordsPerConfig = WordsInVectorOfSize(NumProtosIn(Class));
00946    fprintf(File, "%d %d\n", NumConfigsIn(Class),WordsPerConfig);
00947    for(i=0; i < NumConfigsIn(Class); i++)
00948    {
00949       Config = ConfigIn(Class,i);
00950       for(j=0; j < WordsPerConfig; j++)
00951          fprintf(File, "%08x ", Config[j]);
00952       fprintf(File, "\n");
00953    }
00954    fprintf(File, "\n");
00955 } // WriteConfigs
00956 
00957 /*---------------------------------------------------------------------------*/
00958 
00969 void FreeTrainingSamples (
00970      LIST   CharList)
00971 {
00972    LABELEDLIST CharSample;
00973    FEATURE_SET FeatureSet;
00974    LIST     FeatureList;
00975 
00976 
00977    printf ("\nFreeTrainingSamples...");
00978    iterate (CharList)      /* iterate thru all of the fonts */
00979    {
00980       CharSample = (LABELEDLIST) first (CharList);
00981       FeatureList = CharSample->List;
00982       iterate (FeatureList)   /* iterate thru all of the classes */
00983       {
00984          FeatureSet = (FEATURE_SET) first (FeatureList);
00985          FreeFeatureSet (FeatureSet);
00986       }
00987       FreeLabeledList (CharSample);
00988    }
00989    destroy (CharList);
00990 
00991 }// FreeTrainingSamples
00992 
00993 /*-----------------------------------------------------------------------------*/
01014 void FreeLabeledClassList (
01015      LIST   ClassList)
01016 {
01017    MERGE_CLASS MergeClass;
01018 
01019    iterate (ClassList)     /* iterate thru all of the fonts */
01020    {
01021       MergeClass = (MERGE_CLASS) first (ClassList);
01022       free (MergeClass->Label);
01023       FreeClass(MergeClass->Class);
01024       free (MergeClass);
01025    }
01026    destroy (ClassList);
01027 
01028 }  /* FreeLabeledClassList */
01029 
01030 /*---------------------------------------------------------------------------*/
01031 
01042 void FreeLabeledList (
01043      LABELEDLIST  LabeledList)
01044 {
01045    destroy (LabeledList->List);
01046    free (LabeledList->Label);
01047    free (LabeledList);
01048 
01049 }// FreeLabeledList
01050 
01051 /*---------------------------------------------------------------------------*/
01052 
01053 /*
01054 ** Parameters:
01055 **    CharSample: LABELEDLIST that holds all the feature information for a
01056 **    given character.
01057 ** Globals:
01058 **    None
01059 ** Operation:
01060 **    This routine reads samples from a LABELEDLIST and enters
01061 **    those samples into a clusterer data structure.  This
01062 **    data structure is then returned to the caller.
01063 ** Return:
01064 **    Pointer to new clusterer data structure.
01065 ** Exceptions:
01066 **    None
01067 ** History:
01068 **    8/16/89, DSJ, Created.
01069 */
01070 
01071 CLUSTERER *SetUpForClustering(
01072      LABELEDLIST  CharSample)
01073 {
01074    UINT16   N;
01075    int      i, j;
01076    FLOAT32  *Sample = NULL;
01077    CLUSTERER   *Clusterer;
01078    INT32    CharID;
01079    LIST FeatureList = NULL;
01080    FEATURE_SET FeatureSet = NULL;
01081    FEATURE_DESC FeatureDesc = NULL;
01082 // PARAM_DESC* ParamDesc;
01083 
01084    FeatureDesc = DefinitionOf(ShortNameToFeatureType(PROGRAM_FEATURE_TYPE));
01085    N = FeatureDesc->NumParams;
01086    //ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);
01087    Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
01088 // free(ParamDesc);
01089 
01090    FeatureList = CharSample->List;
01091    CharID = 0;
01092    iterate(FeatureList)
01093    {
01094       if (CharID >= MaxNumSamples) break;
01095 
01096       FeatureSet = (FEATURE_SET) first (FeatureList);
01097       for (i=0; i < FeatureSet->MaxNumFeatures; i++)
01098       {
01099          if (Sample == NULL)
01100             Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01101          for (j=0; j < N; j++)
01102             if (RoundingAccuracy != 0.0)
01103                Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
01104             else
01105                Sample[j] = FeatureSet->Features[i]->Params[j];
01106             MakeSample (Clusterer, Sample, CharID);
01107       }//for i
01108       CharID++;
01109    }//iterate (FeatureList)
01110    if ( Sample != NULL ) free( Sample );
01111    return( Clusterer );
01112 
01113 }// SetUpForClustering
01114 
01115 /*------------------------------------------------------------------------*/
01122 LIST RemoveInsignificantProtos(
01123    LIST ProtoList,
01124    BOOL8 KeepSigProtos,
01125    BOOL8 KeepInsigProtos,
01126    int N)
01127 
01128 {
01129    LIST NewProtoList = NIL;
01130    LIST pProtoList;
01131    PROTOTYPE* Proto;
01132    PROTOTYPE* NewProto;
01133    int i;
01134 
01135    pProtoList = ProtoList;
01136    iterate(pProtoList)
01137    {
01138       Proto = (PROTOTYPE *) first (pProtoList);
01139       if ((Proto->Significant && KeepSigProtos) ||
01140          (!Proto->Significant && KeepInsigProtos))
01141       {
01142          NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
01143 
01144          NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01145          NewProto->Significant = Proto->Significant;
01146          NewProto->Style = Proto->Style;
01147          NewProto->NumSamples = Proto->NumSamples;
01148          NewProto->Cluster = NULL;
01149          NewProto->Distrib = NULL;
01150 
01151          for (i=0; i < N; i++)
01152             NewProto->Mean[i] = Proto->Mean[i];
01153          if (Proto->Variance.Elliptical != NULL)
01154          {
01155             NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01156             for (i=0; i < N; i++)
01157                NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
01158          }
01159          else
01160             NewProto->Variance.Elliptical = NULL;
01161          //---------------------------------------------
01162          if (Proto->Magnitude.Elliptical != NULL)
01163          {
01164             NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01165             for (i=0; i < N; i++)
01166                NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
01167          }
01168          else
01169             NewProto->Magnitude.Elliptical = NULL;
01170          //------------------------------------------------
01171          if (Proto->Weight.Elliptical != NULL)
01172          {
01173             NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01174             for (i=0; i < N; i++)
01175                NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
01176          }
01177          else
01178             NewProto->Weight.Elliptical = NULL;
01179 
01180          NewProto->TotalMagnitude = Proto->TotalMagnitude;
01181          NewProto->LogMagnitude = Proto->LogMagnitude;
01182          NewProtoList = push_last(NewProtoList, NewProto);
01183       }//if KeepSigProtos or KeepInsigProtos
01184    }//iterate (pProtoList)
01185 
01186    //FreeProtoList (ProtoList);
01187    return (NewProtoList);
01188 }// RemoveInsignificantProtos
01189 
01190 /*-----------------------------------------------------------------------------*/
01197 void CleanUpUnusedData(
01198    LIST ProtoList)
01199 {
01200    PROTOTYPE* Prototype;
01201 
01202    iterate(ProtoList)
01203    {
01204       Prototype = (PROTOTYPE *) first (ProtoList);
01205       if(Prototype->Variance.Elliptical != NULL)
01206       {
01207          memfree(Prototype->Variance.Elliptical);
01208          Prototype->Variance.Elliptical = NULL;
01209       }
01210       if(Prototype->Magnitude.Elliptical != NULL)
01211       {
01212          memfree(Prototype->Magnitude.Elliptical);
01213          Prototype->Magnitude.Elliptical = NULL;
01214       }
01215       if(Prototype->Weight.Elliptical != NULL)
01216       {
01217          memfree(Prototype->Weight.Elliptical);
01218          Prototype->Weight.Elliptical = NULL;
01219       }
01220    }
01221 }
01222 
01223 /*--------------------------------------------------------------------------*/
01229 void Normalize (
01230    float  *Values)
01231 {
01232    register float Slope;
01233    register float Intercept;
01234    register float Normalizer;
01235 
01236    Slope      = tan (Values [2] * 2 * PI);
01237    Intercept  = Values [1] - Slope * Values [0];
01238    Normalizer = 1 / sqrt (Slope * Slope + 1.0);
01239 
01240    Values [0] = Slope * Normalizer;
01241    Values [1] = - Normalizer;
01242    Values [2] = Intercept * Normalizer;
01243 }// Normalize
01244 
01251 void SetUpForFloat2Int(
01252    LIST LabeledClassList)
01253 {
01254    MERGE_CLASS MergeClass;
01255    CLASS_TYPE     Class;
01256    int            NumProtos;
01257    int            NumConfigs;
01258    int            NumWords;
01259    int            i, j;
01260    float       Values[3];
01261    PROTO       NewProto;
01262    PROTO       OldProto;
01263    BIT_VECTOR     NewConfig;
01264    BIT_VECTOR     OldConfig;
01265 
01266    printf("Float2Int ...");
01267 
01268    iterate(LabeledClassList)
01269    {
01270       MergeClass = (MERGE_CLASS) first (LabeledClassList);
01271       Class = &TrainingData[NameToChar(MergeClass->Label)];
01272       NumProtos = NumProtosIn(MergeClass->Class);
01273       NumConfigs = NumConfigsIn(MergeClass->Class);
01274 
01275       NumProtosIn(Class) = NumProtos;
01276       Class->MaxNumProtos = NumProtos;
01277       Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
01278       for(i=0; i < NumProtos; i++)
01279       {
01280          NewProto = ProtoIn(Class, i);
01281          OldProto = ProtoIn(MergeClass->Class, i);
01282          Values[0] = ProtoX(OldProto);
01283          Values[1] = ProtoY(OldProto);
01284          Values[2] = ProtoAngle(OldProto);
01285          Normalize(Values);
01286          ProtoX(NewProto) = ProtoX(OldProto);
01287          ProtoY(NewProto) = ProtoY(OldProto);
01288          ProtoLength(NewProto) = ProtoLength(OldProto);
01289          ProtoAngle(NewProto) = ProtoAngle(OldProto);
01290          CoefficientA(NewProto) = Values[0];
01291          CoefficientB(NewProto) = Values[1];
01292          CoefficientC(NewProto) = Values[2];
01293       }//for i
01294 
01295       NumConfigsIn(Class) = NumConfigs;
01296       Class->MaxNumConfigs = NumConfigs;
01297       Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
01298       NumWords = WordsInVectorOfSize(NumProtos);
01299       for(i=0; i < NumConfigs; i++)
01300       {
01301          NewConfig = NewBitVector(NumProtos);
01302          OldConfig = ConfigIn(MergeClass->Class, i);
01303          for(j=0; j < NumWords; j++)
01304             NewConfig[j] = OldConfig[j];
01305          ConfigIn(Class, i) = NewConfig;
01306       }
01307    }//iterate (LabeledClassList)
01308 }// SetUpForFloat2Int

Generated on Wed Feb 28 19:49:12 2007 for Tesseract by  doxygen 1.5.1