00001
00025
00026
00027
00028 #include "oldlist.h"
00029 #include "efio.h"
00030 #include "emalloc.h"
00031 #include "featdefs.h"
00032 #include "getopt.h"
00033 #include "ocrfeatures.h"
00034 #include "general.h"
00035 #include "clusttool.h"
00036 #include "cluster.h"
00037 #include "protos.h"
00038 #include "minmax.h"
00039 #include "debug.h"
00040 #include "const.h"
00041 #include "mergenf.h"
00042 #include "name2char.h"
00043 #include "intproto.h"
00044 #include "variables.h"
00045 #include "freelist.h"
00046
00047 #include <string.h>
00048 #include <stdio.h>
00049 #include <math.h>
00050
/* Size, in bytes, of the fixed buffers used for font names, character
   names and output path names in this file. */
#define MAXNAMESIZE           80

/* Initial value of MaxNumSamples (may be overridden with -N). */
#define MAX_NUM_SAMPLES       10000

/* Short name of the feature type this program reads and clusters. */
#define PROGRAM_FEATURE_TYPE  "mf"

/* Half-width of the uniform random offset added to every feature
   parameter when training samples are read. */
#define MINSD                 (1.0f / 128.0f)

/* NOTE(review): not referenced anywhere in this file; it has external
   linkage, so it is presumably required by another translation unit. */
int row_number;
00057
00066 typedef struct
00067 {
00068 char *Label;
00069 LIST List;
00070 }
00071 LABELEDLISTNODE, *LABELEDLIST;
00072
00077 typedef struct
00078 {
00079 char* Label;
00080 int NumMerged[MAX_NUM_PROTOS];
00081 CLASS_TYPE Class;
00082 }MERGE_CLASS_NODE;
00083
00088 typedef MERGE_CLASS_NODE* MERGE_CLASS;
00089
/* Round x to the nearest multiple of frag.
   Both arguments are fully parenthesized so that compound expressions
   (e.g. round(a + b, c)) expand with the intended precedence.
   Note: frag is evaluated twice, so it must not have side effects.  This
   macro intentionally replaces the one-argument round() from <math.h>. */
#define round(x,frag) (floor((x) / (frag) + .5) * (frag))
00091
00092
00093
00094
00095
00096 int main (
00097 int argc,
00098 char **argv);
00099
00100
00101
00102
00103 void ParseArguments(
00104 int argc,
00105 char **argv);
00106
00107 char *GetNextFilename ();
00108
00109 LIST ReadTrainingSamples (
00110 FILE *File);
00111
00112 LABELEDLIST FindList (
00113 LIST List,
00114 char *Label);
00115
00116 MERGE_CLASS FindClass (
00117 LIST List,
00118 char *Label);
00119
00120 LABELEDLIST NewLabeledList (
00121 char *Label);
00122
00123 MERGE_CLASS NewLabeledClass (
00124 char *Label);
00125
00126 void WriteTrainingSamples (
00127 char *Directory,
00128 LIST CharList);
00129
00130 void WriteClusteredTrainingSamples (
00131 char *Directory,
00132 LIST ProtoList,
00133 CLUSTERER *Clusterer,
00134 LABELEDLIST CharSample);
00135
00136 void WriteMergedTrainingSamples(
00137 char *Directory,
00138 LIST ClassList);
00139
00140 void WriteMicrofeat(
00141 char *Directory,
00142 LIST ClassList);
00143
00144 void WriteProtos(
00145 FILE* File,
00146 MERGE_CLASS MergeClass);
00147
00148 void WriteConfigs(
00149 FILE* File,
00150 CLASS_TYPE Class);
00151
00152 void FreeTrainingSamples (
00153 LIST CharList);
00154
00155 void FreeLabeledClassList (
00156 LIST ClassList);
00157
00158 void FreeLabeledList (
00159 LABELEDLIST LabeledList);
00160
00161 CLUSTERER *SetUpForClustering(
00162 LABELEDLIST CharSample);
00163
00164
00165
00166
00167
00168 LIST RemoveInsignificantProtos(
00169 LIST ProtoList,
00170 BOOL8 KeepSigProtos,
00171 BOOL8 KeepInsigProtos,
00172 int N);
00173
00174 void CleanUpUnusedData(
00175 LIST ProtoList);
00176
00177 void Normalize (
00178 float *Values);
00179
00180 void SetUpForFloat2Int(
00181 LIST LabeledClassList);
00182
00183 void WritePFFMTable(
00184 INT_TEMPLATES Templates,
00185 const char* filename);
00186
00187
00188 static char FontName[MAXNAMESIZE];
00189
00190 static char *Directory = NULL;
00191 static int MaxNumSamples = MAX_NUM_SAMPLES;
00192 static int Argc;
00193 static char **Argv;
00194
00195
00196 static BOOL8 ShowAllSamples = FALSE;
00197 static BOOL8 ShowSignificantProtos = TRUE;
00198 static BOOL8 ShowInsignificantProtos = FALSE;
00199
00207 static CLUSTERCONFIG Config =
00208 {
00209 elliptical, 0.40, 0.05, 1.0, 1e-6
00210 };
00211
00212 static FLOAT32 RoundingAccuracy = 0.0;
00213
00214
00215
00216
00217
00218
00252 int main (
00253 int argc,
00254 char **argv)
00255 {
00256 char *PageName;
00257 FILE *TrainingPage;
00258 FILE *OutFile;
00259 LIST CharList;
00260 CLUSTERER *Clusterer = NULL;
00261 LIST ProtoList = NIL;
00262 LABELEDLIST CharSample;
00263 PROTOTYPE *Prototype;
00264 LIST ClassList = NIL;
00265 int Cid, Pid;
00266 PROTO Proto;
00267 PROTO_STRUCT DummyProto;
00268 BIT_VECTOR Config2;
00269 MERGE_CLASS MergeClass;
00270 INT_TEMPLATES IntTemplates;
00271 LIST pCharList, pProtoList;
00272 char Filename[MAXNAMESIZE];
00273
00274 ParseArguments (argc, argv);
00275 InitFastTrainerVars ();
00276 InitSubfeatureVars ();
00277 while ((PageName = GetNextFilename()) != NULL)
00278 {
00279 printf ("\nReading %s ...", PageName);
00280 TrainingPage = Efopen (PageName, "r");
00281 CharList = ReadTrainingSamples (TrainingPage);
00282 fclose (TrainingPage);
00283
00284 pCharList = CharList;
00285 iterate(pCharList)
00286 {
00287
00288 CharSample = (LABELEDLIST) first (pCharList);
00289 printf ("\nClustering %s ...", CharSample->Label);
00290 Clusterer = SetUpForClustering(CharSample);
00291 ProtoList = ClusterSamples(Clusterer, &Config);
00292
00293 CleanUpUnusedData(ProtoList);
00294
00295
00296 ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
00297 ShowInsignificantProtos, Clusterer->SampleSize);
00298 FreeClusterer(Clusterer);
00299 MergeClass = FindClass (ClassList, CharSample->Label);
00300 if (MergeClass == NULL)
00301 {
00302 MergeClass = NewLabeledClass (CharSample->Label);
00303 ClassList = push (ClassList, MergeClass);
00304 }
00305 Cid = AddConfigToClass(MergeClass->Class);
00306 pProtoList = ProtoList;
00307 iterate (pProtoList)
00308 {
00309 Prototype = (PROTOTYPE *) first (pProtoList);
00310
00311
00312 Pid = FindClosestExistingProto (MergeClass->Class, MergeClass->NumMerged, Prototype);
00313 if (Pid == NO_PROTO)
00314 {
00315 Pid = AddProtoToClass (MergeClass->Class);
00316 Proto = ProtoIn (MergeClass->Class, Pid);
00317 MakeNewFromOld (Proto, Prototype);
00318 MergeClass->NumMerged[Pid] = 1;
00319 }
00320 else
00321 {
00322 MakeNewFromOld (&DummyProto, Prototype);
00323 ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
00324 (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
00325 ProtoIn (MergeClass->Class, Pid));
00326 MergeClass->NumMerged[Pid] ++;
00327 }
00328 Config2 = ConfigIn (MergeClass->Class, Cid);
00329 AddProtoToConfig (Pid, Config2);
00330 }
00331 FreeProtoList (&ProtoList);
00332 }
00333 FreeTrainingSamples (CharList);
00334 printf ("\n");
00335 }
00336
00337
00338 WriteMicrofeat(Directory, ClassList);
00339 InitIntProtoVars ();
00340 InitPrototypes ();
00341 SetUpForFloat2Int(ClassList);
00342 IntTemplates = CreateIntTemplates(TrainingData);
00343 strcpy (Filename, "");
00344 if (Directory != NULL)
00345 {
00346 strcat (Filename, Directory);
00347 strcat (Filename, "/");
00348 }
00349 strcat (Filename, "inttemp");
00350 #ifdef __UNIX__
00351 OutFile = Efopen (Filename, "w");
00352 #else
00353 OutFile = Efopen (Filename, "wb");
00354 #endif
00355 WriteIntTemplates(OutFile, IntTemplates);
00356 fclose (OutFile);
00357
00358
00359 WritePFFMTable(IntTemplates, "pffmtable");
00360 printf ("\nDone!\n");
00361 FreeLabeledClassList (ClassList);
00362 return 0;
00363 }
00364
00365
00366
00367
00368
00369
00370
00404 void ParseArguments(
00405 int argc,
00406 char **argv)
00407 {
00408 int Option;
00409 int ParametersRead;
00410 BOOL8 Error;
00411 extern char *optarg;
00412
00413 Error = FALSE;
00414 Argc = argc;
00415 Argv = argv;
00416 while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
00417 {
00418 switch ( Option )
00419 {
00420 case 'n':
00421
00422 ShowInsignificantProtos = FALSE;
00423 break;
00424 case 'p':
00425
00426 ShowSignificantProtos = FALSE;
00427 break;
00428 case 'd':
00429 ShowAllSamples = FALSE;
00430 break;
00431 case 'C':
00432 ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
00433 if ( ParametersRead != 1 ) Error = TRUE;
00434 else if ( Config.Confidence > 1 ) Config.Confidence = 1;
00435 else if ( Config.Confidence < 0 ) Config.Confidence = 0;
00436 break;
00437 case 'I':
00438 ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
00439 if ( ParametersRead != 1 ) Error = TRUE;
00440 else if ( Config.Independence > 1 ) Config.Independence = 1;
00441 else if ( Config.Independence < 0 ) Config.Independence = 0;
00442 break;
00443 case 'M':
00444 ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
00445 if ( ParametersRead != 1 ) Error = TRUE;
00446 else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
00447 else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
00448 break;
00449 case 'B':
00450 ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
00451 if ( ParametersRead != 1 ) Error = TRUE;
00452 else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
00453 else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
00454 break;
00455 case 'R':
00456 ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
00457 if ( ParametersRead != 1 ) Error = TRUE;
00458 else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
00459 else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
00460 break;
00461 case 'S':
00462 switch ( optarg[0] )
00463 {
00464 case 's': Config.ProtoStyle = spherical; break;
00465 case 'e': Config.ProtoStyle = elliptical; break;
00466 case 'm': Config.ProtoStyle = mixed; break;
00467 case 'a': Config.ProtoStyle = automatic; break;
00468 default: Error = TRUE;
00469 }
00470 break;
00471 case 'D':
00472 Directory = optarg;
00473 break;
00474 case 'N':
00475 if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
00476 MaxNumSamples <= 0)
00477 Error = TRUE;
00478 break;
00479 case '?':
00480 Error = TRUE;
00481 break;
00482 }
00483 if ( Error )
00484 {
00485 fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
00486 fprintf (stderr, "\t[-S ProtoStyle]\n");
00487 fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
00488 fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
00489 exit (2);
00490 }
00491 }
00492 }
00493
00494
00509 char *GetNextFilename ()
00510 {
00511 if (optind < Argc)
00512 return (Argv [optind++]);
00513 else
00514 return (NULL);
00515
00516 }
00517
00518
00519
00530 LIST ReadTrainingSamples (
00531 FILE *File)
00532 {
00533 char CharName[MAXNAMESIZE];
00534 LABELEDLIST CharSample;
00535 FEATURE_SET FeatureSamples;
00536 LIST TrainingSamples = NIL;
00537 CHAR_DESC CharDesc;
00538 int Type, i;
00539
00540 while (fscanf (File, "%s %s", FontName, CharName) == 2) {
00541 CharSample = FindList (TrainingSamples, CharName);
00542 if (CharSample == NULL) {
00543 CharSample = NewLabeledList (CharName);
00544 TrainingSamples = push (TrainingSamples, CharSample);
00545 }
00546 CharDesc = ReadCharDescription (File);
00547 Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
00548 FeatureSamples = FeaturesOfType(CharDesc, Type);
00549
00550 for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
00551 FEATURE f = FeatureSamples->Features[feature];
00552 for (int dim =0; dim < f->Type->NumParams; ++dim)
00553 f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
00554 }
00555 CharSample->List = push (CharSample->List, FeatureSamples);
00556 for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
00557 if (Type != i)
00558 FreeFeatureSet (FeaturesOfType (CharDesc, i));
00559 free (CharDesc);
00560 }
00561 return (TrainingSamples);
00562
00563 }
00564
00565
00566
00578 LABELEDLIST FindList (
00579 LIST List,
00580 char *Label)
00581 {
00582 LABELEDLIST LabeledList;
00583
00584 iterate (List)
00585 {
00586 LabeledList = (LABELEDLIST) first (List);
00587 if (strcmp (LabeledList->Label, Label) == 0)
00588 return (LabeledList);
00589 }
00590 return (NULL);
00591
00592 }
00593
00594
00605 MERGE_CLASS FindClass (
00606 LIST List,
00607 char *Label)
00608 {
00609 MERGE_CLASS MergeClass;
00610
00611 iterate (List)
00612 {
00613 MergeClass = (MERGE_CLASS) first (List);
00614 if (strcmp (MergeClass->Label, Label) == 0)
00615 return (MergeClass);
00616 }
00617 return (NULL);
00618
00619 }
00620
00621
00622
00633 LABELEDLIST NewLabeledList (
00634 char *Label)
00635 {
00636 LABELEDLIST LabeledList;
00637
00638 LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
00639 LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
00640 strcpy (LabeledList->Label, Label);
00641 LabeledList->List = NIL;
00642 return (LabeledList);
00643
00644 }
00645
00646
00647
00654 void WritePFFMTable(
00655 INT_TEMPLATES Templates, const char* filename) {
00656
00657 FILE* fp = Efopen(filename, "wb");
00658
00659
00660 for (int i = 0; i < NumClassesIn (Templates); i++) {
00661 int MaxLength = 0;
00662 INT_CLASS Class = ClassForIndex (Templates, i);
00663 for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
00664 if (LengthForConfigId (Class, ConfigId) > MaxLength)
00665 MaxLength = LengthForConfigId (Class, ConfigId);
00666 }
00667 fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
00668 }
00669 fclose(fp);
00670 }
00671
00678 MERGE_CLASS NewLabeledClass (
00679 char *Label)
00680 {
00681 MERGE_CLASS MergeClass;
00682
00683 MergeClass = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
00684 MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
00685 strcpy (MergeClass->Label, Label);
00686 MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
00687 return (MergeClass);
00688
00689 }
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707 void WriteTrainingSamples (
00708 char *Directory,
00709 LIST CharList)
00710 {
00711 LABELEDLIST CharSample;
00712 FEATURE_SET FeatureSet;
00713 LIST FeatureList;
00714 FILE *File;
00715 char Filename[MAXNAMESIZE];
00716 int NumSamples;
00717
00718 iterate (CharList)
00719 {
00720 CharSample = (LABELEDLIST) first (CharList);
00721
00722
00723 strcpy (Filename, "");
00724 if (Directory != NULL)
00725 {
00726 strcat (Filename, Directory);
00727 strcat (Filename, "/");
00728 }
00729 strcat (Filename, FontName);
00730 strcat (Filename, "/");
00731 strcat (Filename, CharSample->Label);
00732 strcat (Filename, ".");
00733 strcat (Filename, PROGRAM_FEATURE_TYPE);
00734 printf ("\nWriting %s ...", Filename);
00735
00736
00737
00738 File = fopen (Filename, "r");
00739 if (File == NULL)
00740 {
00741 File = Efopen (Filename, "w");
00742 WriteOldParamDesc
00743 (File, DefinitionOf (ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)));
00744 }
00745 else
00746 {
00747 fclose (File);
00748 File = Efopen (Filename, "a");
00749 }
00750
00751
00752 FeatureList = CharSample->List;
00753 NumSamples = 0;
00754 iterate (FeatureList)
00755 {
00756 if (NumSamples >= MaxNumSamples) break;
00757
00758 FeatureSet = (FEATURE_SET) first (FeatureList);
00759 WriteFeatureSet (File, FeatureSet);
00760 NumSamples++;
00761 }
00762 fclose (File);
00763 }
00764 }
00765
00766
00767
00768
00784 void WriteClusteredTrainingSamples (
00785 char *Directory,
00786 LIST ProtoList,
00787 CLUSTERER *Clusterer,
00788 LABELEDLIST CharSample)
00789 {
00790 FILE *File;
00791 char Filename[MAXNAMESIZE];
00792
00793 strcpy (Filename, "");
00794 if (Directory != NULL)
00795 {
00796 strcat (Filename, Directory);
00797 strcat (Filename, "/");
00798 }
00799 strcat (Filename, FontName);
00800 strcat (Filename, "/");
00801 strcat (Filename, CharSample->Label);
00802 strcat (Filename, ".");
00803 strcat (Filename, PROGRAM_FEATURE_TYPE);
00804 strcat (Filename, ".p");
00805 printf ("\nWriting %s ...", Filename);
00806 #ifdef __UNIX__
00807 File = Efopen (Filename, "w");
00808 #else
00809 File = Efopen (Filename, "wb");
00810 #endif
00811 WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
00812 ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
00813 fclose (File);
00814
00815 }
00816
00817
00824 void WriteMergedTrainingSamples(
00825 char *Directory,
00826 LIST ClassList)
00827
00828 {
00829 FILE *File;
00830 char Filename[MAXNAMESIZE];
00831 MERGE_CLASS MergeClass;
00832
00833 iterate (ClassList)
00834 {
00835 MergeClass = (MERGE_CLASS) first (ClassList);
00836 strcpy (Filename, "");
00837 if (Directory != NULL)
00838 {
00839 strcat (Filename, Directory);
00840 strcat (Filename, "/");
00841 }
00842 strcat (Filename, "Merged/");
00843 strcat (Filename, MergeClass->Label);
00844 strcat (Filename, PROTO_SUFFIX);
00845 printf ("\nWriting Merged %s ...", Filename);
00846 File = Efopen (Filename, "w");
00847 WriteOldProtoFile (File, MergeClass->Class);
00848 fclose (File);
00849
00850 strcpy (Filename, "");
00851 if (Directory != NULL)
00852 {
00853 strcat (Filename, Directory);
00854 strcat (Filename, "/");
00855 }
00856 strcat (Filename, "Merged/");
00857 strcat (Filename, MergeClass->Label);
00858 strcat (Filename, CONFIG_SUFFIX);
00859 printf ("\nWriting Merged %s ...", Filename);
00860 File = Efopen (Filename, "w");
00861 WriteOldConfigFile (File, MergeClass->Class);
00862 fclose (File);
00863 }
00864
00865 }
00866
00867
00874 void WriteMicrofeat(
00875 char *Directory,
00876 LIST ClassList)
00877
00878 {
00879 FILE *File;
00880 char Filename[MAXNAMESIZE];
00881 MERGE_CLASS MergeClass;
00882
00883 strcpy (Filename, "");
00884 if (Directory != NULL)
00885 {
00886 strcat (Filename, Directory);
00887 strcat (Filename, "/");
00888 }
00889 strcat (Filename, "Microfeat");
00890 File = Efopen (Filename, "w");
00891 printf ("\nWriting Merged %s ...", Filename);
00892 iterate(ClassList)
00893 {
00894 MergeClass = (MERGE_CLASS) first (ClassList);
00895 WriteProtos(File, MergeClass);
00896 WriteConfigs(File, MergeClass->Class);
00897 }
00898 fclose (File);
00899 }
00900
00901
00908 void WriteProtos(
00909 FILE* File,
00910 MERGE_CLASS MergeClass)
00911 {
00912 float Values[3];
00913 int i;
00914 PROTO Proto;
00915
00916 fprintf(File, "%c\n", NameToChar(MergeClass->Label));
00917 fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));
00918 for(i=0; i < NumProtosIn(MergeClass->Class); i++)
00919 {
00920 Proto = ProtoIn(MergeClass->Class,i);
00921 fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", ProtoX(Proto), ProtoY(Proto),
00922 ProtoLength(Proto), ProtoAngle(Proto));
00923 Values[0] = ProtoX(Proto);
00924 Values[1] = ProtoY(Proto);
00925 Values[2] = ProtoAngle(Proto);
00926 Normalize(Values);
00927 fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
00928 }
00929 }
00930
00931
00938 void WriteConfigs(
00939 FILE* File,
00940 CLASS_TYPE Class)
00941 {
00942 BIT_VECTOR Config;
00943 int i, j, WordsPerConfig;
00944
00945 WordsPerConfig = WordsInVectorOfSize(NumProtosIn(Class));
00946 fprintf(File, "%d %d\n", NumConfigsIn(Class),WordsPerConfig);
00947 for(i=0; i < NumConfigsIn(Class); i++)
00948 {
00949 Config = ConfigIn(Class,i);
00950 for(j=0; j < WordsPerConfig; j++)
00951 fprintf(File, "%08x ", Config[j]);
00952 fprintf(File, "\n");
00953 }
00954 fprintf(File, "\n");
00955 }
00956
00957
00958
00969 void FreeTrainingSamples (
00970 LIST CharList)
00971 {
00972 LABELEDLIST CharSample;
00973 FEATURE_SET FeatureSet;
00974 LIST FeatureList;
00975
00976
00977 printf ("\nFreeTrainingSamples...");
00978 iterate (CharList)
00979 {
00980 CharSample = (LABELEDLIST) first (CharList);
00981 FeatureList = CharSample->List;
00982 iterate (FeatureList)
00983 {
00984 FeatureSet = (FEATURE_SET) first (FeatureList);
00985 FreeFeatureSet (FeatureSet);
00986 }
00987 FreeLabeledList (CharSample);
00988 }
00989 destroy (CharList);
00990
00991 }
00992
00993
01014 void FreeLabeledClassList (
01015 LIST ClassList)
01016 {
01017 MERGE_CLASS MergeClass;
01018
01019 iterate (ClassList)
01020 {
01021 MergeClass = (MERGE_CLASS) first (ClassList);
01022 free (MergeClass->Label);
01023 FreeClass(MergeClass->Class);
01024 free (MergeClass);
01025 }
01026 destroy (ClassList);
01027
01028 }
01029
01030
01031
01042 void FreeLabeledList (
01043 LABELEDLIST LabeledList)
01044 {
01045 destroy (LabeledList->List);
01046 free (LabeledList->Label);
01047 free (LabeledList);
01048
01049 }
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071 CLUSTERER *SetUpForClustering(
01072 LABELEDLIST CharSample)
01073 {
01074 UINT16 N;
01075 int i, j;
01076 FLOAT32 *Sample = NULL;
01077 CLUSTERER *Clusterer;
01078 INT32 CharID;
01079 LIST FeatureList = NULL;
01080 FEATURE_SET FeatureSet = NULL;
01081 FEATURE_DESC FeatureDesc = NULL;
01082
01083
01084 FeatureDesc = DefinitionOf(ShortNameToFeatureType(PROGRAM_FEATURE_TYPE));
01085 N = FeatureDesc->NumParams;
01086
01087 Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
01088
01089
01090 FeatureList = CharSample->List;
01091 CharID = 0;
01092 iterate(FeatureList)
01093 {
01094 if (CharID >= MaxNumSamples) break;
01095
01096 FeatureSet = (FEATURE_SET) first (FeatureList);
01097 for (i=0; i < FeatureSet->MaxNumFeatures; i++)
01098 {
01099 if (Sample == NULL)
01100 Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01101 for (j=0; j < N; j++)
01102 if (RoundingAccuracy != 0.0)
01103 Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
01104 else
01105 Sample[j] = FeatureSet->Features[i]->Params[j];
01106 MakeSample (Clusterer, Sample, CharID);
01107 }
01108 CharID++;
01109 }
01110 if ( Sample != NULL ) free( Sample );
01111 return( Clusterer );
01112
01113 }
01114
01115
01122 LIST RemoveInsignificantProtos(
01123 LIST ProtoList,
01124 BOOL8 KeepSigProtos,
01125 BOOL8 KeepInsigProtos,
01126 int N)
01127
01128 {
01129 LIST NewProtoList = NIL;
01130 LIST pProtoList;
01131 PROTOTYPE* Proto;
01132 PROTOTYPE* NewProto;
01133 int i;
01134
01135 pProtoList = ProtoList;
01136 iterate(pProtoList)
01137 {
01138 Proto = (PROTOTYPE *) first (pProtoList);
01139 if ((Proto->Significant && KeepSigProtos) ||
01140 (!Proto->Significant && KeepInsigProtos))
01141 {
01142 NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
01143
01144 NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01145 NewProto->Significant = Proto->Significant;
01146 NewProto->Style = Proto->Style;
01147 NewProto->NumSamples = Proto->NumSamples;
01148 NewProto->Cluster = NULL;
01149 NewProto->Distrib = NULL;
01150
01151 for (i=0; i < N; i++)
01152 NewProto->Mean[i] = Proto->Mean[i];
01153 if (Proto->Variance.Elliptical != NULL)
01154 {
01155 NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01156 for (i=0; i < N; i++)
01157 NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
01158 }
01159 else
01160 NewProto->Variance.Elliptical = NULL;
01161
01162 if (Proto->Magnitude.Elliptical != NULL)
01163 {
01164 NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01165 for (i=0; i < N; i++)
01166 NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
01167 }
01168 else
01169 NewProto->Magnitude.Elliptical = NULL;
01170
01171 if (Proto->Weight.Elliptical != NULL)
01172 {
01173 NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01174 for (i=0; i < N; i++)
01175 NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
01176 }
01177 else
01178 NewProto->Weight.Elliptical = NULL;
01179
01180 NewProto->TotalMagnitude = Proto->TotalMagnitude;
01181 NewProto->LogMagnitude = Proto->LogMagnitude;
01182 NewProtoList = push_last(NewProtoList, NewProto);
01183 }
01184 }
01185
01186
01187 return (NewProtoList);
01188 }
01189
01190
01197 void CleanUpUnusedData(
01198 LIST ProtoList)
01199 {
01200 PROTOTYPE* Prototype;
01201
01202 iterate(ProtoList)
01203 {
01204 Prototype = (PROTOTYPE *) first (ProtoList);
01205 if(Prototype->Variance.Elliptical != NULL)
01206 {
01207 memfree(Prototype->Variance.Elliptical);
01208 Prototype->Variance.Elliptical = NULL;
01209 }
01210 if(Prototype->Magnitude.Elliptical != NULL)
01211 {
01212 memfree(Prototype->Magnitude.Elliptical);
01213 Prototype->Magnitude.Elliptical = NULL;
01214 }
01215 if(Prototype->Weight.Elliptical != NULL)
01216 {
01217 memfree(Prototype->Weight.Elliptical);
01218 Prototype->Weight.Elliptical = NULL;
01219 }
01220 }
01221 }
01222
01223
01229 void Normalize (
01230 float *Values)
01231 {
01232 register float Slope;
01233 register float Intercept;
01234 register float Normalizer;
01235
01236 Slope = tan (Values [2] * 2 * PI);
01237 Intercept = Values [1] - Slope * Values [0];
01238 Normalizer = 1 / sqrt (Slope * Slope + 1.0);
01239
01240 Values [0] = Slope * Normalizer;
01241 Values [1] = - Normalizer;
01242 Values [2] = Intercept * Normalizer;
01243 }
01244
01251 void SetUpForFloat2Int(
01252 LIST LabeledClassList)
01253 {
01254 MERGE_CLASS MergeClass;
01255 CLASS_TYPE Class;
01256 int NumProtos;
01257 int NumConfigs;
01258 int NumWords;
01259 int i, j;
01260 float Values[3];
01261 PROTO NewProto;
01262 PROTO OldProto;
01263 BIT_VECTOR NewConfig;
01264 BIT_VECTOR OldConfig;
01265
01266 printf("Float2Int ...");
01267
01268 iterate(LabeledClassList)
01269 {
01270 MergeClass = (MERGE_CLASS) first (LabeledClassList);
01271 Class = &TrainingData[NameToChar(MergeClass->Label)];
01272 NumProtos = NumProtosIn(MergeClass->Class);
01273 NumConfigs = NumConfigsIn(MergeClass->Class);
01274
01275 NumProtosIn(Class) = NumProtos;
01276 Class->MaxNumProtos = NumProtos;
01277 Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
01278 for(i=0; i < NumProtos; i++)
01279 {
01280 NewProto = ProtoIn(Class, i);
01281 OldProto = ProtoIn(MergeClass->Class, i);
01282 Values[0] = ProtoX(OldProto);
01283 Values[1] = ProtoY(OldProto);
01284 Values[2] = ProtoAngle(OldProto);
01285 Normalize(Values);
01286 ProtoX(NewProto) = ProtoX(OldProto);
01287 ProtoY(NewProto) = ProtoY(OldProto);
01288 ProtoLength(NewProto) = ProtoLength(OldProto);
01289 ProtoAngle(NewProto) = ProtoAngle(OldProto);
01290 CoefficientA(NewProto) = Values[0];
01291 CoefficientB(NewProto) = Values[1];
01292 CoefficientC(NewProto) = Values[2];
01293 }
01294
01295 NumConfigsIn(Class) = NumConfigs;
01296 Class->MaxNumConfigs = NumConfigs;
01297 Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
01298 NumWords = WordsInVectorOfSize(NumProtos);
01299 for(i=0; i < NumConfigs; i++)
01300 {
01301 NewConfig = NewBitVector(NumProtos);
01302 OldConfig = ConfigIn(MergeClass->Class, i);
01303 for(j=0; j < NumWords; j++)
01304 NewConfig[j] = OldConfig[j];
01305 ConfigIn(Class, i) = NewConfig;
01306 }
01307 }
01308 }