training/mfTraining.cpp File Reference

#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "protos.h"
#include "minmax.h"
#include "debug.h"
#include "const.h"
#include "mergenf.h"
#include "name2char.h"
#include "intproto.h"
#include "variables.h"
#include "freelist.h"
#include <string.h>
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Classes

Defines

Typedefs

Functions

Variables


Define Documentation

#define MAX_NUM_SAMPLES   10000

Definition at line 52 of file mfTraining.cpp.

#define MAXNAMESIZE   80

Note:
File: mfTraining.cpp
Separates training pages into files for each character;
extracts from those files only the features (and their parameters) of feature type mf.
Author:
Dan Johnson; Christy Russon
Date:
Fri Aug 18 08:53:50 1989, DSJ, Created; 5/25/90, DSJ, Adapted to multiple feature types; 5/17/98 Changes made to make feature specific and simplify structures. First step in simplifying training process.
(c) Copyright Hewlett-Packard Company, 1988.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Definition at line 51 of file mfTraining.cpp.

#define MINSD   (1.0f / 128.0f)

Definition at line 54 of file mfTraining.cpp.

#define PROGRAM_FEATURE_TYPE   "mf"

Definition at line 53 of file mfTraining.cpp.

#define round ( x,
frag   )     (floor(x/frag+.5)*frag)

Definition at line 90 of file mfTraining.cpp.


Typedef Documentation

typedef struct LABELEDLISTNODE * LABELEDLIST

MERGE_CLASS

FIX:.

Definition at line 88 of file mfTraining.cpp.


Function Documentation

void CleanUpUnusedData ( LIST  ProtoList  ) 

Parameters:
ProtoList List of prototypes whose Variance, Magnitude and Weight arrays are freed and reset to NULL

Definition at line 1197 of file mfTraining.cpp.

References FLOATUNION::Elliptical, first, iterate, proto::Magnitude, memfree(), NULL, proto::Variance, and proto::Weight.

01199 {
01200    PROTOTYPE* Prototype;
01201 
01202    iterate(ProtoList)
01203    {
01204       Prototype = (PROTOTYPE *) first (ProtoList);
01205       if(Prototype->Variance.Elliptical != NULL)
01206       {
01207          memfree(Prototype->Variance.Elliptical);
01208          Prototype->Variance.Elliptical = NULL;
01209       }
01210       if(Prototype->Magnitude.Elliptical != NULL)
01211       {
01212          memfree(Prototype->Magnitude.Elliptical);
01213          Prototype->Magnitude.Elliptical = NULL;
01214       }
01215       if(Prototype->Weight.Elliptical != NULL)
01216       {
01217          memfree(Prototype->Weight.Elliptical);
01218          Prototype->Weight.Elliptical = NULL;
01219       }
01220    }
01221 }

MERGE_CLASS FindClass ( LIST  List,
char *  Label 
)

Find Label in List.

Parameters:
List List to search
Label Label to search for
Returns:
Merge class with the specified Label, or NULL if no entry matches.
Searches through a list of merge classes for the entry whose Label is equal to the specified Label.
Date:
Fri Aug 18 15:57:41 1989, DSJ, Created.

Definition at line 605 of file mfTraining.cpp.

References first, iterate, MERGE_CLASS_NODE::Label, and NULL.

00608 {
00609    MERGE_CLASS MergeClass;
00610 
00611    iterate (List)
00612     {
00613       MergeClass = (MERGE_CLASS) first (List);
00614       if (strcmp (MergeClass->Label, Label) == 0)
00615          return (MergeClass);
00616     }
00617    return (NULL);
00618 
00619 }// FindClass

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

Find Label in List.

Parameters:
List List to search
Label Label to search for
Returns:
Labeled list with the specified Label or NULL.
Searches thru a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, NULL is returned.
Date:
Fri Aug 18 15:57:41 1989, DSJ, Created.

void FreeLabeledClassList ( LIST  ClassList  ) 

Frees memory allocated for each labeled class in ClassList.

Parameters:
ClassList List of labeled classes to be freed
Returns:
None
Note:
Does NOT free any memory which may be consumed by items of a font.
Date:
Fri Aug 18 17:44:27 1989, DSJ, Created.
XXX Legacy header: FontList = list of all fonts in document. Globals: none. Operation: This routine deallocates all of the space allocated to the specified list of training samples. Return: none. Exceptions: none. History: Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 1014 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, destroy(), first, FreeClass(), iterate, and MERGE_CLASS_NODE::Label.

01016 {
01017    MERGE_CLASS MergeClass;
01018 
01019    iterate (ClassList)     /* iterate thru all of the fonts */
01020    {
01021       MergeClass = (MERGE_CLASS) first (ClassList);
01022       free (MergeClass->Label);
01023       FreeClass(MergeClass->Class);
01024       free (MergeClass);
01025    }
01026    destroy (ClassList);
01027 
01028 }  /* FreeLabeledClassList */

void FreeLabeledList ( LABELEDLIST  LabeledList  ) 

Destroy and free memory reserved for LabeledList only.

Parameters:
LabeledList Labeled list to be freed
Returns:
None
Deallocates all of the memory consumed by a labeled list.
Note:
Does NOT free any memory which may be consumed by the items in the list.
Date:
Fri Aug 18 17:52:45 1989, DSJ, Created.

void FreeTrainingSamples ( LIST  CharList  ) 

Frees memory used by CharList.

Parameters:
CharList List of all labeled sample lists read from the training pages
Returns:
None
Deallocates memory allocated to the specified list of training samples.
Note:
Uses FreeFeatureSet() on each feature set
Date:
Fri Aug 18 17:44:27 1989, DSJ, Created.

char* GetNextFilename (  ) 

Returns next command-line argument.

Note:
Globals:
  • optind = defined by getopt sys call
  • Argc = Global copy of argc
  • Argv = Global copy of argv
Returns:
Next command-line argument or NULL.
If there are no remaining command line arguments, it returns NULL. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.
Date:
Fri Aug 18 09:34:12 1989, DSJ, Created.

int main ( int  argc,
char **  argv 
)

Create files "tessdata/inttemp" and "tessdata/pffmtable" from feature samples in training pages.

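Definition at line 95 of file tesseractmain.cpp.

Note: Doxygen has linked this entry to the main() defined in tesseractmain.cpp. The reference list and source listing below belong to that function (it recognizes an image and writes a .txt file), not to the training main() described above.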

References check_legal_image_size(), TessBaseAPI::End(), ERRCODE::error(), EXIT, IMAGE::get_bpp(), IMAGE::get_buffer(), IMAGE::get_xsize(), IMAGE::get_ysize(), TessBaseAPI::Init(), MEMORY_OUT, NULL, IMAGE::read(), IMAGE::read_header(), READFAILED, TessBaseAPI::TesseractRect(), TestEnvironment(), TestSocketClient(), tprintf(), USAGE, and wxArgv.

00095                                 {
00096   STRING outfile;               //output file
00097 
00098   if (argc < 3) {
00099     USAGE.error (argv[0], EXIT,
00100       "%s imagename outputbase [configfile [[+|-]varfile]...]\n", argv[0]);
00101   }
00102 
00103   if (argc == 3)
00104     TessBaseAPI::Init(argv[0], argv[1], NULL, false, 0, argv + 2);
00105   else
00106     TessBaseAPI::Init(argv[0], argv[1], argv[3], false, argc - 4, argv + 4);
00107 
00108   tprintf ("Tesseract Open Source OCR Engine\n");
00109 
00110   IMAGE image;
00111 #ifdef _TIFFIO_
00112   TIFF* tif = TIFFOpen(argv[1], "r");
00113   if (tif) {
00114     read_tiff_image(tif, &image);
00115     TIFFClose(tif);
00116   } else {
00117     READFAILED.error (argv[0], EXIT, argv[1]);
00118   }
00119 #else
00120   if (image.read_header(argv[1]) < 0)
00121     READFAILED.error (argv[0], EXIT, argv[1]);
00122   if (image.read(image.get_ysize ()) < 0) {
00123     MEMORY_OUT.error(argv[0], EXIT, "Read of image %s",
00124       argv[1]);
00125   }
00126 #endif // _TIFFIO_
00127   int bytes_per_line = check_legal_image_size(image.get_xsize(),
00128                                               image.get_ysize(),
00129                                               image.get_bpp());
00130   char* text = TessBaseAPI::TesseractRect(image.get_buffer(), image.get_bpp()/8,
00131                                           bytes_per_line, 0, 0,
00132                                           image.get_xsize(), image.get_ysize());
00133   outfile = argv[2];
00134   outfile += ".txt";
00135   FILE* fp = fopen(outfile.string(), "w");
00136   if (fp != NULL) {
00137     fwrite(text, 1, strlen(text), fp);
00138     fclose(fp);
00139   }
00140   delete [] text;
00141   TessBaseAPI::End();
00142 
00143   return 0;                      //Normal exit
00144 }

MERGE_CLASS NewLabeledClass ( char *  Label  ) 

Parameters:
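Label Label to copy into the new merge class
Returns:
Newly allocated MERGE_CLASS holding its own copy of Label and a new class created with MAX_NUM_PROTOS and MAX_NUM_CONFIGS.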

Definition at line 678 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, Emalloc(), MERGE_CLASS_NODE::Label, MAX_NUM_CONFIGS, MAX_NUM_PROTOS, and NewClass().

00680 {
00681    MERGE_CLASS MergeClass;
00682 
00683    MergeClass = (MERGE_CLASS) Emalloc (sizeof (MERGE_CLASS_NODE));
00684    MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
00685    strcpy (MergeClass->Label, Label);
00686    MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
00687    return (MergeClass);
00688 
00689 }  /* NewLabeledClass */

LABELEDLIST NewLabeledList ( char *  Label  ) 

Parameters:
Label Label for new list
Returns:
New, empty labeled list.
XXX This routine allocates a new, empty labeled list and gives it the specified label. Exceptions: none. History: Fri Aug 18 16:08:46 1989, DSJ, Created.

void Normalize ( float *  Values  ) 

Parameters:
Values Values to be modified

Definition at line 1229 of file mfTraining.cpp.

References PI.

Referenced by SetUpForFloat2Int(), and WriteProtos().

01231 {
01232    register float Slope;
01233    register float Intercept;
01234    register float Normalizer;
01235 
01236    Slope      = tan (Values [2] * 2 * PI);
01237    Intercept  = Values [1] - Slope * Values [0];
01238    Normalizer = 1 / sqrt (Slope * Slope + 1.0);
01239 
01240    Values [0] = Slope * Normalizer;
01241    Values [1] = - Normalizer;
01242    Values [2] = Intercept * Normalizer;
01243 }// Normalize

void ParseArguments ( int  argc,
char **  argv 
)

Parses the command-line arguments.

Parameters:
argc Number of command line arguments
argv Array of command line arguments
Note:
Globals:
  • ShowAllSamples = flag controlling samples display
  • ShowSignificantProtos = flag controlling proto display
  • ShowInsignificantProtos = flag controlling proto display
  • Config = current clustering parameters
  • optarg = defined by getopt sys call
  • optind = defined by getopt sys call
  • Argc = global copy of argc
  • Argv = global copy of argv

Illegal options terminate the program.

The legal arguments are:
-d              = "turn off display of samples"
-p              = "turn off significant protos"
-n              = "turn off insignificant proto"
-S <style>      = "spherical" | "elliptical" | "mixed" | "automatic"
-M MinSamples   = "min samples per prototype (%)"
-B MaxIllegal   = "max illegal chars per cluster (%)"
-I Independence = "0 to 1"
-C Confidence   = "1e-200 to 1.0"
-D Directory
-N MaxNumSamples = limit how many features to consider from training page
-R RoundingAccuracy
Date:
7/24/89, DSJ, Created.
Todo:
The usage printed on error is out of sync with above

LIST ReadTrainingSamples ( FILE *  File  ) 

Reads training samples from File and places them into a list in which the samples are grouped by CharName. The font name of each sample is read into the global FontName but is not used as a grouping level.

Parameters:
File Opened text file to read samples from
Returns:
List of samples from File
Date:
Fri Aug 18 13:11:39 1989, DSJ, Created; 5/17/98 simplifications to structure, eliminated font, and feature specification levels of structure.

Definition at line 530 of file mfTraining.cpp.

References f, FEATURE_SET_STRUCT::Features, FeaturesOfType, FindList(), FontName, FreeFeatureSet(), fscanf(), LABELEDLISTNODE::List, MAXNAMESIZE, MINSD, NewLabeledList(), NIL, NULL, FEATURE_SET_STRUCT::NumFeatures, NumFeatureSetsIn, PROGRAM_FEATURE_TYPE, push(), ReadCharDescription(), ShortNameToFeatureType(), and UniformRandomNumber().

00532 {
00533    char        CharName[MAXNAMESIZE];
00534    LABELEDLIST CharSample;
00535   FEATURE_SET FeatureSamples;
00536    LIST        TrainingSamples = NIL;
00537    CHAR_DESC      CharDesc;
00538    int         Type, i;
00539 
00540    while (fscanf (File, "%s %s", FontName, CharName) == 2) {
00541       CharSample = FindList (TrainingSamples, CharName);
00542       if (CharSample == NULL) {
00543          CharSample = NewLabeledList (CharName);
00544          TrainingSamples = push (TrainingSamples, CharSample);
00545       }
00546       CharDesc = ReadCharDescription (File);
00547       Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
00548       FeatureSamples = FeaturesOfType(CharDesc, Type);
00549 
00550       for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
00551          FEATURE f = FeatureSamples->Features[feature];
00552          for (int dim =0; dim < f->Type->NumParams; ++dim)
00553             f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
00554       }//for feature
00555       CharSample->List = push (CharSample->List, FeatureSamples);
00556       for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
00557          if (Type != i)
00558             FreeFeatureSet (FeaturesOfType (CharDesc, i));
00559       free (CharDesc);
00560     }//while
00561    return (TrainingSamples);
00562 
00563 }// ReadTrainingSamples

LIST RemoveInsignificantProtos ( LIST  ProtoList,
BOOL8  KeepSigProtos,
BOOL8  KeepInsigProtos,
int  N 
)

Parameters:
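ProtoList List of prototypes to filter (the list itself is not freed)
KeepSigProtos If true, significant prototypes are copied into the result
KeepInsigProtos If true, insignificant prototypes are copied into the result
N Number of elements in each Mean, Variance, Magnitude and Weight array
Returns:
New list holding copies of the kept prototypes, with Cluster and Distrib set to NULL.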

Definition at line 1122 of file mfTraining.cpp.

References proto::Cluster, proto::Distrib, FLOATUNION::Elliptical, Emalloc(), first, iterate, proto::Magnitude, proto::Mean, NIL, NULL, proto::NumSamples, proto::Significant, proto::Style, proto::Variance, and proto::Weight.

01128 {
01129    LIST NewProtoList = NIL;
01130    LIST pProtoList;
01131    PROTOTYPE* Proto;
01132    PROTOTYPE* NewProto;
01133    int i;
01134 
01135    pProtoList = ProtoList;
01136    iterate(pProtoList)
01137    {
01138       Proto = (PROTOTYPE *) first (pProtoList);
01139       if ((Proto->Significant && KeepSigProtos) ||
01140          (!Proto->Significant && KeepInsigProtos))
01141       {
01142          NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
01143 
01144          NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01145          NewProto->Significant = Proto->Significant;
01146          NewProto->Style = Proto->Style;
01147          NewProto->NumSamples = Proto->NumSamples;
01148          NewProto->Cluster = NULL;
01149          NewProto->Distrib = NULL;
01150 
01151          for (i=0; i < N; i++)
01152             NewProto->Mean[i] = Proto->Mean[i];
01153          if (Proto->Variance.Elliptical != NULL)
01154          {
01155             NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01156             for (i=0; i < N; i++)
01157                NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
01158          }
01159          else
01160             NewProto->Variance.Elliptical = NULL;
01161          //---------------------------------------------
01162          if (Proto->Magnitude.Elliptical != NULL)
01163          {
01164             NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01165             for (i=0; i < N; i++)
01166                NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
01167          }
01168          else
01169             NewProto->Magnitude.Elliptical = NULL;
01170          //------------------------------------------------
01171          if (Proto->Weight.Elliptical != NULL)
01172          {
01173             NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
01174             for (i=0; i < N; i++)
01175                NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
01176          }
01177          else
01178             NewProto->Weight.Elliptical = NULL;
01179 
01180          NewProto->TotalMagnitude = Proto->TotalMagnitude;
01181          NewProto->LogMagnitude = Proto->LogMagnitude;
01182          NewProtoList = push_last(NewProtoList, NewProto);
01183       }//if KeepSigProtos or KeepInsigProtos
01184    }//iterate (pProtoList)
01185 
01186    //FreeProtoList (ProtoList);
01187    return (NewProtoList);
01188 }// RemoveInsignificantProtos

CLUSTERER* SetUpForClustering ( LABELEDLIST  CharSample  ) 

void SetUpForFloat2Int ( LIST  LabeledClassList  ) 

Parameters:
LabeledClassList List of merge classes whose protos and configs are copied into TrainingData

Definition at line 1251 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, CoefficientA, CoefficientB, CoefficientC, ConfigIn, CLASS_STRUCT::Configurations, Emalloc(), first, iterate, MERGE_CLASS_NODE::Label, CLASS_STRUCT::MaxNumConfigs, CLASS_STRUCT::MaxNumProtos, NameToChar(), NewBitVector(), Normalize(), NumConfigsIn, NumProtosIn, ProtoAngle, ProtoIn, ProtoLength, CLASS_STRUCT::Prototypes, ProtoX, ProtoY, TrainingData, and WordsInVectorOfSize.

01253 {
01254    MERGE_CLASS MergeClass;
01255    CLASS_TYPE     Class;
01256    int            NumProtos;
01257    int            NumConfigs;
01258    int            NumWords;
01259    int            i, j;
01260    float       Values[3];
01261    PROTO       NewProto;
01262    PROTO       OldProto;
01263    BIT_VECTOR     NewConfig;
01264    BIT_VECTOR     OldConfig;
01265 
01266    printf("Float2Int ...");
01267 
01268    iterate(LabeledClassList)
01269    {
01270       MergeClass = (MERGE_CLASS) first (LabeledClassList);
01271       Class = &TrainingData[NameToChar(MergeClass->Label)];
01272       NumProtos = NumProtosIn(MergeClass->Class);
01273       NumConfigs = NumConfigsIn(MergeClass->Class);
01274 
01275       NumProtosIn(Class) = NumProtos;
01276       Class->MaxNumProtos = NumProtos;
01277       Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
01278       for(i=0; i < NumProtos; i++)
01279       {
01280          NewProto = ProtoIn(Class, i);
01281          OldProto = ProtoIn(MergeClass->Class, i);
01282          Values[0] = ProtoX(OldProto);
01283          Values[1] = ProtoY(OldProto);
01284          Values[2] = ProtoAngle(OldProto);
01285          Normalize(Values);
01286          ProtoX(NewProto) = ProtoX(OldProto);
01287          ProtoY(NewProto) = ProtoY(OldProto);
01288          ProtoLength(NewProto) = ProtoLength(OldProto);
01289          ProtoAngle(NewProto) = ProtoAngle(OldProto);
01290          CoefficientA(NewProto) = Values[0];
01291          CoefficientB(NewProto) = Values[1];
01292          CoefficientC(NewProto) = Values[2];
01293       }//for i
01294 
01295       NumConfigsIn(Class) = NumConfigs;
01296       Class->MaxNumConfigs = NumConfigs;
01297       Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
01298       NumWords = WordsInVectorOfSize(NumProtos);
01299       for(i=0; i < NumConfigs; i++)
01300       {
01301          NewConfig = NewBitVector(NumProtos);
01302          OldConfig = ConfigIn(MergeClass->Class, i);
01303          for(j=0; j < NumWords; j++)
01304             NewConfig[j] = OldConfig[j];
01305          ConfigIn(Class, i) = NewConfig;
01306       }
01307    }//iterate (LabeledClassList)
01308 }// SetUpForFloat2Int

void WriteClusteredTrainingSamples ( char *  Directory,
LIST  ProtoList,
CLUSTERER Clusterer,
LABELEDLIST  CharSample 
)

Writes the clustered prototypes for one character to a file.

Parameters:
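Directory Directory to place sample files into (may be NULL)
ProtoList List of prototypes passed to WriteProtoList()
Clusterer Clusterer supplying the SampleSize and ParamDesc passed to WriteProtoList()
CharSample Labeled list whose Label is used to build the output filename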
Note:
Globals:
  • MaxNumSamples = max number of samples per class to write
  • ShowSignificantProtos = Writes significant prototypes
  • ShowInsignificantProtos = Writes insignificant prototypes
Returns:
None
Writes the specified samples into files which are organized according to the font name and character name of the samples.
Date:
Fri Aug 18 16:17:06 1989, DSJ, Created.

Definition at line 784 of file mfTraining.cpp.

References Efopen(), FontName, LABELEDLISTNODE::Label, MAXNAMESIZE, NULL, CLUSTERER::ParamDesc, PROGRAM_FEATURE_TYPE, CLUSTERER::SampleSize, ShowInsignificantProtos, ShowSignificantProtos, and WriteProtoList().

00789 {
00790    FILE     *File;
00791    char     Filename[MAXNAMESIZE];
00792 
00793    strcpy (Filename, "");
00794    if (Directory != NULL)
00795    {
00796       strcat (Filename, Directory);
00797       strcat (Filename, "/");
00798    }
00799    strcat (Filename, FontName);
00800    strcat (Filename, "/");
00801    strcat (Filename, CharSample->Label);
00802    strcat (Filename, ".");
00803    strcat (Filename, PROGRAM_FEATURE_TYPE);
00804    strcat (Filename, ".p");
00805    printf ("\nWriting %s ...", Filename);
00806 #ifdef __UNIX__
00807    File = Efopen (Filename, "w");
00808 #else
00809    File = Efopen (Filename, "wb"); // BINARY file
00810 #endif
00811    WriteProtoList(File, Clusterer->SampleSize, Clusterer->ParamDesc,
00812       ProtoList, ShowSignificantProtos, ShowInsignificantProtos);
00813    fclose (File);
00814 
00815 }  /* WriteClusteredTrainingSamples */

void WriteConfigs ( FILE *  File,
CLASS_TYPE  Class 
)

Parameters:
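File Open file to write to
Class Class whose configurations are written, one line of hexadecimal words per configuration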

Definition at line 938 of file mfTraining.cpp.

References Config, ConfigIn, NumConfigsIn, NumProtosIn, and WordsInVectorOfSize.

Referenced by WriteMicrofeat().

00941 {
00942    BIT_VECTOR Config;
00943    int i, j, WordsPerConfig;
00944 
00945    WordsPerConfig = WordsInVectorOfSize(NumProtosIn(Class));
00946    fprintf(File, "%d %d\n", NumConfigsIn(Class),WordsPerConfig);
00947    for(i=0; i < NumConfigsIn(Class); i++)
00948    {
00949       Config = ConfigIn(Class,i);
00950       for(j=0; j < WordsPerConfig; j++)
00951          fprintf(File, "%08x ", Config[j]);
00952       fprintf(File, "\n");
00953    }
00954    fprintf(File, "\n");
00955 } // WriteConfigs

void WriteMergedTrainingSamples ( char *  Directory,
LIST  ClassList 
)

Parameters:
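Directory Optional directory prefix for the output files (may be NULL)
ClassList List of merge classes; a proto file and a config file are written under Merged/ for each one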

Definition at line 824 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, CONFIG_SUFFIX, Efopen(), first, iterate, MERGE_CLASS_NODE::Label, MAXNAMESIZE, NULL, PROTO_SUFFIX, WriteOldConfigFile(), and WriteOldProtoFile().

00828 {
00829    FILE     *File;
00830    char     Filename[MAXNAMESIZE];
00831    MERGE_CLASS MergeClass;
00832 
00833    iterate (ClassList)
00834    {
00835       MergeClass = (MERGE_CLASS) first (ClassList);
00836       strcpy (Filename, "");
00837       if (Directory != NULL)
00838       {
00839          strcat (Filename, Directory);
00840          strcat (Filename, "/");
00841       }
00842       strcat (Filename, "Merged/");
00843       strcat (Filename, MergeClass->Label);
00844       strcat (Filename, PROTO_SUFFIX);
00845       printf ("\nWriting Merged %s ...", Filename);
00846       File = Efopen (Filename, "w");
00847       WriteOldProtoFile (File, MergeClass->Class);
00848       fclose (File);
00849 
00850       strcpy (Filename, "");
00851       if (Directory != NULL)
00852       {
00853          strcat (Filename, Directory);
00854          strcat (Filename, "/");
00855       }
00856       strcat (Filename, "Merged/");
00857       strcat (Filename, MergeClass->Label);
00858       strcat (Filename, CONFIG_SUFFIX);
00859       printf ("\nWriting Merged %s ...", Filename);
00860       File = Efopen (Filename, "w");
00861       WriteOldConfigFile (File, MergeClass->Class);
00862       fclose (File);
00863    }//iterate (ClassList)
00864 
00865 }// WriteMergedTrainingSamples

void WriteMicrofeat ( char *  Directory,
LIST  ClassList 
)

Parameters:
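Directory Optional directory prefix for the output file (may be NULL)
ClassList List of merge classes whose protos and configs are written to the file Microfeat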

Definition at line 874 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, Efopen(), first, iterate, MAXNAMESIZE, NULL, WriteConfigs(), and WriteProtos().

00878 {
00879    FILE     *File;
00880    char     Filename[MAXNAMESIZE];
00881    MERGE_CLASS MergeClass;
00882 
00883    strcpy (Filename, "");
00884    if (Directory != NULL)
00885    {
00886       strcat (Filename, Directory);
00887       strcat (Filename, "/");
00888    }
00889    strcat (Filename, "Microfeat");
00890    File = Efopen (Filename, "w");
00891    printf ("\nWriting Merged %s ...", Filename);
00892    iterate(ClassList)
00893    {
00894       MergeClass = (MERGE_CLASS) first (ClassList);
00895       WriteProtos(File, MergeClass);
00896       WriteConfigs(File, MergeClass->Class);
00897    }
00898    fclose (File);
00899 } // WriteMicrofeat

void WritePFFMTable ( INT_TEMPLATES  Templates,
const char *  filename 
)

Parameters:
Templates Templates to write
filename Filename to write them to

Definition at line 654 of file mfTraining.cpp.

References ClassForIndex, ClassIdForIndex, Efopen(), LengthForConfigId, NumClassesIn, and NumIntConfigsIn.

00655                                                      {
00656 
00657   FILE* fp = Efopen(filename, "wb");
00658 
00659   // Write out each class
00660   for (int i = 0; i < NumClassesIn (Templates); i++) {
00661     int MaxLength = 0;
00662     INT_CLASS Class = ClassForIndex (Templates, i);
00663     for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
00664       if (LengthForConfigId (Class, ConfigId) > MaxLength)
00665         MaxLength = LengthForConfigId (Class, ConfigId);
00666     }
00667     fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
00668   }
00669   fclose(fp);
00670 }// WritePFFMTable

void WriteProtos ( FILE *  File,
MERGE_CLASS  MergeClass 
)

Parameters:
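File Open file to write to
MergeClass Merge class whose label character, proto count and protos are written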

Definition at line 908 of file mfTraining.cpp.

References MERGE_CLASS_NODE::Class, MERGE_CLASS_NODE::Label, NameToChar(), Normalize(), NumProtosIn, ProtoAngle, ProtoIn, ProtoLength, ProtoX, and ProtoY.

00911 {
00912    float Values[3];
00913    int i;
00914    PROTO Proto;
00915 
00916    fprintf(File, "%c\n", NameToChar(MergeClass->Label));
00917    fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));
00918    for(i=0; i < NumProtosIn(MergeClass->Class); i++)
00919    {
00920       Proto = ProtoIn(MergeClass->Class,i);
00921       fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", ProtoX(Proto), ProtoY(Proto),
00922          ProtoLength(Proto), ProtoAngle(Proto));
00923       Values[0] = ProtoX(Proto);
00924       Values[1] = ProtoY(Proto);
00925       Values[2] = ProtoAngle(Proto);
00926       Normalize(Values);
00927       fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);
00928    }
00929 } // WriteProtos

void WriteTrainingSamples ( char *  Directory,
LIST  CharList 
)


Variable Documentation

int Argc [static]

Definition at line 192 of file mfTraining.cpp.

char** Argv [static]

Definition at line 193 of file mfTraining.cpp.

CLUSTERCONFIG Config [static]

Initial value:

{
   elliptical, 0.40, 0.05, 1.0, 1e-6
}
Holds configuration parameters to control clustering.

-M 0.40   -B 0.05   -I 1.0   -C 1e-6.
Note:
These differ from Config in cnTraining.cpp

Definition at line 207 of file mfTraining.cpp.

char* Directory = NULL [static]

Definition at line 190 of file mfTraining.cpp.

char FontName[MAXNAMESIZE] [static]

Definition at line 188 of file mfTraining.cpp.

int MaxNumSamples = MAX_NUM_SAMPLES [static]

Definition at line 191 of file mfTraining.cpp.

FLOAT32 RoundingAccuracy = 0.0 [static]

Definition at line 212 of file mfTraining.cpp.

int row_number

NOT used in tesseract 1.02

Definition at line 56 of file mfTraining.cpp.

BOOL8 ShowAllSamples = FALSE [static]

Definition at line 196 of file mfTraining.cpp.

BOOL8 ShowInsignificantProtos = FALSE [static]

Definition at line 198 of file mfTraining.cpp.

BOOL8 ShowSignificantProtos = TRUE [static]

Definition at line 197 of file mfTraining.cpp.


Generated on Wed Feb 28 19:49:27 2007 for Tesseract by  doxygen 1.5.1