training/cnTraining.cpp File Reference

#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "name2char.h"
#include <string.h>
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Classes

Defines

Typedefs

Functions

Variables


Define Documentation

#define MAX_NUM_SAMPLES   10000

Default maximum number of samples (the initial value of MaxNumSamples), unless limited by the user to fewer.

Definition at line 42 of file cnTraining.cpp.

#define MAXNAMESIZE   80

Size of the fixed-length character buffers used for font names, character names, and output filenames. (In the source this define appears under the section banner "Include Files and Type Defines".)

Definition at line 40 of file cnTraining.cpp.

Referenced by ReadTrainingSamples(), WriteClusteredTrainingSamples(), WriteMergedTrainingSamples(), WriteMicrofeat(), WriteNormProtos(), and WriteTrainingSamples().

#define MINSD   (1.0f / 64.0f)

Minimum standard deviation

Definition at line 46 of file cnTraining.cpp.

Referenced by ReadTrainingSamples().

#define PROGRAM_FEATURE_TYPE   "cn"

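Short name of the feature type handled by this program ("cn"). It is passed to ShortNameToFeatureType() and used as the extension of the per-character sample files.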

Definition at line 44 of file cnTraining.cpp.

Referenced by ReadTrainingSamples(), SetUpForClustering(), WriteClusteredTrainingSamples(), and WriteTrainingSamples().

#define round ( x, frag )     (floor(x/frag+.5)*frag)

Rounds x to the nearest multiple of frag.

Definition at line 68 of file cnTraining.cpp.

Referenced by SetUpForClustering().


Typedef Documentation

LABELEDLIST

Pointer to a labeled list node (LABELEDLISTNODE), which holds a Label string and a List of items.


Function Documentation

void AddToNormProtosList ( LIST *  NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Add all elements of ProtoList to NormProtoList.

Parameters:
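NormProtoList Modified by function; the new labeled list is pushed onto it
ProtoList List of prototypes to add
CharName Label given to the new labeled list that receives the prototypes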
Todo:
Not sure if it adds ALL of them

Definition at line 794 of file cnTraining.cpp.

References first, iterate, LABELEDLISTNODE::List, NewLabeledList(), and push().

00798 {
00799    PROTOTYPE* Proto;
00800    LABELEDLIST LabeledProtoList;
00801 
00802    LabeledProtoList = NewLabeledList(CharName);
00803    iterate(ProtoList)
00804    {
00805       Proto = (PROTOTYPE *) first (ProtoList);
00806       LabeledProtoList->List = push(LabeledProtoList->List, Proto);
00807    }
00808    *NormProtoList = push(*NormProtoList, LabeledProtoList);
00809 }//AddToNormProtosList

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

Find the labeled list that has a given label.

Parameters:
List List to search
Label Label to search for
Returns:
Labeled list with the specified Label or NULL.
Searches thru a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, NULL is returned.
Date:
Fri Aug 18 15:57:41 1989, DSJ, Created.

Definition at line 479 of file cnTraining.cpp.

References first, iterate, LABELEDLISTNODE::Label, and NULL.

Referenced by ReadTrainingSamples().

00482 {
00483    LABELEDLIST LabeledList;
00484 
00485    iterate (List)
00486     {
00487       LabeledList = (LABELEDLIST) first (List);
00488       if (strcmp (LabeledList->Label, Label) == 0)
00489          return (LabeledList);
00490     }
00491    return (NULL);
00492 
00493 }//FindList

void FreeLabeledList ( LABELEDLIST  LabeledList  ) 

Destroy and free memory reserved for LabeledList only.

Parameters:
LabeledList Labeled list to be freed
Returns:
None
Deallocates all of the memory consumed by a labeled list.
Note:
Does NOT free any memory which may be consumed by the items in the list.
Date:
Fri Aug 18 17:52:45 1989, DSJ, Created.

Definition at line 723 of file cnTraining.cpp.

References destroy(), LABELEDLISTNODE::Label, and LABELEDLISTNODE::List.

Referenced by FreeNormProtoList(), and FreeTrainingSamples().

00725 {
00726    destroy (LabeledList->List);
00727    free (LabeledList->Label);
00728    free (LabeledList);
00729 
00730 }// FreeLabeledList

void FreeNormProtoList ( LIST  CharList  ) 

Frees memory allocated for each font in CharList.

Parameters:
CharList List of all characters in font
Returns:
None
Note:
Does NOT free any memory which may be consumed by items of a font.
Date:
Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 696 of file cnTraining.cpp.

References destroy(), first, FreeLabeledList(), and iterate.

00699 {
00700    LABELEDLIST CharSample;
00701 
00702    iterate (CharList)      /* iterate thru all of the fonts */
00703    {
00704       CharSample = (LABELEDLIST) first (CharList);
00705       FreeLabeledList (CharSample);
00706    }
00707    destroy (CharList);
00708 
00709 }// FreeNormProtoList

void FreeTrainingSamples ( LIST  CharList  ) 

Frees memory used by CharList.

Parameters:
CharList List of labeled sample lists to free (called FontList in the original source comment)
Returns:
None
Deallocates memory allocated to the specified list of training samples.
Note:
Uses FreeFeatureSet() on each feature set
Date:
Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 662 of file cnTraining.cpp.

References destroy(), first, FreeFeatureSet(), FreeLabeledList(), iterate, and LABELEDLISTNODE::List.

00664 {
00665    LABELEDLIST CharSample;
00666    FEATURE_SET FeatureSet;
00667    LIST     FeatureList;
00668 
00669 
00670    printf ("\nFreeTrainingSamples...");
00671    iterate (CharList)      /* iterate thru all of the fonts */
00672    {
00673       CharSample = (LABELEDLIST) first (CharList);
00674       FeatureList = CharSample->List;
00675       iterate (FeatureList)   /* iterate thru all of the classes */
00676       {
00677          FeatureSet = (FEATURE_SET) first (FeatureList);
00678          FreeFeatureSet (FeatureSet);
00679       }
00680       FreeLabeledList (CharSample);
00681    }
00682    destroy (CharList);
00683 
00684 }// FreeTrainingSamples

char * GetNextFilename (  ) 

Returns the next command line argument.

Note:
Globals:
  • optind = defined by getopt sys call
  • Argc = Global copy of argc
  • Argv = Global copy of argv
Returns:
Next command-line argument or NULL.
If there are no remaining command line arguments, it returns NULL. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.
Date:
Fri Aug 18 09:34:12 1989, DSJ, Created.

Definition at line 410 of file cnTraining.cpp.

References Argc, Argv, NULL, and optind.

00411 {
00412    if (optind < Argc)
00413       return (Argv [optind++]);
00414    else
00415       return (NULL);
00416 
00417 }//GetNextFilename

int main ( int  argc,
char **  argv 
)

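Separate trained features for each character from the aggregate training page.
Note: the definition and listing that follow were cross-linked by Doxygen to main() in tesseractmain.cpp (the OCR engine entry point); they are not the main() of cnTraining.cpp that this description refers to.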

Definition at line 95 of file tesseractmain.cpp.

00095                                 {
00096   STRING outfile;               //output file
00097 
00098   if (argc < 3) {
00099     USAGE.error (argv[0], EXIT,
00100       "%s imagename outputbase [configfile [[+|-]varfile]...]\n", argv[0]);
00101   }
00102 
00103   if (argc == 3)
00104     TessBaseAPI::Init(argv[0], argv[1], NULL, false, 0, argv + 2);
00105   else
00106     TessBaseAPI::Init(argv[0], argv[1], argv[3], false, argc - 4, argv + 4);
00107 
00108   tprintf ("Tesseract Open Source OCR Engine\n");
00109 
00110   IMAGE image;
00111 #ifdef _TIFFIO_
00112   TIFF* tif = TIFFOpen(argv[1], "r");
00113   if (tif) {
00114     read_tiff_image(tif, &image);
00115     TIFFClose(tif);
00116   } else {
00117     READFAILED.error (argv[0], EXIT, argv[1]);
00118   }
00119 #else
00120   if (image.read_header(argv[1]) < 0)
00121     READFAILED.error (argv[0], EXIT, argv[1]);
00122   if (image.read(image.get_ysize ()) < 0) {
00123     MEMORY_OUT.error(argv[0], EXIT, "Read of image %s",
00124       argv[1]);
00125   }
00126 #endif // _TIFFIO_
00127   int bytes_per_line = check_legal_image_size(image.get_xsize(),
00128                                               image.get_ysize(),
00129                                               image.get_bpp());
00130   char* text = TessBaseAPI::TesseractRect(image.get_buffer(), image.get_bpp()/8,
00131                                           bytes_per_line, 0, 0,
00132                                           image.get_xsize(), image.get_ysize());
00133   outfile = argv[2];
00134   outfile += ".txt";
00135   FILE* fp = fopen(outfile.string(), "w");
00136   if (fp != NULL) {
00137     fwrite(text, 1, strlen(text), fp);
00138     fclose(fp);
00139   }
00140   delete [] text;
00141   TessBaseAPI::End();
00142 
00143   return 0;                      //Normal exit
00144 }

LABELEDLIST NewLabeledList ( char *  Label  ) 

Creates new list labeled with Label.

Parameters:
Label Label for new list
Returns:
New, empty labeled list.
This routine allocates a new, empty labeled list and gives it the specified label. Exceptions: none. History: Fri Aug 18 16:08:46 1989, DSJ, Created.

Definition at line 506 of file cnTraining.cpp.

References Emalloc(), LABELEDLISTNODE::Label, LABELEDLISTNODE::List, and NIL.

Referenced by AddToNormProtosList(), and ReadTrainingSamples().

00508 {
00509    LABELEDLIST LabeledList;
00510 
00511    LabeledList = (LABELEDLIST) (char*)Emalloc (sizeof (LABELEDLISTNODE));
00512    LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
00513    strcpy (LabeledList->Label, Label);
00514    LabeledList->List = NIL;
00515    return (LabeledList);
00516 
00517 }//NewLabeledList

int NumberOfProtos ( LIST  ProtoList,
BOOL8  CountSigProtos,
BOOL8  CountInsigProtos 
)

Count number of prototypes matching criteria.

Parameters:
ProtoList Source of prototypes
CountSigProtos if true, count significant prototypes
CountInsigProtos if true, count INsignificant prototypes
Returns:
Count

Definition at line 849 of file cnTraining.cpp.

References first, iterate, N, and proto::Significant.

Referenced by WriteNormProtos().

00853 {
00854    int N = 0;
00855    PROTOTYPE   *Proto;
00856 
00857    iterate(ProtoList)
00858    {
00859       Proto = (PROTOTYPE *) first ( ProtoList );
00860       if (( Proto->Significant && CountSigProtos )
00861            || ( ! Proto->Significant && CountInsigProtos ) )
00862          N++;
00863    }
00864    return(N);
00865 }//NumberOfProtos

void ParseArguments ( int  argc,
char **  argv 
)

Parses the command-line arguments.

Parameters:
argc Number of command line arguments
argv Array of command line arguments
Note:
Globals:
  • ShowAllSamples = flag controlling samples display
  • ShowSignificantProtos = flag controlling proto display
  • ShowInsignificantProtos = flag controlling proto display
  • Config = current clustering parameters
  • optarg = defined by getopt sys call
  • optind = defined by getopt sys call
  • Argc = global copy of argc
  • Argv = global copy of argv

Illegal options terminate the program.

The legal arguments are:
-d              = "turn off display of samples"
-p              = "turn off significant protos"
-n              = "turn off insignificant proto"
-S <style>      = "spherical" | "elliptical" | "mixed" | "automatic"
-M MinSamples   = "min samples per prototype (%)"
-B MaxIllegal   = "max illegal chars per cluster (%)"
-I Independence = "0 to 1"
-C Confidence   = "1e-200 to 1.0"
-D Directory
-N MaxNumSamples = limit how many features to consider from training page
-R RoundingAccuracy
Date:
7/24/89, DSJ, Created.
Todo:
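The usage printed on error is out of sync with the above: it lists [-D] [-P] [-N] as plain flags and shows "-d directory" and "-n MaxNumSamples", whereas the parser takes the directory from -D and MaxNumSamples from -N.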

Definition at line 306 of file cnTraining.cpp.

References Argc, Argv, automatic, CLUSTERCONFIG::Confidence, Config, Directory, elliptical, FALSE, getopt(), CLUSTERCONFIG::Independence, CLUSTERCONFIG::MaxIllegal, MaxNumSamples, CLUSTERCONFIG::MinSamples, mixed, optarg, CLUSTERCONFIG::ProtoStyle, RoundingAccuracy, ShowAllSamples, ShowInsignificantProtos, ShowSignificantProtos, spherical, and TRUE.

00309 {
00310    int      Option;
00311    int      ParametersRead;
00312    BOOL8    Error;
00313    extern char *optarg;
00314 
00315    Error = FALSE;
00316    Argc = argc;
00317    Argv = argv;
00318    while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
00319    {
00320       switch ( Option )
00321       {
00322       case 'n':
00323          sscanf(optarg,"%d", &ParametersRead);
00324          ShowInsignificantProtos = ParametersRead;
00325          break;
00326       case 'p':
00327          sscanf(optarg,"%d", &ParametersRead);
00328          ShowSignificantProtos = ParametersRead;
00329          break;
00330       case 'd':
00331          ShowAllSamples = FALSE;
00332          break;
00333       case 'C':
00334          ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
00335          if ( ParametersRead != 1 ) Error = TRUE;
00336          else if ( Config.Confidence > 1 ) Config.Confidence = 1;
00337          else if ( Config.Confidence < 0 ) Config.Confidence = 0;
00338          break;
00339       case 'I':
00340          ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
00341          if ( ParametersRead != 1 ) Error = TRUE;
00342          else if ( Config.Independence > 1 ) Config.Independence = 1;
00343          else if ( Config.Independence < 0 ) Config.Independence = 0;
00344          break;
00345       case 'M':
00346          ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
00347          if ( ParametersRead != 1 ) Error = TRUE;
00348          else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
00349          else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
00350          break;
00351       case 'B':
00352          ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
00353          if ( ParametersRead != 1 ) Error = TRUE;
00354          else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
00355          else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
00356          break;
00357       case 'R':
00358          ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
00359          if ( ParametersRead != 1 ) Error = TRUE;
00360          else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
00361          else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
00362          break;
00363       case 'S':
00364          switch ( optarg[0] )
00365          {
00366          case 's': Config.ProtoStyle = spherical; break;
00367          case 'e': Config.ProtoStyle = elliptical; break;
00368          case 'm': Config.ProtoStyle = mixed; break;
00369          case 'a': Config.ProtoStyle = automatic; break;
00370          default: Error = TRUE;
00371          }
00372          break;
00373          case 'D':
00374             Directory = optarg;
00375             break;
00376          case 'N':
00377             if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
00378                MaxNumSamples <= 0)
00379                Error = TRUE;
00380             break;
00381          case '?':
00382             Error = TRUE;
00383             break;
00384       }
00385       if ( Error )
00386       {
00387          fprintf (stderr, "usage: %s [-D] [-P] [-N]\n", argv[0] );
00388          fprintf (stderr, "\t[-S ProtoStyle]\n");
00389          fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
00390          fprintf (stderr, "\t[-d directory] [-n MaxNumSamples] [ TrainingPage ... ]\n");
00391          exit (2);
00392       }
00393    }//while
00394 }// ParseArguments

void ReadTrainingSamples ( FILE *  File,
LIST *  TrainingSamples 
)

Read and separate samples from training page.

Parameters:
File Open text file to read samples from
TrainingSamples Data structure returned
Returns:
None
Reads training samples from a file and places them into a data structure which organizes the samples by CharName. The FontName of each record is read from the file but is not used for grouping.
Date:
Fri Aug 18 13:11:39 1989, DSJ, Created; 5/17/98 simplifications to structure, eliminated font, and feature specification levels of structure.

Definition at line 434 of file cnTraining.cpp.

References f, FEATURE_SET_STRUCT::Features, FeaturesOfType, FindList(), FontName, FreeFeatureSet(), fscanf(), LABELEDLISTNODE::List, MAXNAMESIZE, MINSD, NewLabeledList(), NULL, FEATURE_SET_STRUCT::NumFeatures, NumFeatureSetsIn, PROGRAM_FEATURE_TYPE, push(), ReadCharDescription(), ShortNameToFeatureType(), and UniformRandomNumber().

00437 {
00438    char     CharName[MAXNAMESIZE];
00439    LABELEDLIST CharSample;
00440    FEATURE_SET FeatureSamples;
00441    CHAR_DESC   CharDesc;
00442    int         Type, i;
00443 
00444    while (fscanf (File, "%s %s", FontName, CharName) == 2) {
00445       CharSample = FindList (*TrainingSamples, CharName);
00446       if (CharSample == NULL) {
00447          CharSample = NewLabeledList (CharName);
00448          *TrainingSamples = push (*TrainingSamples, CharSample);
00449       }
00450       CharDesc = ReadCharDescription (File);
00451       Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
00452       FeatureSamples = FeaturesOfType(CharDesc, Type);
00453 
00454       for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
00455          FEATURE f = FeatureSamples->Features[feature];
00456          for (int dim =0; dim < f->Type->NumParams; ++dim)
00457             f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
00458       }//for feature
00459       CharSample->List = push (CharSample->List, FeatureSamples);
00460       for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
00461          if (Type != i)
00462             FreeFeatureSet (FeaturesOfType (CharDesc, i));
00463       free (CharDesc);
00464    }//while
00465 }// ReadTrainingSamples

CLUSTERER * SetUpForClustering ( LABELEDLIST  CharSample  ) 

Copy samples from CharSample to clusterer structure.

Parameters:
CharSample Structure holding feature information for a given character
Returns:
Pointer to new clusterer structure
Reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.
Date:
8/16/89, DSJ, Created.

Definition at line 744 of file cnTraining.cpp.

References DefinitionOf, Emalloc(), FEATURE_SET_STRUCT::Features, first, iterate, LABELEDLISTNODE::List, MakeClusterer(), MakeSample(), FEATURE_SET_STRUCT::MaxNumFeatures, N, NULL, fds::NumParams, fds::ParamDesc, FEATURE_STRUCT::Params, PROGRAM_FEATURE_TYPE, round, RoundingAccuracy, and ShortNameToFeatureType().

00746 {
00747    UINT16   N;
00748    int      i, j;
00749    FLOAT32  *Sample = NULL;
00750    CLUSTERER   *Clusterer;
00751    INT32    CharID;
00752    LIST FeatureList = NULL;
00753    FEATURE_SET FeatureSet = NULL;
00754    FEATURE_DESC FeatureDesc = NULL;
00755 // PARAM_DESC* ParamDesc;
00756 
00757    FeatureDesc = DefinitionOf(ShortNameToFeatureType(PROGRAM_FEATURE_TYPE));
00758    N = FeatureDesc->NumParams;
00759    //ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);
00760    Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);
00761 // free(ParamDesc);
00762 
00763    FeatureList = CharSample->List;
00764    CharID = 0;
00765    iterate(FeatureList)
00766    {
00767       FeatureSet = (FEATURE_SET) first (FeatureList);
00768       for (i=0; i < FeatureSet->MaxNumFeatures; i++)
00769       {
00770          if (Sample == NULL)
00771             Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
00772          for (j=0; j < N; j++)
00773             if (RoundingAccuracy != 0.0)
00774                Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
00775             else
00776                Sample[j] = FeatureSet->Features[i]->Params[j];
00777             MakeSample (Clusterer, Sample, CharID);
00778       }
00779       CharID++;
00780    }
00781    if ( Sample != NULL ) free( Sample );
00782    return( Clusterer );
00783 
00784 }//SetUpForClustering

void WriteNormProtos ( char *  Directory,
LIST  LabeledProtoList,
CLUSTERER *  Clusterer 
)

Write the labeled prototype lists to the "normproto" file.

Parameters:
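Directory Directory to place the normproto file into (may be NULL)
LabeledProtoList List of labeled prototype lists; one entry is written per label
Clusterer Clusterer supplying the SampleSize and ParamDesc written to the file header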
Note:
Globals:
  • MaxNumSamples = max number of samples per class to write
  • ShowSignificantProtos = Writes significant prototypes
  • ShowInsignificantProtos = Writes insignificant prototypes
Returns:
None
Writes the specified samples into "normproto" which is organized according to the font name and character name of the samples.
Todo:
Ask Ray for definition of significant and insignificant features and why this is needed to begin with (guess: extract "decisive" features from "fluff" caused by the many different fonts used in training)
Date:
Fri Aug 18 16:17:06 1989, DSJ, Created.

Definition at line 613 of file cnTraining.cpp.

References Efopen(), first, iterate, LABELEDLISTNODE::Label, LABELEDLISTNODE::List, MAXNAMESIZE, N, NameToChar(), NULL, NumberOfProtos(), CLUSTERER::ParamDesc, CLUSTERER::SampleSize, ShowInsignificantProtos, ShowSignificantProtos, WriteParamDesc(), and WriteProtos().

00617 {
00618    FILE     *File;
00619    char     Filename[MAXNAMESIZE];
00620    LABELEDLIST LabeledProto;
00621    int N;
00622    char Label;
00623 
00624    strcpy (Filename, "");
00625    if (Directory != NULL)
00626    {
00627       strcat (Filename, Directory);
00628       strcat (Filename, "/");
00629    }
00630    strcat (Filename, "normproto");
00631    printf ("\nWriting %s ...", Filename);
00632    File = Efopen (Filename, "w");
00633    fprintf(File,"%0d\n",Clusterer->SampleSize);
00634    WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc);
00635 
00636    iterate(LabeledProtoList)
00637    {
00638       LabeledProto = (LABELEDLIST) first (LabeledProtoList);
00639       N = NumberOfProtos(LabeledProto->List,
00640          ShowSignificantProtos, ShowInsignificantProtos);
00641       Label = NameToChar(LabeledProto->Label);
00642       fprintf(File, "\n%c %d\n", Label, N);
00643       WriteProtos(File, Clusterer->SampleSize, LabeledProto->List,
00644          ShowSignificantProtos, ShowInsignificantProtos);
00645    }
00646    fclose (File);
00647 
00648 }// WriteNormProtos

void WriteProtos ( FILE *  File,
UINT16  N,
LIST  ProtoList,
BOOL8  WriteSigProtos,
BOOL8  WriteInsigProtos 
)

Write select prototypes from ProtoList to File.

Parameters:
File Opened file to write into
N Sample size of each prototype (WriteNormProtos passes Clusterer->SampleSize); forwarded to WritePrototype()
ProtoList Source of prototypes
WriteSigProtos if true, write significant prototypes
WriteInsigProtos if true, write INsignificant prototypes

Definition at line 821 of file cnTraining.cpp.

References first, iterate, proto::Significant, and WritePrototype().

Referenced by WriteMicrofeat(), and WriteNormProtos().

00827 {
00828    PROTOTYPE   *Proto;
00829 
00830    // write prototypes
00831    iterate(ProtoList)
00832    {
00833       Proto = (PROTOTYPE *) first ( ProtoList );
00834       if (( Proto->Significant && WriteSigProtos )
00835            || ( ! Proto->Significant && WriteInsigProtos ) )
00836          WritePrototype( File, N, Proto );
00837    }
00838 }// WriteProtos

void WriteTrainingSamples ( char *  Directory,
LIST  CharList 
)

Writes some/all features for each font and character in CharList.

Parameters:
Directory Directory to place sample files into
CharList List of fonts
Note:
Globals:
  • MaxNumSamples = Max number of samples per class to write (the check is commented out in the listing below, so every sample is written)
Returns:
None
Writes the specified samples into files which are organized according to the font name and character name of the samples.
Note:
If the file does not exist, creates it and writes a header before the samples; if it already exists, only appends the samples.
Date:
Fri Aug 18 16:17:06 1989, DSJ, Created.

Definition at line 534 of file cnTraining.cpp.

References DefinitionOf, Efopen(), first, iterate, LABELEDLISTNODE::Label, LABELEDLISTNODE::List, MAXNAMESIZE, NULL, PROGRAM_FEATURE_TYPE, ShortNameToFeatureType(), WriteFeatureSet(), and WriteOldParamDesc().

00537 {
00538    LABELEDLIST CharSample;
00539    FEATURE_SET FeatureSet;
00540    LIST     FeatureList;
00541    FILE     *File;
00542    char     Filename[MAXNAMESIZE];
00543    int      NumSamples;
00544 
00545    iterate (CharList)      // iterate thru all of the fonts
00546    {
00547       CharSample = (LABELEDLIST) first (CharList);
00548 
00549       // construct the full pathname for the current samples file
00550       strcpy (Filename, "");
00551       if (Directory != NULL)
00552       {
00553          strcat (Filename, Directory);
00554          strcat (Filename, "/");
00555       }
00556       strcat (Filename, "Merged");
00557       strcat (Filename, "/");
00558       strcat (Filename, CharSample->Label);
00559       strcat (Filename, ".");
00560       strcat (Filename, PROGRAM_FEATURE_TYPE);
00561       printf ("\nWriting %s ...", Filename);
00562 
00563       /* if file does not exist, create a new one with an appropriate
00564       header; otherwise append samples to the existing file */
00565       File = fopen (Filename, "r");
00566       if (File == NULL)
00567       {
00568          File = Efopen (Filename, "w");
00569          WriteOldParamDesc
00570             (File, DefinitionOf (ShortNameToFeatureType (PROGRAM_FEATURE_TYPE)));
00571       }
00572       else
00573       {
00574          fclose (File);
00575          File = Efopen (Filename, "a");
00576       }
00577 
00578       // append samples onto the file
00579       FeatureList = CharSample->List;
00580       NumSamples = 0;
00581       iterate (FeatureList)
00582       {
00583          //if (NumSamples >= MaxNumSamples) break;
00584 
00585          FeatureSet = (FEATURE_SET) first (FeatureList);
00586          WriteFeatureSet (File, FeatureSet);
00587          NumSamples++;
00588       }
00589       fclose (File);
00590    }
00591 }// WriteTrainingSamples


Variable Documentation

int Argc [static]

Definition at line 146 of file cnTraining.cpp.

Referenced by GetNextFilename(), and ParseArguments().

char** Argv [static]

Definition at line 147 of file cnTraining.cpp.

Referenced by GetNextFilename(), and ParseArguments().

CLUSTERCONFIG Config [static]

Initial value:

{
  elliptical, 0.025, 0.05, 0.8, 1e-3
}
Holds configuration parameters to control clustering.

-M 0.025   -B 0.05   -I 0.8   -C 1e-3
Note:
These differ from Config in mfTraining.cpp

Definition at line 161 of file cnTraining.cpp.

Referenced by AddConfigToClass(), AddProtoToClass(), BaselineClassifier(), ClusterSamples(), ComputePrototypes(), FreeTempConfig(), MakeNewAdaptedClass(), MakeNewTemporaryConfig(), MakePermanent(), MakePrototype(), MakeTempProtoPerm(), NewTempConfig(), ParseArguments(), ReadPermConfig(), ReadTempConfig(), SplitProto(), WriteConfigs(), WriteOldConfigFile(), and WriteTempConfig().

char* Directory = NULL [static]

Definition at line 144 of file cnTraining.cpp.

Referenced by ParseArguments().

char FontName[MAXNAMESIZE] [static]

Definition at line 142 of file cnTraining.cpp.

int MaxNumSamples = MAX_NUM_SAMPLES [static]

Definition at line 145 of file cnTraining.cpp.

Referenced by ParseArguments().

FLOAT32 RoundingAccuracy = 0.0 [static]

Definition at line 166 of file cnTraining.cpp.

Referenced by ParseArguments(), and SetUpForClustering().

int row_number

NOT used in tesseract 1.02

Definition at line 48 of file cnTraining.cpp.

BOOL8 ShowAllSamples = FALSE [static]

Definition at line 150 of file cnTraining.cpp.

Referenced by ParseArguments().

BOOL8 ShowInsignificantProtos = FALSE [static]

Definition at line 152 of file cnTraining.cpp.

Referenced by ParseArguments(), WriteClusteredTrainingSamples(), and WriteNormProtos().

BOOL8 ShowSignificantProtos = TRUE [static]

Definition at line 151 of file cnTraining.cpp.

Referenced by ParseArguments(), WriteClusteredTrainingSamples(), and WriteNormProtos().


Generated on Wed Feb 28 19:49:27 2007 for Tesseract by  doxygen 1.5.1