classify/adaptmatch.cpp File Reference

#include <ctype.h>
#include "adaptmatch.h"
#include "normfeat.h"
#include "mfoutline.h"
#include "picofeat.h"
#include "float2int.h"
#include "outfeat.h"
#include "emalloc.h"
#include "intfx.h"
#include "permnum.h"
#include "speckle.h"
#include "efio.h"
#include "normmatch.h"
#include "stopper.h"
#include "permute.h"
#include "context.h"
#include "ndminx.h"
#include "intproto.h"
#include "const.h"
#include "globals.h"
#include "werd.h"
#include "callcpp.h"
#include "tordvars.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

Go to the source code of this file.

Classes

Defines

Functions

Variables


Define Documentation

#define ADAPT_TEMPLATE_SUFFIX   ".a"

Note:
File: adaptmatch.cpp
High level adaptive matcher.
Author:
Dan Johnson
Date:
Mon Mar 11 10:00:10 1991, DSJ, Created.
 **	(c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.

Definition at line 56 of file adaptmatch.cpp.

Referenced by EndAdaptiveClassifier(), and InitAdaptiveClassifier().

#define ADAPTABLE_WERD   (GOOD_NUMBER + 0.05)

Definition at line 67 of file adaptmatch.cpp.

Referenced by AdaptableWord(), and word_adaptable().

#define BUILT_IN_CUTOFFS_FILE   "tessdata/pffmtable"

Definition at line 58 of file adaptmatch.cpp.

Referenced by InitAdaptiveClassifierVars().

#define BUILT_IN_TEMPLATES_FILE   "tessdata/inttemp"

Definition at line 57 of file adaptmatch.cpp.

Referenced by InitAdaptiveClassifierVars().

 
#define InitIntFX (  )     (FeaturesHaveBeenExtracted = FALSE)

Definition at line 114 of file adaptmatch.cpp.

Referenced by AdaptToWord(), and DoAdaptiveMatch().

#define MarginalMatch ( Rating   )     ((Rating) > GreatAdaptiveMatch)

Bigger is worse

Definition at line 109 of file adaptmatch.cpp.

Referenced by DoAdaptiveMatch().

#define MAX_ADAPTABLE_WERD_SIZE   40

Used in new rules of AdaptableWord()

Definition at line 66 of file adaptmatch.cpp.

Referenced by AdaptableWord(), and AdaptToWord().

#define MAX_MATCHES   10

FIX: Number of lesser alternative choices to keep for any best choice

Definition at line 61 of file adaptmatch.cpp.

Referenced by ConvertMatchesToChoices().

#define NO_DEBUG   0

Definition at line 64 of file adaptmatch.cpp.

Referenced by AdaptToChar(), AmbigClassifier(), GetBestRatingFor(), MakeNewTemporaryConfig(), and ShowBestMatchFor().

#define TempConfigReliable ( Config   )     ((Config)->NumTimesSeen > ReliableConfigThreshold)

Definition at line 111 of file adaptmatch.cpp.

Referenced by AdaptToChar().

#define UNLIKELY_NUM_FEAT   200

If a blob yields more than this number of features, something has gone badly wrong (the feature count is implausibly high)

Definition at line 63 of file adaptmatch.cpp.

Referenced by GetAdaptiveFeatures(), GetBaselineFeatures(), and MakeNewAdaptedClass().

#define WORST_POSSIBLE_RATING   (1.0)

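FIX: Worst possible rating. Also used as the initial rating of a class before any match has been recorded: AddNewResult() treats a stored rating equal to this value as meaning the class has not been matched yet.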

Definition at line 75 of file adaptmatch.cpp.

Referenced by AdaptiveClassifier(), AdaptToPunc(), AddNewResult(), BaselineClassifier(), CharNormClassifier(), GetAmbiguities(), and InitMatcherRatings().

#define Y_DIM_OFFSET   (Y_SHIFT - BASELINE_Y_SHIFT)

ConvertProto assumes that the Y dimension varies from -0.5 to 0.5, whereas baseline normalization uses the range -0.25 to 0.75, so Y values must be offset by this amount.

Definition at line 72 of file adaptmatch.cpp.

Referenced by MakeNewAdaptedClass(), and MakeNewTempProtos().


Function Documentation

int AdaptableWord ( TWERD Word,
const char *  BestChoice,
const char *  BestRawChoice 
)

Returns TRUE if the specified word is acceptable for adaptation. The rules that apply in general are checked first, and among them those that are simplest to compute come first.

Parameters:
Word current word
BestChoice best overall choice for word with context
BestRawChoice best choice for word without context
Note:
Globals: none
Returns:
TRUE or FALSE
Note:
Exceptions: none
Date:
Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 1224 of file adaptmatch.cpp.

References ADAPTABLE_WERD, AlternativeChoicesWorseThan(), case_ok(), CurrentBestChoiceAdjustFactor(), CurrentBestChoiceIs(), MAX_ADAPTABLE_WERD_SIZE, NULL, NumBlobsIn(), punctuation_ok(), pure_number(), valid_number(), and valid_word().

Referenced by tess_adaptable_word().

01226                                              {
01227   int BestChoiceLength;
01228 
01229   return
01230    (
01231       /* EnableLearning && */
01232       /* new rules */
01233       BestChoice != NULL
01234       && BestRawChoice != NULL
01235       && Word != NULL
01236       && ( BestChoiceLength = strlen (BestChoice)) > 0
01237       && BestChoiceLength == NumBlobsIn (Word)
01238       && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE
01239       && (
01240          /* new rules */
01241          EnableNewAdaptRules
01242          && CurrentBestChoiceAdjustFactor() <= ADAPTABLE_WERD
01243           && AlternativeChoicesWorseThan(ADAPTABLE_WERD)
01244          && CurrentBestChoiceIs(BestChoice)
01245          ||
01246          /* old rules */
01247          !EnableNewAdaptRules
01248          && BestChoiceLength == strlen (BestRawChoice)
01249          && (
01250                (
01251                valid_word (BestChoice)
01252                && case_ok (BestChoice)
01253                )
01254              ||   (
01255                valid_number (BestChoice)
01256                && pure_number (BestChoice)
01257                )
01258             )
01259             && punctuation_ok (BestChoice) != -1
01260             && punctuation_ok (BestChoice) <= 1
01261          )
01262    );
01263 }// AdaptableWord

LIST AdaptiveClassifier ( TBLOB Blob,
TBLOB DotBlob,
TEXTROW Row 
)

Calls the adaptive matcher which returns (in an array) the class id of each class matched.

Parameters:
Blob blob to be classified
DotBlob (obsolete)
Row row of text that word appears in
Note:
Globals: CurrentRatings used by compare function for qsort
Returns:
List of choices found by adaptive matcher.
The adaptive matcher also returns the number of classes matched. For each class matched, it places the best rating found for that class into the Ratings array. Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices. This routine also performs some simple speckle filtering.

Note:
Exceptions: none
Date:
Mon Mar 11 10:00:58 1991, DSJ, Created.

Definition at line 616 of file adaptmatch.cpp.

References AdaptedTemplates, AddLargeSpeckleTo(), append_choice(), ADAPT_RESULTS::BestClass, ADAPT_RESULTS::BestConfig, ADAPT_RESULTS::BestRating, ADAPT_RESULTS::BlobLength, ADAPT_RESULTS::Classes, CompareCurrentRatings(), ConvertMatchesToChoices(), count(), cprintf(), CurrentRatings, DebugAdaptiveClassifier(), DoAdaptiveMatch(), EnterClassifyMode, f, GetLineStatsFromRow(), InitMatcherRatings(), LargeSpeckle(), MatcherDebugLevel, MAX_FLOAT32, NewAdaptedTemplates(), NIL, NO_CLASS, NULL, NumClassesOutput, ADAPT_RESULTS::NumMatches, PrintAdaptiveMatchResults(), ADAPT_RESULTS::Ratings, RemoveBadMatches(), RemoveExtraPuncs(), and WORST_POSSIBLE_RATING.

Referenced by tess_bn_matcher(), tess_cn_matcher(), and tess_default_matcher().

00616                                                                    { 
00617   LIST Choices;
00618   ADAPT_RESULTS Results;
00619   LINE_STATS LineStats;
00620 
00621   // next 2 lines new for v1.03
00622   if (AdaptedTemplates == NULL)
00623     AdaptedTemplates = NewAdaptedTemplates ();
00624   EnterClassifyMode;
00625 
00626   Results.BlobLength = MAX_FLOAT32;
00627   Results.NumMatches = 0;
00628   Results.BestRating = WORST_POSSIBLE_RATING;
00629   Results.BestClass = NO_CLASS;
00630   Results.BestConfig = 0;
00631   GetLineStatsFromRow(Row, &LineStats);
00632   InitMatcherRatings (Results.Ratings);
00633 
00634   DoAdaptiveMatch(Blob, &LineStats, &Results);
00635   RemoveBadMatches(&Results);
00636 
00637   /* save ratings in a global so that CompareCurrentRatings() can see them */
00638   CurrentRatings = Results.Ratings;
00639   qsort ((void *) (Results.Classes), Results.NumMatches,
00640     sizeof (CLASS_ID), CompareCurrentRatings);
00641   RemoveExtraPuncs(&Results);
00642   Choices = ConvertMatchesToChoices (&Results);
00643 
00644   if (MatcherDebugLevel >= 1) {
00645     cprintf ("AD Matches =  ");
00646     PrintAdaptiveMatchResults(stdout, &Results);
00647   }
00648 
00649   if (LargeSpeckle (Blob, Row))
00650     Choices = AddLargeSpeckleTo (Choices);
00651 
00652 #ifndef GRAPHICS_DISABLED
00653   if (EnableAdaptiveDebugger)
00654     DebugAdaptiveClassifier(Blob, &LineStats, &Results);
00655 #endif
00656 
00657   NumClassesOutput += count (Choices);
00658   if (Choices == NIL) {
00659     if (!bln_numericmode)
00660       printf ("Nil classification!\n");  // Should never normally happen.
00661     return (append_choice (NIL, "", 50.0f, -20.0f, -1));
00662   }
00663 
00664   return (Choices);
00665 
00666 }// AdaptiveClassifier

void AdaptToChar ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  ClassId,
FLOAT32  Threshold 
)

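Adapts the adapted templates to Blob as an example of class ClassId. If ClassId is not yet used in the adapted templates, a new adapted class is created from the blob. Otherwise the blob is matched against the existing class: a good match (rating <= Threshold) to a permanent config changes nothing; a good match to a temporary config increases that config's confidence, and the config is made permanent once it is considered reliable; a poor match causes a new temporary config to be created from the blob.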

Parameters:
Blob blob to add to templates for ClassId
LineStats statistics about text line blob is in
ClassId class to add blob to
Threshold maximum match rating (bigger is worse) at which the blob is still treated as a good match to an existing config
Note:
Globals:
  • AdaptedTemplates current set of adapted templates
  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns:
none
Note:
Exceptions: none
Date:
Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 1282 of file adaptmatch.cpp.

References AdaptedTemplates, AllConfigsOn, AllProtosOn, ADAPT_TEMPLATES_STRUCT::Class, ClassForClassId, INT_RESULT_STRUCT::Config, ConfigIsPermanent, cprintf(), FreeFeatureSet(), GetAdaptiveFeatures(), GetClassToDebug(), IncreaseConfidence, IndexForClassId, IntegerMatcher(), LearningDebugLevel, LegalClassId, MakeNewAdaptedClass(), MakeNewTemporaryConfig(), MakePermanent(), NO_DEBUG, NumCharsAdaptedTo, TEMP_CONFIG_STRUCT::NumTimesSeen, INT_RESULT_STRUCT::Rating, SetAdaptiveThreshold(), SetBaseLineMatch(), ShowMatchDisplay(), TempConfigFor, TempConfigReliable, ADAPT_TEMPLATES_STRUCT::Templates, UnusedClassIdIn, and UpdateMatchDisplay().

Referenced by AdaptToPunc(), and AdaptToWord().

01285                                     {
01286   int NumFeatures;
01287   INT_FEATURE_ARRAY IntFeatures;
01288   INT_RESULT_STRUCT IntResult;
01289   CLASS_INDEX ClassIndex;
01290   INT_CLASS IClass;
01291   ADAPT_CLASS Class;
01292   TEMP_CONFIG TempConfig;
01293   FEATURE_SET FloatFeatures;
01294 
01295   NumCharsAdaptedTo++;
01296   if (!LegalClassId (ClassId))
01297     return;
01298 
01299   if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
01300     MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates);
01301   }
01302   else {
01303     IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
01304     ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
01305     Class = AdaptedTemplates->Class[ClassIndex];
01306 
01307     NumFeatures = GetAdaptiveFeatures (Blob, LineStats,
01308       IntFeatures, &FloatFeatures);
01309     if (NumFeatures <= 0)
01310       return;
01311 
01312     SetBaseLineMatch();
01313     IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
01314       NumFeatures, NumFeatures, IntFeatures, 0, 0,
01315       &IntResult, NO_DEBUG);
01316 
01317     SetAdaptiveThreshold(Threshold);
01318 
01319     if (IntResult.Rating <= Threshold) {
01320       if (ConfigIsPermanent (Class, IntResult.Config)) {
01321         if (LearningDebugLevel >= 1)
01322           cprintf ("Found good match to perm config %d = %4.1f%%.\n",
01323             IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01324         FreeFeatureSet(FloatFeatures);
01325         return;
01326       }
01327 
01328       TempConfig = TempConfigFor (Class, IntResult.Config);
01329       IncreaseConfidence(TempConfig);
01330       if (LearningDebugLevel >= 1)
01331         cprintf ("Increasing reliability of temp config %d to %d.\n",
01332           IntResult.Config, TempConfig->NumTimesSeen);
01333 
01334       if (TempConfigReliable (TempConfig))
01335         MakePermanent (AdaptedTemplates, ClassId, IntResult.Config,
01336           Blob, LineStats);
01337     }
01338     else {
01339       if (LearningDebugLevel >= 1) // new in v1.03
01340         cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
01341           IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01342       MakeNewTemporaryConfig(AdaptedTemplates,
01343                              ClassId,
01344                              NumFeatures,
01345                              IntFeatures,
01346                              FloatFeatures);
01347       if (LearningDebugLevel >= 1) { // new in v1.03
01348         IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
01349           NumFeatures, NumFeatures, IntFeatures, 0, 0,
01350           &IntResult, NO_DEBUG);
01351         cprintf ("Best match to temp config %d = %4.1f%%.\n",
01352           IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01353         if (LearningDebugLevel >= 2) {
01354           UINT32 ConfigMask;
01355           ConfigMask = 1 << IntResult.Config;
01356           ShowMatchDisplay();
01357           IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask,
01358             NumFeatures, NumFeatures, IntFeatures, 0, 0,
01359             &IntResult, 6 | 0x19);
01360           UpdateMatchDisplay();
01361           GetClassToDebug ("Adapting");
01362         }
01363       }
01364     }
01365     FreeFeatureSet(FloatFeatures);
01366   }
01367 }                                /* AdaptToChar */

void AdaptToPunc ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  ClassId,
FLOAT32  Threshold 
)

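Adapts to punctuation. The blob is first classified against the built-in templates with CharNormClassifier(); adaptation proceeds (via AdaptToChar()) only if exactly one match remains after bad matches have been removed, otherwise the blob is rejected.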

Parameters:
Blob blob to add to templates for ClassId
LineStats statistics about text line blob is in
ClassId class to add blob to
Threshold maximum match rating (bigger is worse) for a good match to an existing config; passed through unchanged to AdaptToChar()
Note:
Globals: PreTrainedTemplates current set of built-in templates
Returns:
none
Note:
Exceptions: none
Date:
Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 1383 of file adaptmatch.cpp.

References AdaptToChar(), ADAPT_RESULTS::BestClass, ADAPT_RESULTS::BestConfig, ADAPT_RESULTS::BestRating, ADAPT_RESULTS::BlobLength, CharNormClassifier(), ADAPT_RESULTS::Classes, cprintf(), InitMatcherRatings(), LearningDebugLevel, MAX_FLOAT32, NO_CLASS, ADAPT_RESULTS::NumMatches, PreTrainedTemplates, ADAPT_RESULTS::Ratings, RemoveBadMatches(), and WORST_POSSIBLE_RATING.

Referenced by AdaptToWord().

01386                                     {
01387   ADAPT_RESULTS Results;
01388   int i;
01389 
01390   Results.BlobLength = MAX_FLOAT32;
01391   Results.NumMatches = 0;
01392   Results.BestRating = WORST_POSSIBLE_RATING;
01393   Results.BestClass = NO_CLASS;
01394   Results.BestConfig = 0;
01395   InitMatcherRatings (Results.Ratings);
01396   CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
01397   RemoveBadMatches(&Results);
01398 
01399   if (Results.NumMatches != 1) {
01400     if (LearningDebugLevel >= 1) {
01401       cprintf ("Rejecting punc = %c (Alternatives = ", ClassId);
01402 
01403       for (i = 0; i < Results.NumMatches; i++)
01404         cprintf ("%c", Results.Classes[i]);
01405       cprintf (")\n");
01406     }
01407     return;
01408   }
01409 
01410   #ifndef SECURE_NAMES
01411   if (LearningDebugLevel >= 1)
01412     cprintf ("Adapting to punc = %c\n", ClassId);
01413   #endif
01414   AdaptToChar(Blob, LineStats, ClassId, Threshold);
01415 
01416 }                                /* AdaptToPunc */

void AdaptToWord ( TWERD Word,
TEXTROW Row,
const char *  BestChoice,
const char *  BestRawChoice,
const char *  rejmap 
)

Implements a preliminary version of the rules which are used to decide which characters to adapt to.

Parameters:
Word Word to be adapted to
Row Row of text that word is found in
BestChoice Best choice for word found by system
BestRawChoice Best choice for word found by classifier only
rejmap Reject map
Note:
Globals: EnableLearning TRUE if learning is enabled
Returns:
none
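A word is adapted to if it is in the dictionary or if it is a "good" number (no trailing units, etc.). It cannot contain broken or merged characters. Within that word, letters and digits are adapted to directly via AdaptToChar(); any other character is handed to AdaptToPunc(), which adapts only when the built-in classifier yields exactly one match for the blob.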

SPECIAL RULE: Don't adapt to an 'i' which is the first char in a word, because it is too easily confused with 'I'.

The new adaptation rules should account for this automatically, since they exclude ambiguous words from adaptation, but for safety's sake we'll leave the rule in.

Also, don't adapt to i's whose blob does not contain exactly two outlines (e.g. an 'i' with a missing or merged dot), because this creates too much ambiguity for broken characters.

Note:
Exceptions: none
Date:
Thu Mar 14 07:40:36 1991, DSJ, Created.

Definition at line 700 of file adaptmatch.cpp.

References AdaptToChar(), AdaptToPunc(), assert(), wordstruct::blobs, cprintf(), GetAdaptThresholds(), GetLineStatsFromRow(), InitIntFX, LearningDebugLevel, MAX_ADAPTABLE_WERD_SIZE, blobstruct::next, NULL, NumOutlinesInBlob(), and NumWordsAdaptedTo.

Referenced by tess_adapter().

00704                                      {
00705   TBLOB *Blob;
00706   LINE_STATS LineStats;
00707   FLOAT32 Thresholds[MAX_ADAPTABLE_WERD_SIZE];
00708   FLOAT32 *Threshold;
00709   const char *map = rejmap;
00710   char map_char = '1';
00711 
00712   if (EnableLearning) {
00713     NumWordsAdaptedTo++;
00714 
00715     #ifndef SECURE_NAMES
00716     if (LearningDebugLevel >= 1)
00717       cprintf ("\n\nAdapting to word = %s\n", BestChoice);
00718     #endif
00719     GetLineStatsFromRow(Row, &LineStats);
00720 
00721     GetAdaptThresholds(Word,
00722                        &LineStats,
00723                        BestChoice,
00724                        BestRawChoice,
00725                        Thresholds);
00726 
00727     for (Blob = Word->blobs, Threshold = Thresholds;
00728     Blob != NULL; Blob = Blob->next, BestChoice++, Threshold++) {
00729       InitIntFX();
00730 
00731       if (rejmap != NULL)
00732         map_char = *map++;
00733 
00734       assert (map_char == '1' || map_char == '0');
00735 
00736       if (map_char == '1') {
00737 
00738         if (isalnum (*BestChoice)) {
00739           /* Use SPECIAL RULE here */
00740           if ((*BestChoice == 'i'
00741             || il1_adaption_test && *BestChoice == 'I'
00742             && islower (BestChoice[1])) && (Blob == Word->blobs
00743             ||
00744             ispunct (*
00745             (BestChoice -
00746             1))
00747             || !il1_adaption_test
00748             &&
00749             NumOutlinesInBlob
00750           (Blob) != 2)) {
00751             if (LearningDebugLevel >= 1)
00752               cprintf ("Rejecting char = %c\n", *BestChoice);
00753           }
00754           else {
00755             #ifndef SECURE_NAMES
00756             if (LearningDebugLevel >= 1)
00757               cprintf ("Adapting to char = %c, thr= %g\n", *BestChoice, *Threshold);
00758             #endif
00759             AdaptToChar(Blob, &LineStats, *BestChoice, *Threshold);
00760           }
00761         }
00762         else
00763           AdaptToPunc(Blob, &LineStats, *BestChoice, *Threshold);
00764       }
00765     }
00766     if (LearningDebugLevel >= 1)
00767       cprintf ("\n");
00768   }
00769 }                                /* AdaptToWord */

void AddNewResult ( ADAPT_RESULTS Results,
CLASS_ID  ClassId,
FLOAT32  Rating,
int  ConfigId 
)

Adds the result of a classification into Results.

Parameters:
Results results to add new result to
ClassId class of new result
Rating rating of new result
ConfigId config id of new result
Note:
Globals: BadMatchPad defines limits of an acceptable match
Returns:
none
If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.
Note:
Exceptions: none
Date:
Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1441 of file adaptmatch.cpp.

References ADAPT_RESULTS::BestClass, ADAPT_RESULTS::BestConfig, ADAPT_RESULTS::BestRating, BestRating, ADAPT_RESULTS::Classes, ClassForClassId, ADAPT_RESULTS::Configs, NO_CLASS, NULL, NumIntConfigsIn, ADAPT_RESULTS::NumMatches, PreTrainedTemplates, ADAPT_RESULTS::Ratings, and WORST_POSSIBLE_RATING.

Referenced by AmbigClassifier(), BaselineClassifier(), CharNormClassifier(), and ClassifyAsNoise().

01444                                 {
01445   FLOAT32 OldRating;
01446   INT_CLASS_STRUCT* CharClass = NULL;
01447 
01448   OldRating = Results->Ratings[ClassId];
01449   if (Rating <= Results->BestRating + BadMatchPad && Rating < OldRating) {
01450     Results->Ratings[ClassId] = Rating;
01451     if (ClassId != NO_CLASS)
01452       CharClass = ClassForClassId(PreTrainedTemplates, ClassId);
01453     if (CharClass != NULL && NumIntConfigsIn(CharClass) == 32)
01454       Results->Configs[ClassId] = ConfigId;
01455     else
01456       Results->Configs[ClassId] = ~0;
01457 
01458     if (Rating < Results->BestRating) {
01459       Results->BestRating = Rating;
01460       Results->BestClass = ClassId;
01461       Results->BestConfig = ConfigId;
01462     }
01463 
01464     /* if this is first rating for class, add to list of classes matched */
01465     if (OldRating == WORST_POSSIBLE_RATING)
01466       Results->Classes[Results->NumMatches++] = ClassId;
01467   }
01468 }                                /* AddNewResult */

void AmbigClassifier ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
char *  Ambiguities,
ADAPT_RESULTS Results 
)

Identical to CharNormClassifier() except that it does no class pruning; it simply matches the unknown blob against each of the classes listed in Ambiguities.

Parameters:
Blob blob to be classified
LineStats statistics for text line Blob is in
Templates built-in templates to classify against
Ambiguities string of class id's to match against
Results place to put match results
Note:
Globals:
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs
Returns:
none
Note:
Exceptions: none
Date:
Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1489 of file adaptmatch.cpp.

References AddNewResult(), AllConfigsOn, AllProtosOn, AmbigClassifierCalls, ADAPT_RESULTS::BlobLength, ClassForClassId, INT_RESULT_STRUCT::Config, cprintf(), GetCharNormFeatures(), GetPicoFeatureLength, IndexForClassId, IntegerMatcher(), MatcherDebugLevel, NO_DEBUG, NumAmbigClassesTried, INT_RESULT_STRUCT::Rating, and SetCharNormMatch().

Referenced by DoAdaptiveMatch().

01493                                              {
01494   int IntOutlineLength;
01495   int NumFeatures;
01496   INT_FEATURE_ARRAY IntFeatures;
01497   CLASS_NORMALIZATION_ARRAY CharNormArray;
01498   INT_RESULT_STRUCT IntResult;
01499   CLASS_ID ClassId;
01500   CLASS_INDEX ClassIndex;
01501 
01502   AmbigClassifierCalls++;
01503 
01504   NumFeatures = GetCharNormFeatures (Blob, LineStats,
01505     Templates,
01506     IntFeatures, CharNormArray,
01507     &(Results->BlobLength));
01508   if (NumFeatures <= 0)
01509     return;
01510 
01511   IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01512 
01513   if (MatcherDebugLevel >= 2)
01514     cprintf ("AM Matches =  ");
01515 
01516   while (*Ambiguities) {
01517     ClassId = *Ambiguities;
01518     ClassIndex = IndexForClassId (Templates, ClassId);
01519 
01520     SetCharNormMatch();
01521     IntegerMatcher (ClassForClassId (Templates, ClassId),
01522       AllProtosOn, AllConfigsOn,
01523       IntOutlineLength, NumFeatures, IntFeatures, 0,
01524       CharNormArray[ClassIndex], &IntResult, NO_DEBUG);
01525 
01526     if (MatcherDebugLevel >= 2)
01527       cprintf ("%c-%-2d %2.0f  ", ClassId, IntResult.Config,
01528         IntResult.Rating * 100.0);
01529 
01530     AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01531 
01532     Ambiguities++;
01533 
01534     NumAmbigClassesTried++;
01535   }
01536   if (MatcherDebugLevel >= 2)
01537     cprintf ("\n");
01538 
01539 }                                /* AmbigClassifier */

char * BaselineClassifier ( TBLOB Blob,
LINE_STATS LineStats,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS Results 
)

Extracts baseline normalized features from the unknown character and matches them against the specified set of templates.

Parameters:
Blob blob to be classified
LineStats statistics for text line Blob is in
Templates current set of adapted templates
Results place to put match results
Note:
Globals: BaselineCutoffs expected num features for each class
Returns:
String of possibly ambiguous chars that should be checked, or NULL if no features could be extracted from the blob or no class was matched.
The classes which match are added to Results.
Note:
Exceptions: none
Date:
Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1559 of file adaptmatch.cpp.

References AddNewResult(), BaselineClassifierCalls, BaselineCutoffs, ADAPT_RESULTS::BestClass, ADAPT_RESULTS::BestConfig, ADAPT_RESULTS::BlobLength, ADAPT_TEMPLATES_STRUCT::Class, ClassForClassId, ClassForIndex, ClassPruner(), Config, INT_RESULT_STRUCT::Config, ConfigIsPermanent, cprintf(), display_ratings, GetBaselineFeatures(), GetPicoFeatureLength, IndexForClassId, IntegerMatcher(), MatcherDebugLevel, NO_CLASS, NULL, NumBaselineClassesTried, NumIntConfigsIn, ADAPT_CLASS_STRUCT::PermConfigs, ADAPT_CLASS_STRUCT::PermProtos, INT_RESULT_STRUCT::Rating, SetBaseLineMatch(), ADAPT_TEMPLATES_STRUCT::Templates, and WORST_POSSIBLE_RATING.

Referenced by DoAdaptiveMatch().

01562                                                  {
01563   int IntOutlineLength;
01564   int NumFeatures;
01565   int NumClasses;
01566   int i;
01567   int config;
01568   float best_rating;
01569   INT_FEATURE_ARRAY IntFeatures;
01570   CLASS_NORMALIZATION_ARRAY CharNormArray;
01571   CLASS_PRUNER_RESULTS ClassPrunerResults;
01572   INT_RESULT_STRUCT IntResult;
01573   CLASS_ID ClassId;
01574   CLASS_INDEX ClassIndex;
01575   ADAPT_CLASS Class;
01576 
01577   BaselineClassifierCalls++;
01578 
01579   NumFeatures = GetBaselineFeatures (Blob, LineStats,
01580     Templates->Templates,
01581     IntFeatures, CharNormArray,
01582     &(Results->BlobLength));
01583   if (NumFeatures <= 0)
01584     return NULL;
01585 
01586   IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01587 
01588   NumClasses = ClassPruner (Templates->Templates, NumFeatures,
01589     IntFeatures, CharNormArray,
01590     BaselineCutoffs, ClassPrunerResults,
01591     MatchDebugFlags);
01592 
01593   NumBaselineClassesTried += NumClasses;
01594 
01595   if (MatcherDebugLevel >= 2 || display_ratings > 1)
01596     cprintf ("BL Matches =  ");
01597 
01598   best_rating = WORST_POSSIBLE_RATING;
01599   for (i = 0; i < NumClasses
01600     && ((newcp_ratings_on & 12) < 8
01601     || (newcp_ratings_on & 12) == 8
01602     && ClassPrunerResults[i].Rating < best_rating + BadMatchPad / 2
01603     && ClassPrunerResults[i].Rating < newcp_duff_rating
01604   && NumClasses > 1); i++) {
01605     ClassId = ClassPrunerResults[i].Class;
01606     ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01607 
01608     SetBaseLineMatch();
01609     IntegerMatcher (ClassForClassId (Templates->Templates, ClassId),
01610       Templates->Class[ClassIndex]->PermProtos,
01611       Templates->Class[ClassIndex]->PermConfigs,
01612       IntOutlineLength, NumFeatures, IntFeatures, 0,
01613       CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
01614 
01615     if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01616       cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f)  ", ClassId, IntResult.Config,
01617         IntResult.Rating * 100.0,
01618         ClassPrunerResults[i].Rating * 100.0,
01619         ClassPrunerResults[i].Rating2 * 100.0);
01620       if (i % 4 == 3)
01621         cprintf ("\n");
01622     }
01623 
01624     AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01625   }
01626   while (i < NumClasses) {
01627     ClassId = ClassPrunerResults[i].Class;
01628     ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01629     Class = Templates->Class[ClassIndex];
01630     config =
01631       NumIntConfigsIn (ClassForIndex (Templates->Templates, ClassIndex));
01632     for (config--; config >= 0 && !ConfigIsPermanent (Class, config);
01633       config--);
01634 
01635     if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01636       cprintf ("%c(%d) %2.1f(%2.1f)  ", ClassId, config,
01637         ClassPrunerResults[i].Rating * 200.0,
01638         ClassPrunerResults[i].Rating2 * 100.0);
01639       if (i % 4 == 3)
01640         cprintf ("\n");
01641     }
01642 
01643     AddNewResult (Results, ClassId, ClassPrunerResults[i].Rating * 2,
01644       config);
01645     i++;
01646   }
01647   if (MatcherDebugLevel >= 2 || display_ratings > 1)
01648     cprintf ("\n");
01649 
01650   ClassId = Results->BestClass;
01651   if (ClassId == NO_CLASS)
01652     return (NULL);
01653   /* this is a bug - maybe should return "" */
01654 
01655   ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01656   return ((char *) (Templates->Class[ClassIndex]->
01657     Config[Results->BestConfig].Perm));
01658 
01659 }                                /* BaselineClassifier */

void CharNormClassifier ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
ADAPT_RESULTS Results 
)

Extracts character normalized features from the unknown character and matches them against the specified set of templates.

Parameters:
Blob Blob to be classified
LineStats Statistics for text line Blob is in
Templates Templates to classify unknown against
Results Place to put match results
Note:
Globals:
  • CharNormCutoffs Expected num features for each class
  • AllProtosOn Mask that enables all protos
  • AllConfigsOn Mask that enables all configs
Returns:
none
The classes which match are added to Results.
Note:
Exceptions: none
Date:
Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1802 of file adaptmatch.cpp.

References AddNewResult(), ADAPT_RESULTS::BlobLength, CharNormClassifierCalls, CharNormCutoffs, ClassForClassId, ClassPruner(), INT_RESULT_STRUCT::Config, config_mask_to_proto_mask(), cprintf(), display_ratings, feature_pruner(), GetCharNormFeatures(), GetPicoFeatureLength, IndexForClassId, IntegerMatcher(), MatcherDebugLevel, NumCharNormClassesTried, prune_configs(), PrunedProtos, INT_RESULT_STRUCT::Rating, SetCharNormMatch(), and WORST_POSSIBLE_RATING.

Referenced by AdaptToPunc(), DoAdaptiveMatch(), and GetAmbiguities().

01805                                                 {  /* Classifies Blob against Templates using char-normalized features; each candidate class is added to Results. */
01806   int IntOutlineLength;
01807   int NumFeatures;
01808   int NumClasses;
01809   int i;
01810   INT32 min_misses;
01811   float best_rating;
01812   INT_FEATURE_ARRAY IntFeatures;
01813   CLASS_NORMALIZATION_ARRAY CharNormArray;
01814   CLASS_PRUNER_RESULTS ClassPrunerResults;
01815   INT_RESULT_STRUCT IntResult;
01816   CLASS_ID ClassId;
01817   CLASS_INDEX ClassIndex;
01818   /* count this call in the global statistics */
01819   CharNormClassifierCalls++;
01820   /* extract features; the blob length is written straight into Results */
01821   NumFeatures = GetCharNormFeatures (Blob, LineStats,
01822     Templates,
01823     IntFeatures, CharNormArray,
01824     &(Results->BlobLength));
01825   if (NumFeatures <= 0)
01826     return;                      /* nothing to match: no result is added */
01827 
01828   IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01829   /* first pass: the class pruner fills ClassPrunerResults and returns the candidate count */
01830   NumClasses = ClassPruner (Templates, NumFeatures,
01831     IntFeatures, CharNormArray,
01832     CharNormCutoffs, ClassPrunerResults,
01833     MatchDebugFlags);
01834   /* optional second pruning stage, enabled by feature_prune_percentile > 0 */
01835   if (feature_prune_percentile > 0) {
01836     min_misses = feature_pruner (Templates, NumFeatures,
01837       IntFeatures, NumClasses,
01838       ClassPrunerResults);
01839     NumClasses =
01840       prune_configs(Templates,
01841                     min_misses,
01842                     NumFeatures,
01843                     IntFeatures,
01844                     CharNormArray,
01845                     NumClasses,
01846                     IntOutlineLength,
01847                     ClassPrunerResults,
01848                     MatchDebugFlags);
01849   }
01850   else
01851     min_misses = 0;
01852   if (tessedit_single_match && NumClasses > 1)
01853     NumClasses = 1;              /* keep only the first pruner candidate */
01854   NumCharNormClassesTried += NumClasses;
01855 
01856   if (MatcherDebugLevel >= 2 || display_ratings > 1)
01857     cprintf ("CN Matches =  ");
01858   /* Full integer match for leading candidates. Because && binds tighter than ||, the loop continues while (newcp_ratings_on & 3) < 2, or while it equals 2 and the pruner rating is below both best_rating + BadMatchPad / 2 and newcp_duff_rating and more than one class remains. */
01859   best_rating = WORST_POSSIBLE_RATING;
01860   for (i = 0; i < NumClasses
01861     && ((newcp_ratings_on & 3) < 2
01862     || (newcp_ratings_on & 3) == 2
01863     && ClassPrunerResults[i].Rating < best_rating + BadMatchPad / 2
01864     && ClassPrunerResults[i].Rating < newcp_duff_rating
01865   && NumClasses > 1); i++) {
01866     ClassId = ClassPrunerResults[i].Class;
01867     ClassIndex = IndexForClassId (Templates, ClassId);
01868 
01869     SetCharNormMatch();
01870     /* NOTE(review): PrunedProtos is only refreshed when feature_prune_percentile > 0; otherwise IntegerMatcher below receives whatever it last held - confirm this is intended */
01871     if (feature_prune_percentile > 0)
01872                                  //xiaofan
01873       config_mask_to_proto_mask (ClassForClassId (Templates, ClassId), (BIT_VECTOR) & ClassPrunerResults[i].config_mask,
01874         PrunedProtos);
01875                                  //xiaofan
01876     IntegerMatcher (ClassForClassId (Templates, ClassId), PrunedProtos, (BIT_VECTOR) & ClassPrunerResults[i].config_mask,
01877       IntOutlineLength, NumFeatures, IntFeatures, 0,
01878       CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
01879 
01880     if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01881       cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f)  ", ClassId, IntResult.Config,
01882         IntResult.Rating * 100.0,
01883         ClassPrunerResults[i].Rating * 100.0,
01884         ClassPrunerResults[i].Rating2 * 100.0);
01885       if (i % 4 == 3)
01886         cprintf ("\n");
01887     }
01888     /* record the integer-matcher rating and track the lowest rating seen so far */
01889     AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01890     if (IntResult.Rating < best_rating)
01891       best_rating = IntResult.Rating;
01892   }
01893   while (i < NumClasses) {       /* candidates the loop above stopped short of: no integer match is run */
01894     ClassId = ClassPrunerResults[i].Class;
01895     ClassIndex = IndexForClassId (Templates, ClassId);
01896 
01897     if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01898       cprintf ("%c %2.1f(%2.1f)  ", ClassId,
01899         ClassPrunerResults[i].Rating * 200.0,
01900         ClassPrunerResults[i].Rating2 * 100.0);
01901       if (i % 4 == 3)
01902         cprintf ("\n");
01903     }
01904     /* these classes are added with twice their pruner rating and config 0 */
01905     AddNewResult (Results, ClassId, ClassPrunerResults[i].Rating * 2, 0);
01906     i++;
01907   }
01908   if (MatcherDebugLevel >= 2 || display_ratings > 1)
01909     cprintf ("\n");
01910 
01911 }                                /* CharNormClassifier */

void ClassifyAsNoise ( TBLOB Blob,
LINE_STATS LineStats,
ADAPT_RESULTS Results 
)

Computes a rating which reflects the likelihood that the blob being classified is a noise blob.

Parameters:
Blob blob to be classified
LineStats statistics for text line Blob is in
Results results to add noise classification to
Note:
Globals:
  • NoiseBlobLength average length of a noise blob
Returns:
none
Note:
Assumes that the blob length has already been computed and placed into Results.

Exceptions: none

Date:
Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1929 of file adaptmatch.cpp.

References AddNewResult(), ADAPT_RESULTS::BlobLength, and NO_CLASS.

Referenced by DoAdaptiveMatch().

01931                                              {
01932   /* ratio of the blob length stored in Results to the noise-blob length */
01933   FLOAT32 NoiseScore = Results->BlobLength / NoiseBlobLength;
01934 
01935   /* r*r / (1 + r*r): below 1 for any finite ratio */
01936   NoiseScore = NoiseScore * NoiseScore;
01937   NoiseScore = NoiseScore / (1.0 + NoiseScore);
01938 
01939   /* the noise hypothesis is recorded under NO_CLASS with config 0 */
01940   AddNewResult (Results, NO_CLASS, NoiseScore, 0);
01941 }                                /* ClassifyAsNoise */

int CompareCurrentRatings ( const void *  arg1,
const void *  arg2 
)

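Gets the ratings for the 2 specified classes from a global variable (CurrentRatings) and returns -1 if the first class has the lower rating, 1 if it has the higher rating, and 0 if the two ratings are equal.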

Parameters:
arg1 class whose ratings are to be compared
arg2 class whose ratings are to be compared
Note:
Globals:
  • CurrentRatings contains actual ratings for each class
Returns:
Order of classes based on their ratings: -1, 1, or 0 when the rating of arg1 is respectively less than, greater than, or equal to the rating of arg2.

Definition at line 1960 of file adaptmatch.cpp.

References CurrentRatings.

Referenced by AdaptiveClassifier(), and GetAmbiguities().

01962                                             {
01963   /* qsort-style comparator: both arguments point at CLASS_IDs that are
01964      looked up in the global CurrentRatings table */
01965   const CLASS_ID *First = (const CLASS_ID *) arg1;
01966   const CLASS_ID *Second = (const CLASS_ID *) arg2;
01967   FLOAT32 FirstRating = CurrentRatings[*First];
01968   FLOAT32 SecondRating = CurrentRatings[*Second];
01969 
01970   if (FirstRating < SecondRating)
01971     return -1;                   /* lower rating orders first */
01972   if (FirstRating > SecondRating)
01973     return 1;
01974   return 0;                      /* neither is smaller */
01975 }                                /* CompareCurrentRatings */

LIST ConvertMatchesToChoices ( ADAPT_RESULTS Results  ) 

Creates a choice for each matching class in Results (up to MAX_MATCHES) and returns LIST of these choices.

Parameters:
Results Pointer to our ADAPT_RESULTS
Note:
Globals: none
Returns:
List of choices.
The match ratings are converted to be the ratings and certainties as used by the context checkers

FIX: This implements the RATING equation shown in Column 10, Rows 0 to 5 of Patent 5,237,627 (top of pg 16), right?

Note:
Exceptions: none
Date:
Tue Mar 12 08:55:37 1991, DSJ, Created.

Definition at line 1998 of file adaptmatch.cpp.

References append_choice(), ADAPT_RESULTS::BlobLength, CertaintyScale, ADAPT_RESULTS::Classes, ADAPT_RESULTS::Configs, MAX_MATCHES, NIL, ADAPT_RESULTS::NumMatches, and ADAPT_RESULTS::Ratings.

Referenced by AdaptiveClassifier().

01998                                                      {
01999   LIST Choices = NIL;
02000   char ChoiceString[2] = { '\0', '\0' };
02001   int Index;
02002 
02003   /* at most MAX_MATCHES choices are produced; the clamp is applied to
02004      Results itself, exactly as callers have always seen it */
02005   if (Results->NumMatches > MAX_MATCHES)
02006     Results->NumMatches = MAX_MATCHES;
02007 
02008   for (Index = 0; Index < Results->NumMatches; Index++) {
02009     CLASS_ID Match = Results->Classes[Index];
02010     FLOAT32 MatchRating = Results->Ratings[Match];
02011     /* rating is scaled by RatingScale and the blob length; certainty is
02012        the match rating scaled by -CertaintyScale */
02013     FLOAT32 Rating = MatchRating * (RatingScale * Results->BlobLength);
02014     FLOAT32 Certainty = MatchRating * -CertaintyScale;
02015 
02016     ChoiceString[0] = Match;     /* single-character choice string */
02017     Choices = append_choice (Choices, ChoiceString, Rating, Certainty,
02018       Results->Configs[Match]);
02019   }
02020 
02021   return Choices;
02022 }                                /* ConvertMatchesToChoices */

void DebugAdaptiveClassifier ( TBLOB Blob,
LINE_STATS LineStats,
ADAPT_RESULTS Results 
)

Debug Adaptive Classifier using GUI.

Parameters:
Blob Blob whose classification is being debugged
LineStats Statistics for text line blob is in
Results Results of match being debugged
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 2037 of file adaptmatch.cpp.

References ADAPT_RESULTS::BestClass, cprintf(), FALSE, GetClassToDebug(), ShowBestMatchFor(), ShowMatchDisplay(), TRUE, and UpdateMatchDisplay().

Referenced by AdaptiveClassifier().

02039                                                      {
02040   const char *Prompt =
02041     "\nType class id (or CTRL-A,B,C) in IntegerMatch Window ...";
02042   const char *ModeName = "All Templates";
02043   CLASS_ID Target = Results->BestClass;
02044   CLASS_ID Request;
02045   BOOL8 UseAdapted = TRUE;
02046   BOOL8 UsePreTrained = TRUE;
02047 
02048   for (;;) {
02049     /* show the best match for the currently selected class and mode */
02050     ShowMatchDisplay();
02051     cprintf ("\nDebugging class = %c  (%s) ...\n", Target, ModeName);
02052     ShowBestMatchFor(Blob, LineStats, Target, UseAdapted, UsePreTrained);
02053     UpdateMatchDisplay();
02054 
02055     /* ask for the next request; a zero id ends the session */
02056     Request = GetClassToDebug (Prompt);
02057     if (Request == 0)
02058       break;
02059 
02060     if (Request == 'a') {        /* both template sets */
02061       UseAdapted = TRUE;
02062       UsePreTrained = TRUE;
02063       ModeName = "All Templates";
02064     }
02065     else if (Request == 'b') {   /* adapted templates only */
02066       UseAdapted = TRUE;
02067       UsePreTrained = FALSE;
02068       ModeName = "Adaptive Templates Only";
02069     }
02070     else if (Request == 'c') {   /* pre-trained templates only */
02071       UseAdapted = FALSE;
02072       UsePreTrained = TRUE;
02073       ModeName = "PreTrained Templates Only";
02074     }
02075     else {
02076       Target = Request;          /* any other id selects a new class */
02077     }
02078   }
02079 }                                /* DebugAdaptiveClassifier */

void DoAdaptiveMatch ( TBLOB Blob,
LINE_STATS LineStats,
ADAPT_RESULTS Results 
)

Performs an adaptive classification.

Parameters:
Blob blob to be classified
LineStats statistics for text line Blob is in
Results place to put match results
Note:
Globals:
  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • GreatAdaptiveMatch rating limit for a great match
Returns:
none
If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.
Note:
Exceptions: none
Date:
Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 2110 of file adaptmatch.cpp.

References AdaptedTemplates, AdaptiveMatcherCalls, AmbigClassifier(), BaselineClassifier(), ADAPT_RESULTS::BestRating, CharNormClassifier(), ClassifyAsNoise(), InitIntFX, MarginalMatch, ADAPT_RESULTS::NumMatches, ADAPT_TEMPLATES_STRUCT::NumPermClasses, PreTrainedTemplates, tess_bn_matching, and tess_cn_matching.

Referenced by AdaptiveClassifier().

02112                                              {
02113   int PreTrainedOnly;
02114 
02115   AdaptiveMatcherCalls++;
02116   InitIntFX();
02117 
02118   /* too few permanent adapted classes, or char-norm matching forced */
02119   PreTrainedOnly = (AdaptedTemplates->NumPermClasses < MinNumPermClasses
02120     || tess_cn_matching);
02121 
02122   if (PreTrainedOnly) {
02123     CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
02124   }
02125   else {
02126     /* adapted templates go first; the classifier hands back a string of
02127        classes to re-check against the pre-trained templates */
02128     char *Ambiguities = BaselineClassifier (Blob, LineStats,
02129       AdaptedTemplates, Results);
02130     int Marginal = (Results->NumMatches > 0
02131       && MarginalMatch (Results->BestRating)
02132       && !tess_bn_matching);
02133 
02134     if (Marginal) {
02135       CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
02136     }
02137     else if (Ambiguities != NULL && *Ambiguities != '\0') {
02138       AmbigClassifier(Blob, LineStats, PreTrainedTemplates,
02139                       Ambiguities, Results);
02140     }
02141   }
02142 
02143   /* nothing matched at all: fall back to a noise classification */
02144   if (Results->NumMatches == 0)
02145     ClassifyAsNoise(Blob, LineStats, Results);
02146   }   /* DoAdaptiveMatch */

void EndAdaptiveClassifier (  ) 

Performs cleanup operations on the adaptive classifier.

Parameters:
none 
Note:
Globals:
  • AdaptedTemplates current set of adapted templates
  • SaveAdaptedTemplates TRUE if templates should be saved
  • EnableAdaptiveMatcher TRUE if adaptive matcher is enabled
Returns:
none
It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Finally, free all allocated memory (new for v1.03)

Note:
Exceptions: none
Date:
Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 790 of file adaptmatch.cpp.

References ADAPT_TEMPLATE_SUFFIX, AdaptedTemplates, AllConfigsOff, AllConfigsOn, AllProtosOff, AllProtosOn, cprintf(), EndDangerousAmbigs(), free_int_templates(), FreeBitVector(), FreeNormProtos(), imagefile, NULL, PreTrainedTemplates, PrunedProtos, TempProtoMask, and WriteAdaptedTemplates().

Referenced by dj_cleanup().

00790                              {
00791   /* Optionally saves the adapted templates to imagefile followed by
00792      ADAPT_TEMPLATE_SUFFIX, then frees the pre-trained templates and the
00793      global proto/config bit vectors and resets those globals to NULL. */
00794   #ifndef SECURE_NAMES
00795   if (EnableAdaptiveMatcher && SaveAdaptedTemplates) {
00796     char Filename[256];
00797     FILE *File;
00798 
00799     /* refuse names that do not fit in Filename (including the NUL)
00800        rather than letting strcpy/strcat write past the buffer */
00801     if (strlen (imagefile) + strlen (ADAPT_TEMPLATE_SUFFIX) >= sizeof (Filename))
00802       cprintf ("Unable to save adapted templates: file name %s%s is too long!\n",
00803         imagefile, ADAPT_TEMPLATE_SUFFIX);
00804     else {
00805       strcpy(Filename, imagefile);
00806       strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
00807       File = fopen (Filename, "wb");
00808       if (File == NULL)
00809         cprintf ("Unable to save adapted templates to %s!\n", Filename);
00810       else {
00811         cprintf ("\nSaving adapted templates to %s ...", Filename);
00812         fflush(stdout);
00813         WriteAdaptedTemplates(File, AdaptedTemplates);
00814         cprintf ("\n");
00815         fclose(File);
00816       }
00817     }
00818   }
00819   #endif
00820 
00821   // the following are new for v1.03
00822   EndDangerousAmbigs();
00823   FreeNormProtos();
00824   free_int_templates(PreTrainedTemplates);
00825   PreTrainedTemplates = NULL;
00826   FreeBitVector(AllProtosOn);
00827   FreeBitVector(PrunedProtos);
00828   FreeBitVector(AllConfigsOn);
00829   FreeBitVector(AllProtosOff);
00830   FreeBitVector(AllConfigsOff);
00831   FreeBitVector(TempProtoMask);
00832   // clear the globals so no stale pointer to a freed vector remains
00833   AllProtosOn = NULL;
00834   PrunedProtos = NULL;
00835   AllConfigsOn = NULL;
00836   AllProtosOff = NULL;
00837   AllConfigsOff = NULL;
00838   TempProtoMask = NULL;
00839 }// EndAdaptiveClassifier

int GetAdaptiveFeatures ( TBLOB Blob,
LINE_STATS LineStats,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

Sets up the feature extractor to extract baseline normalized pico-features.

Parameters:
Blob blob to extract features from
LineStats statistics about text row blob is in
IntFeatures array to fill with integer features
FloatFeatures place to return actual floating-pt features
Note:
Globals: none
Returns:
Number of pico-features returned (0 if an error occurred)
The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating point features are returned in FloatFeatures.
Note:
Exceptions: none
Date:
Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 1185 of file adaptmatch.cpp.

References baseline, ComputeIntFeatures(), ExtractPicoFeatures(), FreeFeatureSet(), NormMethod, NumFeaturesIn, and UNLIKELY_NUM_FEAT.

Referenced by AdaptToChar(), and compare_tess_blobs().

01188                                                     {
01189   FEATURE_SET PicoFeatures;
01190   int Count;
01191 
01192   /* pico-features are extracted with the baseline normalization method */
01193   NormMethod = baseline;
01194   PicoFeatures = ExtractPicoFeatures (Blob, LineStats);
01195   Count = NumFeaturesIn (PicoFeatures);
01196 
01197   if (Count <= UNLIKELY_NUM_FEAT) {
01198     ComputeIntFeatures(PicoFeatures, IntFeatures);
01199     /* the float feature set is handed back, not freed, on this path */
01200     *FloatFeatures = PicoFeatures;
01201     return Count;
01202   }
01203 
01204   /* implausibly many features: discard them and report failure */
01205   FreeFeatureSet(PicoFeatures);
01206   return 0;
01207 }                                /* GetAdaptiveFeatures */

void GetAdaptThresholds ( TWERD Word,
LINE_STATS LineStats,
const char *  BestChoice,
const char *  BestRawChoice,
FLOAT32  Thresholds[] 
)

Tries to estimate how tight the adaptation threshold should be set for each character in the current word.

Parameters:
Word current word
LineStats line stats for row word is in
BestChoice best choice for current word with context
BestRawChoice best choice for current word without context
Thresholds array of thresholds to be filled in
Note:
Globals:
  • EnableNewAdaptRules
  • GoodAdaptiveMatch
  • PerfectRating
  • RatingMargin
Returns:
none (results are returned in Thresholds)
In general, the routine tries to set tighter thresholds for a character when the current set of templates would have made an error on that character. It tries to set a threshold tight enough to eliminate the error. Two different sets of rules can be used to determine the desired thresholds.

If the blob gets incorrectly classified, find the rating threshold needed to create a template which will correct the error with some margin. However, don't waste time trying to make templates which are too tight.

Note:
Exceptions: none
Date:
Fri May 31 09:22:08 1991, DSJ, Created.

Definition at line 2175 of file adaptmatch.cpp.

References wordstruct::blobs, FindClassifierErrors(), GetBestRatingFor(), GoodAdaptiveMatch, blobstruct::next, and NULL.

Referenced by AdaptToWord().

02178                                                    {
02179     TBLOB *Blob;
02180     FLOAT32 Limit;
02181 
02182     if (EnableNewAdaptRules) {
02183       /* new rules: FindClassifierErrors fills in Thresholds */
02184       FindClassifierErrors(PerfectRating,
02185                            GoodAdaptiveMatch,
02186                            RatingMargin,
02187                            Thresholds);
02188       return;
02189     }
02190 
02191     /* old rules: step through the blobs, both choice strings and the
02192        threshold array together, one entry per blob */
02193     Blob = Word->blobs;
02194     while (Blob != NULL) {
02195       if (*BestChoice == *BestRawChoice) {
02196         *Thresholds = GoodAdaptiveMatch;
02197       }
02198       else {
02199         /* the raw classifier disagreed with the contextual choice: start
02200            from the best rating for the chosen class, tighten it by
02201            RatingMargin, then clamp to [PerfectRating, GoodAdaptiveMatch] */
02202         Limit = GetBestRatingFor (Blob, LineStats, *BestChoice);
02203         Limit *= (1.0 - RatingMargin);
02204         if (Limit > GoodAdaptiveMatch)
02205           Limit = GoodAdaptiveMatch;
02206         if (Limit < PerfectRating)
02207           Limit = PerfectRating;
02208         *Thresholds = Limit;
02209       }
02210 
02211       Blob = Blob->next;
02212       BestChoice++;
02213       BestRawChoice++;
02214       Thresholds++;
02215     }
02216   }                              /* GetAdaptThresholds */

char * GetAmbiguities ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  CorrectClass 
)

Matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters:
Blob blob to get classification ambiguities for
LineStats statistics for text line blob is in
CorrectClass correct class for Blob
Note:
Globals:
  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns:
String containing all possible ambiguous classes.
Note:
Exceptions: none
Date:
Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 2220 of file adaptmatch.cpp.

References ADAPT_RESULTS::BestClass, ADAPT_RESULTS::BestConfig, ADAPT_RESULTS::BestRating, CharNormClassifier(), ADAPT_RESULTS::Classes, CompareCurrentRatings(), CurrentRatings, Emalloc(), EnterClassifyMode, InitMatcherRatings(), NO_CLASS, ADAPT_RESULTS::NumMatches, PreTrainedTemplates, ADAPT_RESULTS::Ratings, RemoveBadMatches(), and WORST_POSSIBLE_RATING.

Referenced by MakePermanent().

02222                                               {  /* Builds a newly allocated string of class ids that the pre-trained templates match for Blob, ordered by rating. */
02223     ADAPT_RESULTS Results;
02224     char *Ambiguities;
02225     int i;
02226 
02227     EnterClassifyMode;
02228     /* start from an empty result set */
02229     Results.NumMatches = 0;
02230     Results.BestRating = WORST_POSSIBLE_RATING;
02231     Results.BestClass = NO_CLASS;
02232     Results.BestConfig = 0;
02233     InitMatcherRatings (Results.Ratings);
02234     /* match against the pre-trained templates, then drop the bad matches */
02235     CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
02236     RemoveBadMatches(&Results);
02237 
02238     /* save ratings in a global so that CompareCurrentRatings() can see them */
02239     CurrentRatings = Results.Ratings;
02240     qsort ((void *) (Results.Classes), Results.NumMatches,
02241       sizeof (CLASS_ID), CompareCurrentRatings);
02242 
02243     /* copy the class id's into a string of ambiguities - don't copy if
02244        the correct class is the only class id matched */
02245     Ambiguities = (char *) Emalloc (sizeof (char) * (Results.NumMatches + 1));
02246     if (Results.NumMatches > 1 ||
02247     Results.NumMatches == 1 && Results.Classes[0] != CorrectClass) {
02248       for (i = 0; i < Results.NumMatches; i++)
02249         Ambiguities[i] = Results.Classes[i];
02250       Ambiguities[i] = '\0';
02251     }
02252     else
02253       Ambiguities[0] = '\0';     /* no matches, or only the correct class matched */
02254     /* the buffer comes from Emalloc and is not freed here; presumably the caller releases it - confirm */
02255     return (Ambiguities);
02256 
02257   }                              /* GetAmbiguities */

int GetBaselineFeatures ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
FLOAT32 BlobLength 
)

Sets up the feature extractor to extract baseline normalized pico-features.

Parameters:
Blob Blob to extract features from
LineStats Statistics about text row blob is in
Templates Used to compute char norm adjustments
IntFeatures Array to fill with integer features
CharNormArray Array to fill with dummy char norm adjustments
BlobLength Length of blob in baseline-normalized units
Note:
Globals: none
Returns:
Number of pico-features returned (0 if an error occurred)
The extracted pico-features are converted to integer form and placed in IntFeatures. CharNormArray is filled with 0's to indicate to the matcher that no character normalization adjustment needs to be done.

The total length of all blob outlines in baseline normalized units is also returned.

Note:
Exceptions: none
Date:
Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 2281 of file adaptmatch.cpp.

References baseline, ClearCharNormArray(), ComputeIntFeatures(), ExtractPicoFeatures(), FreeFeatureSet(), GetIntBaselineFeatures(), GetPicoFeatureLength, NormMethod, NumFeaturesIn, and UNLIKELY_NUM_FEAT.

Referenced by BaselineClassifier(), GetBestRatingFor(), and ShowBestMatchFor().

02286                                                {
02287     FEATURE_SET PicoFeatures;
02288     int Count;
02289 
02290     /* with the integer feature extractor enabled, delegate everything */
02291     if (EnableIntFX) {
02292       return GetIntBaselineFeatures (Blob, LineStats, Templates,
02293         IntFeatures, CharNormArray, BlobLength);
02294     }
02295 
02296     NormMethod = baseline;
02297     PicoFeatures = ExtractPicoFeatures (Blob, LineStats);
02298     Count = NumFeaturesIn (PicoFeatures);
02299 
02300     /* the length is reported even when the feature count is rejected */
02301     *BlobLength = Count * GetPicoFeatureLength ();
02302 
02303     if (Count > UNLIKELY_NUM_FEAT) {
02304       Count = 0;                 /* implausibly many features: report failure */
02305     }
02306     else {
02307       ComputeIntFeatures(PicoFeatures, IntFeatures);
02308       /* baseline matching uses a cleared char-norm adjustment array */
02309       ClearCharNormArray(Templates, CharNormArray);
02310     }
02311 
02312     FreeFeatureSet(PicoFeatures);
02313     return Count;
02314   }                              /* GetBaselineFeatures */

FLOAT32 GetBestRatingFor ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  ClassId 
)

Classifies Blob against both sets of templates for the specified class and returns the best rating found.

Parameters:
Blob blob to get best rating for
LineStats statistics about text line blob is in
ClassId class blob is to be compared to
Note:
Globals:
  • PreTrainedTemplates built-in templates
  • AdaptedTemplates current set of adapted templates
  • AllProtosOn dummy mask to enable all protos
  • AllConfigsOn dummy mask to enable all configs
Returns:
Best rating for match of Blob to ClassId.
Note:
Exceptions: none
Date:
Tue Apr 9 09:01:24 1991, DSJ, Created.

Definition at line 2329 of file adaptmatch.cpp.

References AdaptedTemplates, AllConfigsOn, AllProtosOn, ADAPT_TEMPLATES_STRUCT::Class, ClassForClassId, GetBaselineFeatures(), GetCharNormFeatures(), GetPicoFeatureLength, IndexForClassId, IntegerMatcher(), LegalClassId, MIN, NO_DEBUG, ADAPT_CLASS_STRUCT::PermConfigs, ADAPT_CLASS_STRUCT::PermProtos, PreTrainedTemplates, INT_RESULT_STRUCT::Rating, SetBaseLineMatch(), SetCharNormMatch(), ADAPT_TEMPLATES_STRUCT::Templates, and UnusedClassIdIn.

Referenced by GetAdaptThresholds().

02331                                              {  /* Returns the lower of the char-norm (pre-trained) and baseline (adapted) ratings of Blob against ClassId. */
02332     int CNOutlineLength, BLOutlineLength;
02333     int NumCNFeatures, NumBLFeatures;
02334     INT_FEATURE_ARRAY CNFeatures, BLFeatures;
02335     INT_RESULT_STRUCT CNResult, BLResult;
02336     CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
02337     CLASS_INDEX ClassIndex;
02338     FLOAT32 BlobLength;
02339     /* both ratings default to 1.0, which is what is returned when a matcher is not run */
02340     CNResult.Rating = BLResult.Rating = 1.0;
02341 
02342     if (!LegalClassId (ClassId))
02343       return (1.0);
02344     /* char-norm match against the pre-trained templates, with all protos and configs enabled */
02345     if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) {
02346       NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
02347         PreTrainedTemplates,
02348         CNFeatures, CNAdjust, &BlobLength);
02349       if (NumCNFeatures > 0) {
02350         CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
02351         ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
02352 
02353         SetCharNormMatch();
02354         IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
02355           AllProtosOn, AllConfigsOn,
02356           CNOutlineLength, NumCNFeatures, CNFeatures, 0,
02357           CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
02358       }
02359     }
02360     /* baseline match against the adapted templates, restricted to the class's permanent protos and configs; BlobLength is overwritten by this second extraction */
02361     if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
02362       NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
02363         AdaptedTemplates->Templates,
02364         BLFeatures, BLAdjust, &BlobLength);
02365       if (NumBLFeatures > 0) {
02366         BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
02367         ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
02368 
02369         SetBaseLineMatch();
02370         IntegerMatcher (ClassForClassId
02371           (AdaptedTemplates->Templates, ClassId),
02372           AdaptedTemplates->Class[ClassIndex]->PermProtos,
02373           AdaptedTemplates->Class[ClassIndex]->PermConfigs,
02374           BLOutlineLength, NumBLFeatures, BLFeatures, 0,
02375           BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
02376       }
02377     }
02378     /* the smaller of the two ratings is the one returned */
02379     return (MIN (BLResult.Rating, CNResult.Rating));
02380 
02381   }                              /* GetBestRatingFor */

int GetCharNormFeatures ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
FLOAT32 BlobLength 
)

Sets up the feature extractor to extract character normalization features and character normalized pico-features.

Parameters:
Blob blob to extract features from
LineStats statistics about text row blob is in
Templates used to compute char norm adjustments
IntFeatures array to fill with integer features
CharNormArray array to fill with char norm adjustments
BlobLength length of blob in baseline-normalized units
Note:
Globals: none
Returns:
Number of pico-features returned (0 if an error occurred)
The extracted pico-features are converted to integer form and placed in IntFeatures. The character normalization features are matched to each class in templates and the resulting adjustment factors are returned in CharNormArray. The total length of all blob outlines in baseline normalized units is also returned.
Note:
Exceptions: none
Date:
Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 2407 of file adaptmatch.cpp.

References GetIntCharNormFeatures().

Referenced by AmbigClassifier(), CharNormClassifier(), GetBestRatingFor(), and ShowBestMatchFor().

02412                                                {
02413     return (GetIntCharNormFeatures (Blob, LineStats, Templates,
02414       IntFeatures, CharNormArray, BlobLength));
02415   }                              /* GetCharNormFeatures */

int GetIntBaselineFeatures ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
FLOAT32 BlobLength 
)

Calls the integer feature-extractor if it has not been called before for this blob.

Parameters:
Blob blob to extract features from
LineStats statistics about text row blob is in
Templates used to compute char norm adjustments
IntFeatures array to fill with integer features
CharNormArray array to fill with dummy char norm adjustments
BlobLength length of blob in baseline-normalized units
Note:
Globals:
  • FeaturesHaveBeenExtracted TRUE if fx has been done
  • BaselineFeatures holds extracted baseline feat
  • CharNormFeatures holds extracted char norm feat
  • FXInfo holds misc FX info
Returns:
Number of features extracted or 0 if an error occurred.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the baseline features into the IntFeatures array provided by the caller.

Note:
Exceptions: none
Date:
Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 2444 of file adaptmatch.cpp.

References BaselineFeatures, CharNormFeatures, ClearCharNormArray(), ComputeScaleFactor, ExtractIntFeat(), FeaturesHaveBeenExtracted, FeaturesOK, FXInfo, INT_FX_RESULT_STRUCT::Length, INT_FX_RESULT_STRUCT::NumBL, and TRUE.

Referenced by GetBaselineFeatures().

02449                                                   {
02450     int Index;
02451 
02452     /* run the extractor only if the global flag says it has not run yet;
02453        its outputs are kept in globals for later calls */
02454     if (!FeaturesHaveBeenExtracted) {
02455       FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
02456         CharNormFeatures, &FXInfo);
02457       FeaturesHaveBeenExtracted = TRUE;
02458     }
02459 
02460     /* extraction failed: still report the blob length, but no features */
02461     if (!FeaturesOK) {
02462       *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02463       return 0;
02464     }
02465 
02466     /* copy the cached baseline features into the caller's array */
02467     for (Index = 0; Index < FXInfo.NumBL; Index++)
02468       IntFeatures[Index] = BaselineFeatures[Index];
02469 
02470     ClearCharNormArray(Templates, CharNormArray);
02471     *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02472     return FXInfo.NumBL;
02473   }                              /* GetIntBaselineFeatures */

int GetIntCharNormFeatures ( TBLOB Blob,
LINE_STATS LineStats,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
CLASS_NORMALIZATION_ARRAY  CharNormArray,
FLOAT32 * BlobLength 
)

Calls the integer feature-extractor if it has not been called before for this blob.

Parameters:
Blob blob to extract features from
LineStats statistics about text row blob is in
Templates used to compute char norm adjustments
IntFeatures array to fill with integer features
CharNormArray array to fill with char norm adjustments (computed by ComputeIntCharNormArray() from the blob's char norm feature)
BlobLength length of blob in baseline-normalized units
Note:
Globals:
  • FeaturesHaveBeenExtracted TRUE if fx has been done
  • BaselineFeatures holds extracted baseline feat
  • CharNormFeatures holds extracted char norm feat
  • FXInfo holds misc. FX info
Returns:
Number of features extracted, or 0 if an error occurred.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Note:
Exceptions: none
Date:
Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 2499 of file adaptmatch.cpp.

References BaselineAt, BaselineFeatures, CharNormDesc, CharNormFeatures, CharNormLength, CharNormRx, CharNormRy, CharNormY, ComputeIntCharNormArray(), ComputeScaleFactor, ExtractIntFeat(), FeaturesHaveBeenExtracted, FeaturesOK, FreeFeature(), FXInfo, INT_FX_RESULT_STRUCT::Length, LENGTH_COMPRESSION, NewFeature(), INT_FX_RESULT_STRUCT::NumCN, ParamOf, INT_FX_RESULT_STRUCT::Rx, INT_FX_RESULT_STRUCT::Ry, TRUE, INT_FX_RESULT_STRUCT::Xmean, and INT_FX_RESULT_STRUCT::Ymean.

Referenced by GetCharNormFeatures().

02504                                                   {
02505     register INT_FEATURE Src, Dest, End;
02506     FEATURE NormFeature;
02507     FLOAT32 Baseline, Scale;
02508 
02509     if (!FeaturesHaveBeenExtracted) {
02510       FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
02511         CharNormFeatures, &FXInfo);
02512       FeaturesHaveBeenExtracted = TRUE;
02513     }
02514 
02515     if (!FeaturesOK) {
02516       *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02517       return (0);
02518     }
02519 
02520     for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
02521       Src < End; *Dest++ = *Src++);
02522 
02523     NormFeature = NewFeature (&CharNormDesc);
02524     Baseline = BaselineAt (LineStats, FXInfo.Xmean);
02525     Scale = ComputeScaleFactor (LineStats);
02526     ParamOf (NormFeature, CharNormY) = (FXInfo.Ymean - Baseline) * Scale;
02527     ParamOf (NormFeature, CharNormLength) =
02528       FXInfo.Length * Scale / LENGTH_COMPRESSION;
02529     ParamOf (NormFeature, CharNormRx) = FXInfo.Rx * Scale;
02530     ParamOf (NormFeature, CharNormRy) = FXInfo.Ry * Scale;
02531     ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
02532     FreeFeature(NormFeature);
02533 
02534     *BlobLength = FXInfo.Length * Scale;
02535     return (FXInfo.NumCN);
02536 
02537   }                              /* GetIntCharNormFeatures */

void InitAdaptiveClassifier (  ) 

Reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters:
none 
Note:
Globals:
  • BuiltInTemplatesFile file to get built-in temps from
  • BuiltInCutoffsFile file to get avg. feat per class from
  • PreTrainedTemplates pre-trained configs and protos
  • AdaptedTemplates templates adapted to current page
  • CharNormCutoffs avg # of features per class
  • AllProtosOn dummy proto mask with all bits 1
  • AllConfigsOn dummy config mask with all bits 1
  • UsePreAdaptedTemplates enables use of pre-adapted templates
Returns:
none
Note:
Exceptions: none
Date:
Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 850 of file adaptmatch.cpp.

References ADAPT_TEMPLATE_SUFFIX, AdaptedTemplates, AllConfigsOff, AllConfigsOn, AllProtosOff, AllProtosOn, BaselineCutoffs, BuiltInCutoffsFile, BuiltInTemplatesFile, CharNormCutoffs, ClassIdForIndex, cprintf(), demodir, Efopen(), GetNormProtos(), imagefile, IndexForClassId, InitIntegerFX(), InitIntegerMatcher(), MAX_NUM_CONFIGS, MAX_NUM_PROTOS, NewAdaptedTemplates(), NewBitVector(), NULL, NumClassesIn, old_enable_learning, PreTrainedTemplates, PrintAdaptedTemplates(), PrunedProtos, ReadAdaptedTemplates(), ReadIntTemplates(), ReadNewCutoffs(), set_all_bits, ADAPT_TEMPLATES_STRUCT::Templates, TempProtoMask, TRUE, WordsInVectorOfSize, and zero_all_bits.

Referenced by mfeature_init().

00850                               {
00851   int i;
00852   FILE *File;
00853   char Filename[1024];
00854 
00855   if (!EnableAdaptiveMatcher)
00856     return;
00857 
00858   strcpy(Filename, demodir);
00859   strcat(Filename, BuiltInTemplatesFile);
00860   #ifndef SECURE_NAMES
00861   //      cprintf( "\nReading built-in templates from %s ...",
00862   //              Filename);
00863   fflush(stdout);
00864   #endif
00865 
00866   #ifdef __UNIX__
00867   File = Efopen (Filename, "r");
00868   #else
00869   File = Efopen (Filename, "rb");
00870   #endif
00871   PreTrainedTemplates = ReadIntTemplates (File, TRUE);
00872   fclose(File);
00873 
00874   strcpy(Filename, demodir);
00875   strcat(Filename, BuiltInCutoffsFile);
00876   #ifndef SECURE_NAMES
00877   //      cprintf( "\nReading built-in pico-feature cutoffs from %s ...",
00878   //              Filename);
00879   fflush(stdout);
00880   #endif
00881   ReadNewCutoffs (Filename, PreTrainedTemplates->IndexFor, CharNormCutoffs);
00882 
00883   GetNormProtos();
00884 
00885   InitIntegerMatcher();
00886   InitIntegerFX();
00887 
00888   AllProtosOn = NewBitVector (MAX_NUM_PROTOS);
00889   PrunedProtos = NewBitVector (MAX_NUM_PROTOS);
00890   AllConfigsOn = NewBitVector (MAX_NUM_CONFIGS);
00891   AllProtosOff = NewBitVector (MAX_NUM_PROTOS);
00892   AllConfigsOff = NewBitVector (MAX_NUM_CONFIGS);
00893   TempProtoMask = NewBitVector (MAX_NUM_PROTOS);
00894   set_all_bits (AllProtosOn, WordsInVectorOfSize (MAX_NUM_PROTOS));
00895   set_all_bits (PrunedProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
00896   set_all_bits (AllConfigsOn, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00897   zero_all_bits (AllProtosOff, WordsInVectorOfSize (MAX_NUM_PROTOS));
00898   zero_all_bits (AllConfigsOff, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00899 
00900   if (UsePreAdaptedTemplates) {
00901     strcpy(Filename, imagefile);
00902     strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
00903     File = fopen (Filename, "rb");
00904     if (File == NULL)
00905       AdaptedTemplates = NewAdaptedTemplates ();
00906     else {
00907       #ifndef SECURE_NAMES
00908       cprintf ("\nReading pre-adapted templates from %s ...", Filename);
00909       fflush(stdout);
00910       #endif
00911       AdaptedTemplates = ReadAdaptedTemplates (File);
00912       cprintf ("\n");
00913       fclose(File);
00914       PrintAdaptedTemplates(stdout, AdaptedTemplates);
00915 
00916       for (i = 0; i < NumClassesIn (AdaptedTemplates->Templates); i++) {
00917         BaselineCutoffs[i] =
00918           CharNormCutoffs[IndexForClassId (PreTrainedTemplates,
00919           ClassIdForIndex
00920           (AdaptedTemplates->Templates,
00921           i))];
00922       }
00923     }
00924   }
00925   else
00926     AdaptedTemplates = NewAdaptedTemplates ();
00927   old_enable_learning = EnableLearning;
00928 
00929 }                                /* InitAdaptiveClassifier */

void InitAdaptiveClassifierVars (  ) 

Installs the control knobs used by the adaptive matcher.

Parameters:
none 
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 950 of file adaptmatch.cpp.

References BUILT_IN_CUTOFFS_FILE, BUILT_IN_TEMPLATES_FILE, BuiltInCutoffsFile, BuiltInTemplatesFile, dummy, InitOutlineFXVars(), InitPicoFXVars(), and string_variable.

Referenced by init_dj_debug().

00950                                   {
00951   VALUE dummy;
00952 
00953   string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile",
00954     BUILT_IN_TEMPLATES_FILE);
00955   string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile",
00956     BUILT_IN_CUTOFFS_FILE);
00957 
00958   MakeEnableAdaptiveMatcher();
00959   MakeUsePreAdaptedTemplates();
00960   MakeSaveAdaptedTemplates();
00961 
00962   MakeEnableLearning();
00963   MakeEnableAdaptiveDebugger();
00964   MakeBadMatchPad();
00965   MakeGoodAdaptiveMatch();
00966   MakeGreatAdaptiveMatch();
00967   MakeNoiseBlobLength();
00968   MakeMinNumPermClasses();
00969   MakeReliableConfigThreshold();
00970   MakeMaxAngleDelta();
00971   MakeLearningDebugLevel();
00972   MakeMatcherDebugLevel();
00973   MakeMatchDebugFlags();
00974   MakeRatingMargin();
00975   MakePerfectRating();
00976   MakeEnableIntFX();
00977   MakeEnableNewAdaptRules();
00978   MakeRatingScale();
00979   MakeCertaintyScale();
00980 
00981   InitPicoFXVars();
00982   InitOutlineFXVars();  //?
00983 
00984 }                                /* InitAdaptiveClassifierVars */

void InitMatcherRatings ( register FLOAT32 * Rating  ) 

Initializes the best rating for each class to be the worst possible rating (1.0).

Parameters:
Rating ptr to array of ratings to be initialized
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Tue Mar 12 13:43:28 1991, DSJ, Created.

Definition at line 2550 of file adaptmatch.cpp.

References MAX_CLASS_ID, and WORST_POSSIBLE_RATING.

Referenced by AdaptiveClassifier(), AdaptToPunc(), and GetAmbiguities().

02550                                                     {
02551     register FLOAT32 *LastRating;
02552     register FLOAT32 WorstRating = WORST_POSSIBLE_RATING;
02553 
02554     for (LastRating = Rating + MAX_CLASS_ID;
02555       Rating <= LastRating; *Rating++ = WorstRating);
02556 
02557   }                              /* InitMatcherRatings */

void make_config_pruner ( INT_TEMPLATES  templates,
CONFIG_PRUNER config_pruner 
)

Prune the possible classes for speed.

Parameters:
templates Predefined templates, from tessdata file
config_pruner Changed by function
Returns:
none
This is a pre-classifier that is used to create a short-list of classification candidates (pruning the possible classes) so that the full distance metric can be calculated on the short-list without taking excessive time, instead of exhaustively matching against each character possibility.

The class pruner uses a faster, but approximate method of matching the features, so while it does make mistakes, the mistakes are rare.

Definition at line 1679 of file adaptmatch.cpp.

References ClassForIndex, NUM_PP_BUCKETS, NumClassesIn, NumProtoSetsIn, PROTOS_PER_PROTO_SET, and ProtoSetIn.

01680                                                       {
01681   int classid;
01682   int x;                         //feature coord
01683   int word_index;                //in faster version
01684   int bit_index;
01685   UINT32 XFeatureAddress;
01686   UINT32 YFeatureAddress;
01687   UINT32 ThetaFeatureAddress;
01688   INT_CLASS ClassTemplate;
01689   int ProtoSetIndex;
01690   PROTO_SET ProtoSet;
01691   UINT32 *ProtoPrunerPtr;
01692   UINT32 ProtoNum;
01693   INT32 proto_offset;
01694   UINT32 ConfigWord;
01695   UINT32 ProtoWord;
01696   INT_PROTO Proto;
01697   UINT32 x_config_mask;          //forming mask
01698   UINT32 y_config_mask;          //forming mask
01699   UINT32 th_config_mask;         //forming mask
01700 
01701   for (classid = 0; classid < NumClassesIn (templates); classid++) {
01702     ClassTemplate = ClassForIndex (templates, classid);
01703     for (x = 0; x < NUM_PP_BUCKETS; x++) {
01704       XFeatureAddress = (x << 1);
01705       YFeatureAddress = (NUM_PP_BUCKETS << 1) + (x << 1);
01706       ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + (x << 1);
01707       x_config_mask = 0;
01708       y_config_mask = 0;
01709       th_config_mask = 0;
01710       for (ProtoSetIndex = 0;
01711         ProtoSetIndex < NumProtoSetsIn (ClassTemplate);
01712       ProtoSetIndex++) {
01713         ProtoSet = ProtoSetIn (ClassTemplate, ProtoSetIndex);
01714         ProtoPrunerPtr = (UINT32 *) ((*ProtoSet).ProtoPruner);
01715         for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
01716         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ProtoPrunerPtr++) {
01717           /* Prune Protos of current Proto Set */
01718           ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
01719           for (proto_offset = 0; ProtoWord != 0;
01720           proto_offset++, ProtoWord >>= 1) {
01721             if (ProtoWord & 1) {
01722               Proto =
01723                 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01724               ConfigWord = Proto->Configs[0];
01725               x_config_mask |= ConfigWord;
01726             }
01727           }
01728 
01729           ProtoWord = *(ProtoPrunerPtr + YFeatureAddress);
01730           for (proto_offset = 0; ProtoWord != 0;
01731           proto_offset++, ProtoWord >>= 1) {
01732             if (ProtoWord & 1) {
01733               Proto =
01734                 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01735               ConfigWord = Proto->Configs[0];
01736               y_config_mask |= ConfigWord;
01737             }
01738           }
01739 
01740           ProtoWord = *(ProtoPrunerPtr + ThetaFeatureAddress);
01741           for (proto_offset = 0; ProtoWord != 0;
01742           proto_offset++, ProtoWord >>= 1) {
01743             if (ProtoWord & 1) {
01744               Proto =
01745                 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01746               ConfigWord = Proto->Configs[0];
01747               th_config_mask |= ConfigWord;
01748             }
01749           }
01750         }
01751       }
01752       for (word_index = 0; word_index < 4; word_index++) {
01753         ConfigWord = 0;
01754         for (bit_index = 0; bit_index < 8; bit_index++) {
01755           if (x_config_mask & 1)
01756             ConfigWord |= 1 << (bit_index * 4);
01757           x_config_mask >>= 1;
01758         }
01759         config_pruner[classid][0][x][word_index] = ConfigWord;
01760 
01761         ConfigWord = 0;
01762         for (bit_index = 0; bit_index < 8; bit_index++) {
01763           if (y_config_mask & 1)
01764             ConfigWord |= 1 << (bit_index * 4);
01765           y_config_mask >>= 1;
01766         }
01767         config_pruner[classid][1][x][word_index] = ConfigWord;
01768 
01769         ConfigWord = 0;
01770         for (bit_index = 0; bit_index < 8; bit_index++) {
01771           if (th_config_mask & 1)
01772             ConfigWord |= 1 << (bit_index * 4);
01773           th_config_mask >>= 1;
01774         }
01775         config_pruner[classid][2][x][word_index] = ConfigWord;
01776       }
01777     }
01778   }
01779 }

make_float_var ( CertaintyScale  ,
20.0,
MakeCertaintyScale  ,
18  ,
18  ,
SetCertaintyScale  ,
"CertaintyScale: "   
)

make_float_var ( RatingScale  ,
30.0,
MakeRatingScale  ,
18  ,
17  ,
SetRatingScale  ,
"Rating scale: "   
)

make_float_var ( MaxAngleDelta  ,
0.015,
MakeMaxAngleDelta  ,
18  ,
14  ,
SetMaxAngleDelta  ,
"Maximum angle delta for proto clustering: "   
)

make_float_var ( NoiseBlobLength  ,
0.6,
MakeNoiseBlobLength  ,
18  ,
11  ,
SetNoiseBlobLength  ,
"Avg. noise blob length: "   
)

make_float_var ( RatingMargin  ,
0.1,
MakeRatingMargin  ,
18  ,
10  ,
SetRatingMargin  ,
"New template margin (0-1): "   
)

make_float_var ( BadMatchPad  ,
0.15,
MakeBadMatchPad  ,
18  ,
,
SetBadMatchPad  ,
"Bad Match Pad (0-1): "   
)

make_float_var ( PerfectRating  ,
0.02,
MakePerfectRating  ,
18  ,
,
SetPerfectRating  ,
"Perfect Match (0-1): "   
)

make_float_var ( GreatAdaptiveMatch  ,
0.0,
MakeGreatAdaptiveMatch  ,
18  ,
,
SetGreatAdaptiveMatch  ,
"Great Match (0-1): "   
)

make_float_var ( GoodAdaptiveMatch  ,
0.125,
MakeGoodAdaptiveMatch  ,
18  ,
,
SetGoodAdaptiveMatch  ,
"Good Match (0-1): "   
)

make_int_var ( ReliableConfigThreshold  ,
,
MakeReliableConfigThreshold  ,
18  ,
13  ,
SetReliableConfigThreshold  ,
"Reliable Config Threshold: "   
)

make_int_var ( MinNumPermClasses  ,
,
MakeMinNumPermClasses  ,
18  ,
12  ,
SetMinNumPermClasses  ,
"Min # of permanent classes: "   
)

make_int_var ( LearningDebugLevel  ,
,
MakeLearningDebugLevel  ,
18  ,
,
SetLearningDebugLevel  ,
"Learning Debug Level: "   
)

make_int_var ( MatchDebugFlags  ,
,
MakeMatchDebugFlags  ,
18  ,
,
SetMatchDebugFlags  ,
"Matcher Debug Flags: "   
)

make_int_var ( MatcherDebugLevel  ,
,
MakeMatcherDebugLevel  ,
18  ,
,
SetMatcherDebugLevel  ,
"Matcher Debug Level: "   
)

make_toggle_const ( SaveAdaptedTemplates  ,
,
MakeSaveAdaptedTemplates   
)

make_toggle_const ( UsePreAdaptedTemplates  ,
,
MakeUsePreAdaptedTemplates   
)

make_toggle_const ( EnableAdaptiveMatcher  ,
,
MakeEnableAdaptiveMatcher   
)

make_toggle_var ( EnableNewAdaptRules  ,
,
MakeEnableNewAdaptRules  ,
18  ,
16  ,
SetEnableNewAdaptRules  ,
"Enable new adaptation rules"   
)

make_toggle_var ( EnableIntFX  ,
,
MakeEnableIntFX  ,
18  ,
15  ,
SetEnableIntFX  ,
"Enable integer fx"   
)

make_toggle_var ( EnableLearning  ,
,
MakeEnableLearning  ,
18  ,
,
SetEnableLearning  ,
"Enable learning"   
)

make_toggle_var ( EnableAdaptiveDebugger  ,
,
MakeEnableAdaptiveDebugger  ,
18  ,
,
SetEnableAdaptiveDebugger  ,
"Enable match debugger"   
)

void MakeNewAdaptedClass ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  ClassId,
ADAPT_TEMPLATES  Templates 
)

Creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters:
Blob blob to model new class after
LineStats statistics for text row blob is in
ClassId id of new class to be created
Templates adapted templates to add new class to
Note:
Globals:
  • AllProtosOn dummy mask with all 1's
  • BaselineCutoffs kludge needed to get cutoffs
  • PreTrainedTemplates kludge needed to get cutoffs
Returns:
none
Note:
Exceptions: none
Date:
Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 1096 of file adaptmatch.cpp.

References AddAdaptedClass(), AddIntConfig(), AddIntProto(), AddProtoToProtoPruner(), AllProtosOn, assert(), baseline, BaselineCutoffs, CharNormCutoffs, ClassForClassId, Config, ConvertConfig(), ConvertProto(), cprintf(), ExtractOutlineFeatures(), FeatureIn, FillABC(), FreeFeatureSet(), IndexForClassId, LearningDebugLevel, NewAdaptedClass(), NewTempConfig(), NewTempProto(), NO_PROTO, NormMethod, NumFeaturesIn, OutlineFeatDir, OutlineFeatLength, OutlineFeatX, OutlineFeatY, ParamOf, PreTrainedTemplates, TEMP_PROTO_STRUCT::Proto, ProtoAngle, TEMP_PROTO_STRUCT::ProtoId, ProtoLength, ProtoX, ProtoY, push(), SET_BIT, TempConfigFor, ADAPT_TEMPLATES_STRUCT::Templates, ADAPT_CLASS_STRUCT::TempProtos, UNLIKELY_NUM_FEAT, and Y_DIM_OFFSET.

Referenced by AdaptToChar(), and compare_tess_blobs().

01099                                                     {
01100   FEATURE_SET Features;
01101   int Fid, Pid;
01102   FEATURE Feature;
01103   int NumFeatures;
01104   TEMP_PROTO TempProto;
01105   PROTO Proto;
01106   ADAPT_CLASS Class;
01107   INT_CLASS IClass;
01108   CLASS_INDEX ClassIndex;
01109   TEMP_CONFIG Config;
01110 
01111   NormMethod = baseline;
01112   Features = ExtractOutlineFeatures (Blob, LineStats);
01113   NumFeatures = NumFeaturesIn (Features);
01114   if (NumFeatures > UNLIKELY_NUM_FEAT) {
01115     FreeFeatureSet(Features);
01116     return;
01117   }
01118 
01119   Class = NewAdaptedClass ();
01120   ClassIndex = AddAdaptedClass (Templates, Class, ClassId);
01121   Config = NewTempConfig (NumFeatures - 1);
01122   TempConfigFor (Class, 0) = Config;
01123 
01124   /* This is a kludge to construct cutoffs for adapted templates */
01125   BaselineCutoffs[ClassIndex] =
01126     CharNormCutoffs[IndexForClassId (PreTrainedTemplates, ClassId)];
01127 
01128   IClass = ClassForClassId (Templates->Templates, ClassId);
01129 
01130   for (Fid = 0; Fid < NumFeaturesIn (Features); Fid++) {
01131     Pid = AddIntProto (IClass);
01132     assert (Pid != NO_PROTO);
01133 
01134     Feature = FeatureIn (Features, Fid);
01135     TempProto = NewTempProto ();
01136     Proto = &(TempProto->Proto);
01137 
01138     /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
01139        ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
01140        instead of the -0.25 to 0.75 used in baseline normalization */
01141     ProtoAngle (Proto) = ParamOf (Feature, OutlineFeatDir);
01142     ProtoX (Proto) = ParamOf (Feature, OutlineFeatX);
01143     ProtoY (Proto) = ParamOf (Feature, OutlineFeatY) - Y_DIM_OFFSET;
01144     ProtoLength (Proto) = ParamOf (Feature, OutlineFeatLength);
01145     FillABC(Proto);
01146 
01147     TempProto->ProtoId = Pid;
01148     SET_BIT (Config->Protos, Pid);
01149 
01150     ConvertProto(Proto, Pid, IClass);
01151     AddProtoToProtoPruner(Proto, Pid, IClass);
01152 
01153     Class->TempProtos = push (Class->TempProtos, TempProto);
01154   }
01155   FreeFeatureSet(Features);
01156 
01157   AddIntConfig(IClass);
01158   ConvertConfig (AllProtosOn, 0, IClass);
01159 
01160   if (LearningDebugLevel >= 1) {
01161     cprintf ("Added new class '%c' with index %d and %d protos.\n",
01162       ClassId, ClassIndex, NumFeatures);
01163   }
01164 
01165 }                                /* MakeNewAdaptedClass */

void MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)

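Makes new temp config ConfigId using NumOldProtos old and some new protos. Note that the listing below resets NumOldProtos to 0 immediately after calling FindGoodProtos(), so no old protos are placed in TempProtoMask before FindBadFeatures() runs and the debug message always reports 0 old protos.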

Parameters:
Templates Adapted templates to add new config to
ClassId Class id to associate with new config
NumFeatures Number of features in Features
Features Features describing model for new config
FloatFeatures Floating-pt representation of features
Note:
Globals:
  • AllProtosOn mask to enable all protos
  • AllConfigsOff mask to disable all configs
  • TempProtoMask defines old protos matched in new config
Returns:
none
Note:
Exceptions: none
Date:
Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 2576 of file adaptmatch.cpp.

References AddIntConfig(), AllConfigsOff, AllConfigsOn, AllProtosOn, ADAPT_TEMPLATES_STRUCT::Class, ClassForClassId, Config, ConvertConfig(), copy_all_bits, cprintf(), FindBadFeatures(), FindGoodProtos(), IndexForClassId, LearningDebugLevel, MakeNewTempProtos(), MAX_NUM_CONFIGS, MAX_NUM_INT_FEATURES, MAX_NUM_PROTOS, NewTempConfig(), NO_DEBUG, NO_PROTO, NumIntConfigsIn, NumIntProtosIn, PRINT_FEATURE_MATCHES, PRINT_MATCH_SUMMARY, PRINT_PROTO_MATCHES, SET_BIT, TempConfigFor, ADAPT_TEMPLATES_STRUCT::Templates, TempProtoMask, WordsInVectorOfSize, and zero_all_bits.

Referenced by AdaptToChar().

02580                                                          {
02581     CLASS_INDEX ClassIndex;
02582     INT_CLASS IClass;
02583     ADAPT_CLASS Class;
02584     PROTO_ID OldProtos[MAX_NUM_PROTOS];
02585     FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
02586     int NumOldProtos;
02587     int NumBadFeatures;
02588     int MaxProtoId, OldMaxProtoId;
02589     int BlobLength = 0;
02590     int MaskSize;
02591     int ConfigId;
02592     TEMP_CONFIG Config;
02593     int i;
02594     int debug_level = NO_DEBUG;
02595 
02596     if (LearningDebugLevel >= 3)
02597       debug_level =
02598         PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES;
02599 
02600     ClassIndex = IndexForClassId (Templates->Templates, ClassId);
02601     IClass = ClassForClassId (Templates->Templates, ClassId);
02602     Class = Templates->Class[ClassIndex];
02603 
02604     if (NumIntConfigsIn (IClass) >= MAX_NUM_CONFIGS)
02605       return;
02606 
02607     OldMaxProtoId = NumIntProtosIn (IClass) - 1;
02608 
02609     NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff,
02610       BlobLength, NumFeatures, Features,
02611       OldProtos, debug_level);
02612     NumOldProtos = 0;
02613 
02614     MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS);
02615     zero_all_bits(TempProtoMask, MaskSize);
02616     for (i = 0; i < NumOldProtos; i++)
02617       SET_BIT (TempProtoMask, OldProtos[i]);
02618 
02619     NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn,
02620       BlobLength, NumFeatures, Features,
02621       BadFeatures, debug_level);
02622 
02623     MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures, BadFeatures,
02624       IClass, Class, TempProtoMask);
02625     if (MaxProtoId == NO_PROTO)
02626       return;
02627 
02628     ConfigId = AddIntConfig (IClass);
02629     ConvertConfig(TempProtoMask, ConfigId, IClass);
02630     Config = NewTempConfig (MaxProtoId);
02631     TempConfigFor (Class, ConfigId) = Config;
02632     copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize);
02633 
02634     if (LearningDebugLevel >= 1)
02635       cprintf ("Making new temp config %d using %d old and %d new protos.\n",
02636         ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);
02637 
02638   }                              /* MakeNewTemporaryConfig */

PROTO_ID MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

Finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto.

Parameters:
Features floating-pt features describing new character
NumBadFeat number of bad features to turn into protos
BadFeat feature id's of bad features
IClass integer class templates to add new protos to
Class adapted class templates to add new protos to
TempProtoMask proto mask to add new protos to
Note:
Globals: none
Returns:
Max proto id in class after all protos have been added, or NO_PROTO if AddIntProto() could not add a new proto.
The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Y_DIM_OFFSET must be used because ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 instead of the -0.25 to 0.75 used in baseline normalization

Note:
Exceptions: none
Date:
Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 2664 of file adaptmatch.cpp.

References AddIntProto(), AddProtoToProtoPruner(), ConvertProto(), FeatureIn, FillABC(), GetPicoFeatureLength, NewTempProto(), NO_PROTO, NumIntProtosIn, ParamOf, PicoFeatDir, PicoFeatX, PicoFeatY, TEMP_PROTO_STRUCT::Proto, ProtoAngle, TEMP_PROTO_STRUCT::ProtoId, ProtoLength, ProtoX, ProtoY, push(), SET_BIT, ADAPT_CLASS_STRUCT::TempProtos, and Y_DIM_OFFSET.

Referenced by MakeNewTemporaryConfig().

02668                                                {
02669     FEATURE_ID *ProtoStart;
02670     FEATURE_ID *ProtoEnd;
02671     FEATURE_ID *LastBad;
02672     TEMP_PROTO TempProto;
02673     PROTO Proto;
02674     FEATURE F1, F2;
02675     FLOAT32 X1, X2, Y1, Y2;
02676     FLOAT32 A1, A2, AngleDelta;
02677     FLOAT32 SegmentLength;
02678     PROTO_ID Pid;
02679 
02680     for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
02681     ProtoStart < LastBad; ProtoStart = ProtoEnd) {
02682       F1 = FeatureIn (Features, *ProtoStart);
02683       X1 = ParamOf (F1, PicoFeatX);
02684       Y1 = ParamOf (F1, PicoFeatY);
02685       A1 = ParamOf (F1, PicoFeatDir);
02686 
02687       for (ProtoEnd = ProtoStart + 1,
02688         SegmentLength = GetPicoFeatureLength ();
02689         ProtoEnd < LastBad;
02690       ProtoEnd++, SegmentLength += GetPicoFeatureLength ()) {
02691         F2 = FeatureIn (Features, *ProtoEnd);
02692         X2 = ParamOf (F2, PicoFeatX);
02693         Y2 = ParamOf (F2, PicoFeatY);
02694         A2 = ParamOf (F2, PicoFeatDir);
02695 
02696         AngleDelta = fabs (A1 - A2);
02697         if (AngleDelta > 0.5)
02698           AngleDelta = 1.0 - AngleDelta;
02699 
02700         if (AngleDelta > MaxAngleDelta ||
02701           fabs (X1 - X2) > SegmentLength ||
02702           fabs (Y1 - Y2) > SegmentLength)
02703           break;
02704       }
02705 
02706       F2 = FeatureIn (Features, *(ProtoEnd - 1));
02707       X2 = ParamOf (F2, PicoFeatX);
02708       Y2 = ParamOf (F2, PicoFeatY);
02709       A2 = ParamOf (F2, PicoFeatDir);
02710 
02711       Pid = AddIntProto (IClass);
02712       if (Pid == NO_PROTO)
02713         return (NO_PROTO);
02714 
02715       TempProto = NewTempProto ();
02716       Proto = &(TempProto->Proto);
02717 
02718       /* compute proto params using Y_DIM_OFFSET */
02719       ProtoLength (Proto) = SegmentLength;
02720       ProtoAngle (Proto) = A1;
02721       ProtoX (Proto) = (X1 + X2) / 2.0;
02722       ProtoY (Proto) = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
02723       FillABC(Proto);
02724 
02725       TempProto->ProtoId = Pid;
02726       SET_BIT(TempProtoMask, Pid);
02727 
02728       ConvertProto(Proto, Pid, IClass);
02729       AddProtoToProtoPruner(Proto, Pid, IClass);
02730 
02731       Class->TempProtos = push (Class->TempProtos, TempProto);
02732     }
02733     return (NumIntProtosIn (IClass) - 1);
02734   }                              /* MakeNewTempProtos */

void MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB Blob,
LINE_STATS LineStats 
)

Makes ConfigId permanent in the class for ClassId, and stores the ambiguities returned by GetAmbiguities() for Blob and LineStats as the permanent config.

Parameters:
Templates current set of adaptive templates
ClassId class containing config to be made permanent
ConfigId config to be made permanent
Blob current blob being adapted to
LineStats statistics about text line Blob is in
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 2750 of file adaptmatch.cpp.

References ADAPT_TEMPLATES_STRUCT::Class, PROTO_KEY::ClassId, Config, PROTO_KEY::ConfigId, cprintf(), delete_d(), FreeTempConfig(), GetAmbiguities(), IndexForClassId, LearningDebugLevel, MakeConfigPermanent, MakeTempProtoPerm(), ADAPT_TEMPLATES_STRUCT::NumPermClasses, ADAPT_CLASS_STRUCT::NumPermConfigs, PermConfigFor, TempConfigFor, PROTO_KEY::Templates, ADAPT_TEMPLATES_STRUCT::Templates, and ADAPT_CLASS_STRUCT::TempProtos.

Referenced by AdaptToChar().

02754                                             {
02755     char *Ambigs;
02756     TEMP_CONFIG Config;
02757     CLASS_INDEX ClassIndex;
02758     ADAPT_CLASS Class;
02759     PROTO_KEY ProtoKey;
02760 
02761     ClassIndex = IndexForClassId (Templates->Templates, ClassId);
02762     Class = Templates->Class[ClassIndex];
02763     Config = TempConfigFor (Class, ConfigId);
02764 
02765     MakeConfigPermanent(Class, ConfigId);
02766     if (Class->NumPermConfigs == 0)
02767       Templates->NumPermClasses++;
02768     Class->NumPermConfigs++;
02769 
02770     ProtoKey.Templates = Templates;
02771     ProtoKey.ClassId = ClassId;
02772     ProtoKey.ConfigId = ConfigId;
02773     Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey,
02774       MakeTempProtoPerm);
02775     FreeTempConfig(Config);
02776 
02777     Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
02778     PermConfigFor (Class, ConfigId) = Ambigs;
02779 
02780     if (LearningDebugLevel >= 1)
02781       cprintf ("Making config %d permanent with ambiguities '%s'.\n",
02782         ConfigId, Ambigs);
02783 
02784   }                              /* MakePermanent */

int MakeTempProtoPerm ( void *  item1,
void *  item2 
)

Converts TempProto to be permanent if its proto id is used by the configuration specified in ProtoKey.

Parameters:
item1 temporary proto to compare to key
item2 defines which protos to make permanent
Note:
Globals: none
Returns:
TRUE if TempProto is converted, FALSE otherwise
Note:
Exceptions: none
Date:
Thu Mar 14 18:49:54 1991, DSJ, Created.

Definition at line 2799 of file adaptmatch.cpp.

References AddProtoToClassPruner(), ADAPT_TEMPLATES_STRUCT::Class, PROTO_KEY::ClassId, Config, PROTO_KEY::ConfigId, FALSE, FreeTempProto(), IndexForClassId, MakeProtoPermanent, TEMP_PROTO_STRUCT::Proto, TEMP_PROTO_STRUCT::ProtoId, TempConfigFor, ADAPT_TEMPLATES_STRUCT::Templates, PROTO_KEY::Templates, test_bit, and TRUE.

Referenced by MakePermanent().

02800                                      {  //PROTO_KEY             *ProtoKey)
          /* Callback handed to delete_d by MakePermanent. item1 is a
             TEMP_PROTO, item2 is a PROTO_KEY pointer. Returns TRUE after
             converting and freeing the temp proto, FALSE when the proto is
             not used by the config named in the key. */
02801     CLASS_INDEX ClassIndex;
02802     ADAPT_CLASS Class;
02803     TEMP_CONFIG Config;
02804     TEMP_PROTO TempProto;
02805     PROTO_KEY *ProtoKey;
02806 
02807     TempProto = (TEMP_PROTO) item1;
02808     ProtoKey = (PROTO_KEY *) item2;
02809 
02810     ClassIndex = IndexForClassId (ProtoKey->Templates->Templates,
02811       ProtoKey->ClassId);
02812     Class = ProtoKey->Templates->Class[ClassIndex];
02813     Config = TempConfigFor (Class, ProtoKey->ConfigId);
02814 
          /* The MaxProtoId comparison short-circuits before test_bit runs;
             presumably this keeps the bit test inside the config proto
             vector - TODO confirm. */
02815     if (TempProto->ProtoId > Config->MaxProtoId ||
02816       !test_bit (Config->Protos, TempProto->ProtoId))
02817       return (FALSE);
02818 
          /* Proto belongs to the config: mark it permanent, add it to the
             class pruner, then release the temp proto wrapper. */
02819     MakeProtoPermanent (Class, TempProto->ProtoId);
02820     AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId,
02821       ProtoKey->Templates->Templates);
02822     FreeTempProto(TempProto);
02823 
02824     return (TRUE);
02825 
02826   }                              /* MakeTempProtoPerm */

int NumBlobsIn ( TWERD Word  ) 

Returns the number of blobs in Word.

Parameters:
Word Word to count blobs in
Note:
Globals: none
Returns:
Number of blobs in Word.
Note:
Exceptions: none
Date:
Thu Mar 14 08:30:27 1991, DSJ, Created.

Definition at line 2838 of file adaptmatch.cpp.

References wordstruct::blobs, blobstruct::next, and NULL.

Referenced by AdaptableWord().

02838                               {
          /* Counts the nodes of the blob list hanging off Word; a NULL Word
             counts as zero blobs. */
02839     register TBLOB *Blob;
02840     register int NumBlobs;
02841 
02842     if (Word == NULL)
02843       return (0);
02844 
          /* Empty loop body: all the work happens in the for header. */
02845     for (Blob = Word->blobs, NumBlobs = 0;
02846       Blob != NULL; Blob = Blob->next, NumBlobs++);
02847 
02848     return (NumBlobs);
02849 
02850   }                              /* NumBlobsIn */

int NumOutlinesInBlob ( TBLOB Blob  ) 

Returns the number of OUTER outlines in Blob.

Parameters:
Blob Blob to count outlines in
Note:
Globals: none
Returns:
Number of outer outlines in Blob.
Note:
Exceptions: none
Date:
Mon Jun 10 15:46:20 1991, DSJ, Created.

Definition at line 2862 of file adaptmatch.cpp.

References NULL, Outline, and blobstruct::outlines.

Referenced by AdaptToWord().

02862                                      {
          /* Counts the nodes reachable from Blob->outlines by following the
             next pointers; a NULL Blob counts as zero outlines. */
02863     register TESSLINE *Outline;
02864     register int NumOutlines;
02865 
02866     if (Blob == NULL)
02867       return (0);
02868 
          /* Empty loop body: all the work happens in the for header. */
02869     for (Outline = Blob->outlines, NumOutlines = 0;
02870       Outline != NULL; Outline = Outline->next, NumOutlines++);
02871 
02872     return (NumOutlines);
02873 
02874 }// NumOutlinesInBlob

void PrintAdaptiveMatchResults ( FILE *  File,
ADAPT_RESULTS Results 
)

Writes the matches in Results using cprintf(); the File argument is accepted but is not used by the implementation shown below.

Parameters:
File open text file to write Results to
Results match results to write to File
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2887 of file adaptmatch.cpp.

References ADAPT_RESULTS::Classes, cprintf(), ADAPT_RESULTS::NumMatches, and ADAPT_RESULTS::Ratings.

Referenced by AdaptiveClassifier().

02887                                                                      {
          /* Prints every match as: class character, class id, rating * 100.
             Output goes through cprintf; the File parameter is never
             referenced in this body. */
02888     int i;
02889 
02890     if (Results->NumMatches > 0) {
          /* Ratings is indexed by class id, not by position in Classes. */
02891       cprintf ("%c(%d)  %4.1f  ", Results->Classes[0], Results->Classes[0],
02892         Results->Ratings[Results->Classes[0]] * 100.0);
02893 
          /* Remaining matches use the same format as the first one. */
02894       for (i = 1; i < Results->NumMatches; i++) {
02895         cprintf ("%c(%d)  %4.1f  ", Results->Classes[i],
02896           Results->Classes[i],
02897           Results->Ratings[Results->Classes[i]] * 100.0);
02898       }
02899     }
02900   }                              /* PrintAdaptiveMatchResults */

void PrintAdaptiveStatistics ( FILE *  File  ) 

Print to File the statistics which have been gathered for the adaptive matcher.

Parameters:
File open text file to print adaptive statistics to
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Thu Apr 18 14:37:37 1991, DSJ, Created.

Definition at line 998 of file adaptmatch.cpp.

References AdaptedTemplates, AdaptiveMatcherCalls, AmbigClassifierCalls, BaselineClassifierCalls, CharNormClassifierCalls, NumAmbigClassesTried, NumBaselineClassesTried, NumCharNormClassesTried, NumCharsAdaptedTo, NumClassesOutput, NumWordsAdaptedTo, and PrintAdaptedTemplates().

Referenced by dj_statistics().

00998                                          {
          /* Dumps the matcher and learner counters to File, followed by the
             adapted templates. The whole body is compiled out when
             SECURE_NAMES is defined. */
00999   #ifndef SECURE_NAMES
01000 
01001   fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
01002   fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
          /* Each average below reports 0.0 instead of dividing when the
             corresponding call counter is zero. */
01003   fprintf (File, "\tNum classes output   = %d (Avg = %4.2f)\n",
01004     NumClassesOutput,
01005     ((AdaptiveMatcherCalls == 0) ? (0.0) :
01006   ((float) NumClassesOutput / AdaptiveMatcherCalls)));
01007   fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
01008     BaselineClassifierCalls,
01009     ((BaselineClassifierCalls == 0) ? (0.0) :
01010   ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
01011   fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
01012     CharNormClassifierCalls,
01013     ((CharNormClassifierCalls == 0) ? (0.0) :
01014   ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
01015   fprintf (File, "\t\tAmbig    Classifier: %4d calls (%4.2f classes/call)\n",
01016     AmbigClassifierCalls,
01017     ((AmbigClassifierCalls == 0) ? (0.0) :
01018   ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
01019 
01020   fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
01021   fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
01022   fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
01023 
01024   PrintAdaptedTemplates(File, AdaptedTemplates);
01025   #endif
01026 }                                /* PrintAdaptiveStatistics */

void RemoveBadMatches ( ADAPT_RESULTS Results  ) 

Steps thru each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad - all good matches get moved to the front of the classes array.

Parameters:
Results contains matches to be filtered
Note:
Globals: BadMatchPad defines a "bad match"; bln_numericmode selects the numeric-mode filtering branch
Returns:
none
Note:
Exceptions: none
Date:
Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2915 of file adaptmatch.cpp.

References ADAPT_RESULTS::BestRating, ADAPT_RESULTS::Classes, NULL, ADAPT_RESULTS::NumMatches, and ADAPT_RESULTS::Ratings.

Referenced by AdaptiveClassifier(), AdaptToPunc(), and GetAmbiguities().

02915                                                 {
          /* Compacts Results->Classes in place so that only matches rated at
             or below BestRating + BadMatchPad remain, then updates
             NumMatches. Ratings are indexed by class id. */
02916     int Next, NextGood;
02917     FLOAT32 *Rating = Results->Ratings;
02918     CLASS_ID *Match = Results->Classes;
02919     FLOAT32 BadMatchThreshold;
02920     static const char* romans = "ivxIVX";
02921     BadMatchThreshold = Results->BestRating + BadMatchPad;
02922 
02923     if (bln_numericmode) {
          /* Numeric mode: of the matches under the threshold keep only
             non-alphabetic classes and the roman numeral letters. */
02924       for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
02925         if (Rating[Match[Next]] <= BadMatchThreshold) {
          /* NOTE(review): Match[Next] is passed to isalpha without a cast to
             unsigned char; if CLASS_ID is a signed char type, negative
             values are undefined behaviour - confirm the CLASS_ID type. */
02926           if (!isalpha(Match[Next]) || strchr(romans, Match[Next]) != NULL) {
02927             Match[NextGood++] = Match[Next];
          /* Lower-case L is replaced by digit one, taking over its rating,
             but only when the rating already stored for digit one is at or
             above the threshold. */
02928           } else if (Match[Next] == 'l' && Rating['1'] >= BadMatchThreshold) {
02929             Match[NextGood++] = '1';
02930             Rating['1'] = Rating['l'];
          /* Same substitution for capital O and digit zero. */
02931           } else if (Match[Next] == 'O' && Rating['0'] >= BadMatchThreshold) {
02932               Match[NextGood++] = '0';
02933               Rating['0'] = Rating['O'];
02934           }
          /* Any other alphabetic match is dropped. */
02935         }
02936       }
02937     }
02938     else {
          /* Normal mode: plain threshold filter. */
02939       for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
02940         if (Rating[Match[Next]] <= BadMatchThreshold)
02941           Match[NextGood++] = Match[Next];
02942       }
02943     }
02944 
02945     Results->NumMatches = NextGood;
02946 
02947   }                              /* RemoveBadMatches */

void RemoveExtraPuncs ( ADAPT_RESULTS Results  ) 

Steps thru each matching class in Results and removes surplus punctuation and digit matches: only the first two punctuation classes and the first digit class are kept, every other class is kept unchanged, and the surviving matches are moved to the front of the classes array.

Parameters:
Results contains matches to be filtered
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2962 of file adaptmatch.cpp.

References ADAPT_RESULTS::Classes, NULL, and ADAPT_RESULTS::NumMatches.

Referenced by AdaptiveClassifier().

02962                                                 {
          /* Compacts Results->Classes in place, keeping every class that is
             neither punctuation nor a digit, at most the first two
             punctuation classes and at most the first digit class, then
             updates NumMatches. */
02963     int Next, NextGood;
02964     int punc_count;              /*no of garbage characters */
02965     int digit_count;
02966     CLASS_ID *Match = Results->Classes;
02967                                  /*garbage characters */
02968     static char punc_chars[] = ".,;:/`~'-=\\|\"!_^";
02969     static char digit_chars[] = "0123456789";
02970 
02971     punc_count = 0;
02972     digit_count = 0;
02973     for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
          /* NOTE(review): strchr also matches the terminating null, so a
             class id of 0 would be classed as punctuation here - confirm
             that 0 never appears in Classes. */
02974       if (strchr (punc_chars, Match[Next]) == NULL) {
02975         if (strchr (digit_chars, Match[Next]) == NULL) {
02976           Match[NextGood++] = Match[Next];
02977         }
02978         else {
          /* Digits: only the first one encountered survives. */
02979           if (digit_count < 1)
02980             Match[NextGood++] = Match[Next];
02981           digit_count++;
02982         }
02983       }
02984       else {
          /* Punctuation: only the first two encountered survive. */
02985         if (punc_count < 2)
02986           Match[NextGood++] = Match[Next];
02987         punc_count++;            /*count them */
02988       }
02989     }
02990     Results->NumMatches = NextGood;
02991   }                              /* RemoveExtraPuncs */

void ResetAdaptiveClassifier (  ) 

Free allocated memory and reset templates.

Definition at line 934 of file adaptmatch.cpp.

References AdaptedTemplates, free_adapted_templates(), and NULL.

Referenced by TessBaseAPI::ClearAdaptiveClassifier(), and TessBaseAPI::End().

void SetAdaptiveThreshold ( FLOAT32  Threshold  ) 

Resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters:
Threshold threshold for creating new templates
Note:
Globals: GoodAdaptiveMatch default good match rating
Returns:
none
Note:
Exceptions: none
Date:
Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 3004 of file adaptmatch.cpp.

References GoodAdaptiveMatch, SetFeatureThresh(), and SetProtoThresh().

Referenced by AdaptToChar().

03004                                                {
          /* Sets the proto and feature thresholds of the integer matcher:
             0.9 for both when Threshold equals GoodAdaptiveMatch, otherwise
             1.0 - Threshold for both. */
          /* NOTE(review): the test is an exact floating point equality, so
             it only fires when the caller passes the GoodAdaptiveMatch
             value itself. */
03005     if (Threshold == GoodAdaptiveMatch) {
03006       /* the blob was probably classified correctly - use the default rating
03007          threshold */
03008       SetProtoThresh (0.9);
03009       SetFeatureThresh (0.9);
03010     }
03011     else {
03012       /* the blob was probably incorrectly classified */
03013       SetProtoThresh (1.0 - Threshold);
03014       SetFeatureThresh (1.0 - Threshold);
03015     }
03016   }                              /* SetAdaptiveThreshold */

void SettupPass1 (  ) 

This routine prepares the adaptive matcher for the start of the first pass.

Parameters:
none 
Note:
Globals:
  • EnableLearning set to old_enable_learning (the saved program-wide setting) by this routine
Returns:
none
Learning is enabled (unless it is disabled for the whole program).
Note:
Exceptions: none
Date:
Mon Apr 15 16:39:29 1991, DSJ, Created.
Note:
This is somewhat redundant, it simply says that if learning is enabled then it will remain enabled on the first pass.
If it is disabled, then it will remain disabled. This is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Definition at line 1050 of file adaptmatch.cpp.

References old_enable_learning, and SettupStopperPass1().

Referenced by set_pass1().

01050                    {
          /* Restores EnableLearning from old_enable_learning rather than
             forcing it on, then prepares the stopper for pass 1. */
01051   EnableLearning = old_enable_learning;
01052 
01053   SettupStopperPass1();
01054 
01055 }                                /* SettupPass1 */

void SettupPass2 (  ) 

Prepares the adaptive matcher for the start of the second pass.

Parameters:
none 
Note:
Globals:
  • EnableLearning set to FALSE by this routine
Returns:
none
Further learning is disabled.
Note:
Exceptions: none
Date:
Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 1072 of file adaptmatch.cpp.

References FALSE, and SettupStopperPass2().

Referenced by set_pass2().

01072                    {
          /* Turns learning off unconditionally, then prepares the stopper
             for pass 2. */
01073   EnableLearning = FALSE;
01074   SettupStopperPass2();
01075 
01076 }                                /* SettupPass2 */

void ShowBestMatchFor ( TBLOB Blob,
LINE_STATS LineStats,
CLASS_ID  ClassId,
BOOL8  AdaptiveOn,
BOOL8  PreTrainedOn 
)

Compares Blob to both sets of templates (adaptive and pre-trained) and then displays debug information for the config which matched best.

Parameters:
Blob blob to show best matching config for
LineStats statistics for text line Blob is in
ClassId class whose configs are to be searched
AdaptiveOn TRUE if adaptive configs are enabled
PreTrainedOn TRUE if pretrained configs are enabled
Note:
Globals:
  • PreTrainedTemplates built-in training
  • AdaptedTemplates adaptive templates
  • AllProtosOn dummy proto mask
  • AllConfigsOn dummy config mask
Returns:
none
Note:
Exceptions: none
Some changes for v1.03
Date:
Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 3039 of file adaptmatch.cpp.

References AdaptedTemplates, AllConfigsOn, AllProtosOn, baseline, character, ClassForClassId, INT_RESULT_STRUCT::Config, cprintf(), GetBaselineFeatures(), GetCharNormFeatures(), GetPicoFeatureLength, IndexForClassId, IntegerMatcher(), LegalClassId, NO_DEBUG, NormMethod, PreTrainedTemplates, INT_RESULT_STRUCT::Rating, SetBaseLineMatch(), SetCharNormMatch(), ADAPT_TEMPLATES_STRUCT::Templates, and UnusedClassIdIn.

Referenced by DebugAdaptiveClassifier().

03043                                             {
          /* Matches Blob against the pre-trained templates (char norm
             features) and/or the adapted templates (baseline features) for
             ClassId, prints the best config of each, then re-runs the
             matcher for one config of the better-rated set with
             MatchDebugFlags. */
03044     int CNOutlineLength = 0, BLOutlineLength = 0;
03045     int NumCNFeatures = 0, NumBLFeatures = 0;
03046     INT_FEATURE_ARRAY CNFeatures, BLFeatures;
03047     INT_RESULT_STRUCT CNResult, BLResult;
03048     CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
03049     CLASS_INDEX ClassIndex;
03050     FLOAT32 BlobLength;
03051     UINT32 ConfigMask;
          /* Persists across calls: selects which adaptive config the debug
             re-match shows next. Reset whenever PreTrainedOn is set. */
03052     static int next_config = -1;
03053 
03054     if (PreTrainedOn) next_config = -1;
03055 
          /* Only the Rating fields are initialised here; the Config fields
             are left unset until a matcher call fills the result in. */
03056     CNResult.Rating = BLResult.Rating = 2.0;
03057 
03058     if (!LegalClassId (ClassId)) {
03059       cprintf ("%c is not a legal class!!\n", ClassId);
03060       return;
03061     }
03062 
          /* NOTE(review): despite the indentation, the else below pairs with
             the inner if (UnusedClassIdIn), so the matching block runs only
             when PreTrainedOn is set and the class is in use. Braces would
             make this explicit. */
03063     if (PreTrainedOn)
03064       if (UnusedClassIdIn (PreTrainedTemplates, ClassId))
03065         cprintf ("No built-in templates for class '%c'\n", ClassId);
03066     else {
03067       NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
03068         PreTrainedTemplates,
03069         CNFeatures, CNAdjust,
03070         &BlobLength);
03071       if (NumCNFeatures <= 0)
03072         cprintf ("Illegal blob (char norm features)!\n");
03073       else {
03074         CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
03075         ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
03076 
03077         SetCharNormMatch();
03078         IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
03079           AllProtosOn, AllConfigsOn,
03080           CNOutlineLength, NumCNFeatures, CNFeatures, 0,
03081           CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
03082 
03083         cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
03084           CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]);
03085       }
03086     }
03087 
          /* Same dangling-else shape as above: the else pairs with the inner
             if (UnusedClassIdIn). */
03088     if (AdaptiveOn)
03089       if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId))
03090         cprintf ("No AD templates for class '%c'\n", ClassId);
03091     else {
03092       NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
03093         AdaptedTemplates->Templates,
03094         BLFeatures, BLAdjust,
03095         &BlobLength);
03096       if (NumBLFeatures <= 0)
03097         cprintf ("Illegal blob (baseline features)!\n");
03098       else {
03099         BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
03100         ClassIndex =
03101           IndexForClassId (AdaptedTemplates->Templates, ClassId);
03102 
03103         SetBaseLineMatch();
03104         IntegerMatcher (ClassForClassId
03105           (AdaptedTemplates->Templates, ClassId),
03106                         AllProtosOn, AllConfigsOn,
03107 //          AdaptedTemplates->Class[ClassIndex]->PermProtos,
03108 //          AdaptedTemplates->Class[ClassIndex]->PermConfigs,
03109           BLOutlineLength, NumBLFeatures, BLFeatures, 0,
03110           BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
03111 
03112         #ifndef SECURE_NAMES
03113         cprintf ("Best adaptive template match is config %2d (%4.1f)\n",
03114           BLResult.Config, BLResult.Rating * 100.0);
03115         #endif
03116       }
03117     }
03118 
03119     cprintf ("\n");
          /* Lower rating wins; on a tie (including the case where neither
             matcher ran and both ratings are still 2.0) the pre-trained
             branch is taken. */
03120     if (BLResult.Rating < CNResult.Rating) {
03121       ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
          /* First visit shows the best adaptive config; later visits, while
             next_config has not been reset, step through configs 0, 1, 2
             and so on, one per call. */
03122       if (next_config < 0) {
03123         ConfigMask = 1 << BLResult.Config;
03124         next_config = 0;
03125       } else {
03126         ConfigMask = 1 << next_config;
03127         ++next_config;
03128       }
03129       NormMethod = baseline;
03130 
03131       SetBaseLineMatch();
          /* The address of the single-word mask is passed where a
             BIT_VECTOR is expected. */
03132       IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId),
03133                       AllProtosOn,
03134 //        AdaptedTemplates->Class[ClassIndex]->PermProtos,
03135         (BIT_VECTOR) & ConfigMask,
03136         BLOutlineLength, NumBLFeatures, BLFeatures, 0,
03137         BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
03138       cprintf ("Adaptive template match for config %2d is %4.1f\n",
03139         BLResult.Config, BLResult.Rating * 100.0);
03140     }
03141     else {
03142       ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
          /* NOTE(review): if the pre-trained matcher above did not run,
             CNResult.Config has not been assigned in this function and the
             shift below reads an indeterminate value - confirm whether
             callers can reach this path. */
03143       ConfigMask = 1 << CNResult.Config;
03144       NormMethod = character;
03145 
03146       SetCharNormMatch();
03147                                  //xiaofan
03148       IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask,
03149         CNOutlineLength, NumCNFeatures, CNFeatures, 0,
03150         CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
03151     }
03152 }// ShowBestMatchFor


Variable Documentation

ADAPT_TEMPLATES AdaptedTemplates [static]

Adapted templates

Definition at line 507 of file adaptmatch.cpp.

Referenced by AdaptiveClassifier(), AdaptToChar(), DoAdaptiveMatch(), EndAdaptiveClassifier(), GetBestRatingFor(), InitAdaptiveClassifier(), PrintAdaptiveStatistics(), ResetAdaptiveClassifier(), and ShowBestMatchFor().

int AdaptiveMatcherCalls = 0 [static]

Definition at line 457 of file adaptmatch.cpp.

Referenced by DoAdaptiveMatch(), and PrintAdaptiveStatistics().

BIT_VECTOR AllConfigsOff [static]

Definition at line 516 of file adaptmatch.cpp.

Referenced by EndAdaptiveClassifier(), InitAdaptiveClassifier(), and MakeNewTemporaryConfig().

BIT_VECTOR AllConfigsOn [static]

Definition at line 514 of file adaptmatch.cpp.

Referenced by AdaptToChar(), AmbigClassifier(), compare_tess_blobs(), EndAdaptiveClassifier(), GetBestRatingFor(), InitAdaptiveClassifier(), MakeNewTemporaryConfig(), and ShowBestMatchFor().

BIT_VECTOR AllProtosOff [static]

Definition at line 515 of file adaptmatch.cpp.

Referenced by EndAdaptiveClassifier(), and InitAdaptiveClassifier().

BIT_VECTOR AllProtosOn [static]

Dummy proto and config masks for use with the built-in templates.

Definition at line 512 of file adaptmatch.cpp.

Referenced by AdaptToChar(), AmbigClassifier(), compare_tess_blobs(), EndAdaptiveClassifier(), GetBestRatingFor(), InitAdaptiveClassifier(), MakeNewAdaptedClass(), MakeNewTemporaryConfig(), and ShowBestMatchFor().

int AmbigClassifierCalls = 0 [static]

Definition at line 460 of file adaptmatch.cpp.

Referenced by AmbigClassifier(), and PrintAdaptiveStatistics().

int BaselineClassifierCalls = 0 [static]

Definition at line 458 of file adaptmatch.cpp.

Referenced by BaselineClassifier(), and PrintAdaptiveStatistics().

CLASS_CUTOFF_ARRAY BaselineCutoffs [static]

Definition at line 502 of file adaptmatch.cpp.

Referenced by BaselineClassifier(), InitAdaptiveClassifier(), and MakeNewAdaptedClass().

INT_FEATURE_ARRAY BaselineFeatures [static]

Definition at line 483 of file adaptmatch.cpp.

Referenced by GetIntBaselineFeatures(), and GetIntCharNormFeatures().

const char* BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE [static]

Definition at line 500 of file adaptmatch.cpp.

Referenced by InitAdaptiveClassifier(), and InitAdaptiveClassifierVars().

const char* BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE [static]

Filenames of training data.

Definition at line 499 of file adaptmatch.cpp.

Referenced by InitAdaptiveClassifier(), and InitAdaptiveClassifierVars().

int CharNormClassifierCalls = 0 [static]

Definition at line 459 of file adaptmatch.cpp.

Referenced by CharNormClassifier(), and PrintAdaptiveStatistics().

CLASS_CUTOFF_ARRAY CharNormCutoffs [static]

Definition at line 501 of file adaptmatch.cpp.

Referenced by CharNormClassifier(), InitAdaptiveClassifier(), and MakeNewAdaptedClass().

INT_FEATURE_ARRAY CharNormFeatures [static]

Definition at line 484 of file adaptmatch.cpp.

Referenced by GetIntBaselineFeatures(), and GetIntCharNormFeatures().

FLOAT32* CurrentRatings [static]

Current ratings.

Hold onto the current ratings so that the comparison function passed to qsort can get at them.

Definition at line 494 of file adaptmatch.cpp.

Referenced by AdaptiveClassifier(), CompareCurrentRatings(), and GetAmbiguities().

BOOL8 FeaturesHaveBeenExtracted = FALSE [static]

Definition at line 481 of file adaptmatch.cpp.

Referenced by GetIntBaselineFeatures(), and GetIntCharNormFeatures().

BOOL8 FeaturesOK = TRUE [static]

Definition at line 482 of file adaptmatch.cpp.

Referenced by GetIntBaselineFeatures(), and GetIntCharNormFeatures().

INT_FX_RESULT_STRUCT FXInfo [static]

Definition at line 486 of file adaptmatch.cpp.

Referenced by ExtractCharNormFeatures(), GetIntBaselineFeatures(), and GetIntCharNormFeatures().

char imagefile[]

Image file name

Definition at line 53 of file globals.cpp.

Referenced by add_document_word(), apply_boxes(), EndAdaptiveClassifier(), InitAdaptiveClassifier(), LearnBlob(), program_editup2(), and save_summary().

int NumAmbigClassesTried = 0 [static]

Definition at line 465 of file adaptmatch.cpp.

Referenced by AmbigClassifier(), and PrintAdaptiveStatistics().

int NumBaselineClassesTried = 0 [static]

Definition at line 463 of file adaptmatch.cpp.

Referenced by BaselineClassifier(), and PrintAdaptiveStatistics().

int NumCharNormClassesTried = 0 [static]

Definition at line 464 of file adaptmatch.cpp.

Referenced by CharNormClassifier(), and PrintAdaptiveStatistics().

int NumCharsAdaptedTo = 0 [static]

Definition at line 462 of file adaptmatch.cpp.

Referenced by AdaptToChar(), and PrintAdaptiveStatistics().

int NumClassesOutput = 0 [static]

Definition at line 466 of file adaptmatch.cpp.

Referenced by AdaptiveClassifier(), and PrintAdaptiveStatistics().

int NumWordsAdaptedTo = 0 [static]

Definition at line 461 of file adaptmatch.cpp.

Referenced by AdaptToWord(), and PrintAdaptiveStatistics().

int old_enable_learning = 1 [static]

Definition at line 539 of file adaptmatch.cpp.

Referenced by InitAdaptiveClassifier(), and SettupPass1().

INT_TEMPLATES PreTrainedTemplates [static]

Built-in templates

Definition at line 505 of file adaptmatch.cpp.

Referenced by AdaptToPunc(), AddNewResult(), DoAdaptiveMatch(), EndAdaptiveClassifier(), GetAmbiguities(), GetBestRatingFor(), InitAdaptiveClassifier(), MakeNewAdaptedClass(), and ShowBestMatchFor().

BIT_VECTOR PrunedProtos [static]

Definition at line 513 of file adaptmatch.cpp.

Referenced by CharNormClassifier(), EndAdaptiveClassifier(), and InitAdaptiveClassifier().

BIT_VECTOR TempProtoMask [static]

Definition at line 517 of file adaptmatch.cpp.

Referenced by EndAdaptiveClassifier(), InitAdaptiveClassifier(), and MakeNewTemporaryConfig().

int tess_bn_matching = 0

Definition at line 591 of file adaptmatch.cpp.

Referenced by DoAdaptiveMatch(), tess_bn_matcher(), tess_cn_matcher(), tess_default_matcher(), and tess_training_tester().

int tess_cn_matching = 0

Definition at line 590 of file adaptmatch.cpp.

Referenced by DoAdaptiveMatch(), tess_bn_matcher(), tess_cn_matcher(), tess_default_matcher(), and tess_training_tester().


Generated on Wed Feb 28 19:49:18 2007 for Tesseract by  doxygen 1.5.1