00001
00020
00021
00022
00023 #include <ctype.h>
00024 #include "adaptmatch.h"
00025 #include "normfeat.h"
00026 #include "mfoutline.h"
00027 #include "picofeat.h"
00028 #include "float2int.h"
00029 #include "outfeat.h"
00030 #include "emalloc.h"
00031 #include "intfx.h"
00032 #include "permnum.h"
00033 #include "speckle.h"
00034 #include "efio.h"
00035 #include "normmatch.h"
00036 #include "stopper.h"
00037 #include "permute.h"
00038 #include "context.h"
00039 #include "ndminx.h"
00040 #include "intproto.h"
00041 #include "const.h"
00042 #include "globals.h"
00043 #include "werd.h"
00044 #include "callcpp.h"
00045 #include "tordvars.h"
00046
00047 #include <stdio.h>
00048 #include <string.h>
00049 #include <ctype.h>
00050 #include <stdlib.h>
00051 #include <math.h>
00052 #ifdef __UNIX__
00053 #include <assert.h>
00054 #endif
00055
/* Suffix appended to the image file name to name the adapted-templates file. */
#define ADAPT_TEMPLATE_SUFFIX    ".a"
/* Default locations (relative to demodir) of the built-in classifier data. */
#define BUILT_IN_TEMPLATES_FILE  "tessdata/inttemp"
#define BUILT_IN_CUTOFFS_FILE    "tessdata/pffmtable"

#define MAX_MATCHES              10

/* Feature sets larger than this are discarded by the adaptive code. */
#define UNLIKELY_NUM_FEAT        200
/* Debug-flags value passed to IntegerMatcher when no debugging is wanted. */
#define NO_DEBUG                 0

/* Capacity of the per-blob threshold array used while adapting to a word. */
#define MAX_ADAPTABLE_WERD_SIZE  40
/* Limit used by the new adaption rules in AdaptableWord(). */
#define ADAPTABLE_WERD           (GOOD_NUMBER + 0.05)

/* Offset subtracted from a feature's Y value when it is turned into a proto. */
#define Y_DIM_OFFSET             (Y_SHIFT - BASELINE_Y_SHIFT)

/* Rating given to results before any match has been recorded. */
#define WORST_POSSIBLE_RATING    (1.0)
00076
00081 typedef struct
00082 {
00083 FLOAT32 BlobLength;
00084 int NumMatches;
00085 CLASS_ID Classes[MAX_NUM_CLASSES];
00086 FLOAT32 Ratings[MAX_CLASS_ID + 1];
00087 UINT8 Configs[MAX_CLASS_ID + 1];
00088 FLOAT32 BestRating;
00089 CLASS_ID BestClass;
00090 UINT8 BestConfig;
00091 } ADAPT_RESULTS;
00092
00098 typedef struct
00099 {
00100 ADAPT_TEMPLATES Templates;
00101 CLASS_ID ClassId;
00102 int ConfigId;
00103 } PROTO_KEY;
00104
00105
00106
00107
/* Non-zero when Rating is above the GreatAdaptiveMatch threshold. */
#define MarginalMatch(Rating) \
  ((Rating) > GreatAdaptiveMatch)

/* Non-zero once Config has been seen more than ReliableConfigThreshold times. */
#define TempConfigReliable(Config) \
  ((Config)->NumTimesSeen > ReliableConfigThreshold)

/* Clears the FeaturesHaveBeenExtracted flag. */
#define InitIntFX() \
  (FeaturesHaveBeenExtracted = FALSE)
00115
00116
00117
00118
00119 void AdaptToChar(TBLOB *Blob,
00120 LINE_STATS *LineStats,
00121 CLASS_ID ClassId,
00122 FLOAT32 Threshold);
00123
00124 void AdaptToPunc(TBLOB *Blob,
00125 LINE_STATS *LineStats,
00126 CLASS_ID ClassId,
00127 FLOAT32 Threshold);
00128
00129 void AddNewResult(ADAPT_RESULTS *Results,
00130 CLASS_ID ClassId,
00131 FLOAT32 Rating,
00132 int ConfigId);
00133
00134 void AmbigClassifier(TBLOB *Blob,
00135 LINE_STATS *LineStats,
00136 INT_TEMPLATES Templates,
00137 char *Ambiguities,
00138 ADAPT_RESULTS *Results);
00139
00140 char *BaselineClassifier(TBLOB *Blob,
00141 LINE_STATS *LineStats,
00142 ADAPT_TEMPLATES Templates,
00143 ADAPT_RESULTS *Results);
00144
00145 void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner);
00146
00147 void CharNormClassifier(TBLOB *Blob,
00148 LINE_STATS *LineStats,
00149 INT_TEMPLATES Templates,
00150 ADAPT_RESULTS *Results);
00151
00152 void ClassifyAsNoise(TBLOB *Blob,
00153 LINE_STATS *LineStats,
00154 ADAPT_RESULTS *Results);
00155
00156 int CompareCurrentRatings(const void *arg1,
00157 const void *arg2);
00158
00159 LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results);
00160
00161 void DebugAdaptiveClassifier(TBLOB *Blob,
00162 LINE_STATS *LineStats,
00163 ADAPT_RESULTS *Results);
00164
00165 void DoAdaptiveMatch(TBLOB *Blob,
00166 LINE_STATS *LineStats,
00167 ADAPT_RESULTS *Results);
00168
00169 void GetAdaptThresholds (TWERD * Word,
00170 LINE_STATS * LineStats,
00171 const char *BestChoice,
00172 const char *BestRawChoice, FLOAT32 Thresholds[]);
00173
00174 char *GetAmbiguities(TBLOB *Blob,
00175 LINE_STATS *LineStats,
00176 CLASS_ID CorrectClass);
00177
00178 int GetBaselineFeatures(TBLOB *Blob,
00179 LINE_STATS *LineStats,
00180 INT_TEMPLATES Templates,
00181 INT_FEATURE_ARRAY IntFeatures,
00182 CLASS_NORMALIZATION_ARRAY CharNormArray,
00183 FLOAT32 *BlobLength);
00184
00185 FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
00186
00187 int GetCharNormFeatures(TBLOB *Blob,
00188 LINE_STATS *LineStats,
00189 INT_TEMPLATES Templates,
00190 INT_FEATURE_ARRAY IntFeatures,
00191 CLASS_NORMALIZATION_ARRAY CharNormArray,
00192 FLOAT32 *BlobLength);
00193
00194 int GetIntBaselineFeatures(TBLOB *Blob,
00195 LINE_STATS *LineStats,
00196 INT_TEMPLATES Templates,
00197 INT_FEATURE_ARRAY IntFeatures,
00198 CLASS_NORMALIZATION_ARRAY CharNormArray,
00199 FLOAT32 *BlobLength);
00200
00201 int GetIntCharNormFeatures(TBLOB *Blob,
00202 LINE_STATS *LineStats,
00203 INT_TEMPLATES Templates,
00204 INT_FEATURE_ARRAY IntFeatures,
00205 CLASS_NORMALIZATION_ARRAY CharNormArray,
00206 FLOAT32 *BlobLength);
00207
00208 void InitMatcherRatings(register FLOAT32 *Rating);
00209
00210 void MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
00211 CLASS_ID ClassId,
00212 int NumFeatures,
00213 INT_FEATURE_ARRAY Features,
00214 FEATURE_SET FloatFeatures);
00215
00216 PROTO_ID MakeNewTempProtos (FEATURE_SET Features,
00217 int NumBadFeat,
00218 FEATURE_ID BadFeat[],
00219 INT_CLASS IClass,
00220 ADAPT_CLASS Class, BIT_VECTOR TempProtoMask);
00221
00222 void MakePermanent(ADAPT_TEMPLATES Templates,
00223 CLASS_ID ClassId,
00224 int ConfigId,
00225 TBLOB *Blob,
00226 LINE_STATS *LineStats);
00227
00228 int MakeTempProtoPerm(void *item1, void *item2);
00229
00230 int NumBlobsIn(TWERD *Word);
00231
00232 int NumOutlinesInBlob(TBLOB *Blob);
00233
00234 void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
00235
00236 void RemoveBadMatches(ADAPT_RESULTS *Results);
00237
00238 void RemoveExtraPuncs(ADAPT_RESULTS *Results);
00239
00240 void SetAdaptiveThreshold(FLOAT32 Threshold);
00241 void ShowBestMatchFor(TBLOB *Blob,
00242 LINE_STATS *LineStats,
00243 CLASS_ID ClassId,
00244 BOOL8 AdaptiveOn,
00245 BOOL8 PreTrainedOn);
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00441 extern char imagefile[];
00442
00443
00446 INT_VAR (tessedit_single_match, FALSE, "Top choice only from CP");
00449
00450
00451
00452
00453
00454
00455
00456
00457 static int AdaptiveMatcherCalls = 0;
00458 static int BaselineClassifierCalls = 0;
00459 static int CharNormClassifierCalls = 0;
00460 static int AmbigClassifierCalls = 0;
00461 static int NumWordsAdaptedTo = 0;
00462 static int NumCharsAdaptedTo = 0;
00463 static int NumBaselineClassesTried = 0;
00464 static int NumCharNormClassesTried = 0;
00465 static int NumAmbigClassesTried = 0;
00466 static int NumClassesOutput = 0;
00467
00481 static BOOL8 FeaturesHaveBeenExtracted = FALSE;
00482 static BOOL8 FeaturesOK = TRUE;
00483 static INT_FEATURE_ARRAY BaselineFeatures;
00484 static INT_FEATURE_ARRAY CharNormFeatures;
00485
00486 static INT_FX_RESULT_STRUCT FXInfo;
00487
00494 static FLOAT32 *CurrentRatings;
00495
00499 static const char *BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE;
00500 static const char *BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE;
00501 static CLASS_CUTOFF_ARRAY CharNormCutoffs;
00502 static CLASS_CUTOFF_ARRAY BaselineCutoffs;
00503
00505 static INT_TEMPLATES PreTrainedTemplates;
00507 static ADAPT_TEMPLATES AdaptedTemplates;
00508
00512 static BIT_VECTOR AllProtosOn;
00513 static BIT_VECTOR PrunedProtos;
00514 static BIT_VECTOR AllConfigsOn;
00515 static BIT_VECTOR AllProtosOff;
00516 static BIT_VECTOR AllConfigsOff;
00517 static BIT_VECTOR TempProtoMask;
00518
00519
00520 make_toggle_const (EnableAdaptiveMatcher, 1, MakeEnableAdaptiveMatcher);
00521
00522
00523 make_toggle_const (UsePreAdaptedTemplates, 0, MakeUsePreAdaptedTemplates);
00524 make_toggle_const (SaveAdaptedTemplates, 0, MakeSaveAdaptedTemplates);
00525
00526 make_toggle_var (EnableAdaptiveDebugger, 0, MakeEnableAdaptiveDebugger,
00527 18, 1, SetEnableAdaptiveDebugger, "Enable match debugger");
00528
00529 make_int_var (MatcherDebugLevel, 0, MakeMatcherDebugLevel,
00530 18, 2, SetMatcherDebugLevel, "Matcher Debug Level: ");
00531
00532 make_int_var (MatchDebugFlags, 0, MakeMatchDebugFlags,
00533 18, 3, SetMatchDebugFlags, "Matcher Debug Flags: ");
00534
00535 make_toggle_var (EnableLearning, 1, MakeEnableLearning,
00536 18, 4, SetEnableLearning, "Enable learning");
00537
00538
00539 static int old_enable_learning = 1;
00540
00541 make_int_var (LearningDebugLevel, 0, MakeLearningDebugLevel,
00542 18, 5, SetLearningDebugLevel, "Learning Debug Level: ");
00543
00544 make_float_var (GoodAdaptiveMatch, 0.125, MakeGoodAdaptiveMatch,
00545 18, 6, SetGoodAdaptiveMatch, "Good Match (0-1): ");
00546
00547 make_float_var (GreatAdaptiveMatch, 0.0, MakeGreatAdaptiveMatch,
00548 18, 7, SetGreatAdaptiveMatch, "Great Match (0-1): ");
00549
00550
00551 make_float_var (PerfectRating, 0.02, MakePerfectRating,
00552 18, 8, SetPerfectRating, "Perfect Match (0-1): ");
00553
00554 make_float_var (BadMatchPad, 0.15, MakeBadMatchPad,
00555 18, 9, SetBadMatchPad, "Bad Match Pad (0-1): ");
00556
00557 make_float_var (RatingMargin, 0.1, MakeRatingMargin,
00558 18, 10, SetRatingMargin, "New template margin (0-1): ");
00559
00560 make_float_var (NoiseBlobLength, 0.6, MakeNoiseBlobLength,
00561 18, 11, SetNoiseBlobLength, "Avg. noise blob length: ");
00562
00563 make_int_var (MinNumPermClasses, 3, MakeMinNumPermClasses,
00564 18, 12, SetMinNumPermClasses, "Min # of permanent classes: ");
00565
00566
00567 make_int_var (ReliableConfigThreshold, 2, MakeReliableConfigThreshold,
00568 18, 13, SetReliableConfigThreshold,
00569 "Reliable Config Threshold: ");
00570
00571 make_float_var (MaxAngleDelta, 0.015, MakeMaxAngleDelta,
00572 18, 14, SetMaxAngleDelta,
00573 "Maximum angle delta for proto clustering: ");
00574
00575 make_toggle_var (EnableIntFX, 1, MakeEnableIntFX,
00576 18, 15, SetEnableIntFX, "Enable integer fx");
00577
00578
00579 make_toggle_var (EnableNewAdaptRules, 1, MakeEnableNewAdaptRules,
00580 18, 16, SetEnableNewAdaptRules,
00581 "Enable new adaptation rules");
00582
00583
00584 make_float_var (RatingScale, 30.0, MakeRatingScale,
00585 18, 17, SetRatingScale, "Rating scale: ");
00586
00587 make_float_var (CertaintyScale, 20.0, MakeCertaintyScale,
00588 18, 18, SetCertaintyScale, "CertaintyScale: ");
00589
00590 int tess_cn_matching = 0;
00591 int tess_bn_matching = 0;
00592
00593
00594
00595
00616 LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
00617 LIST Choices;
00618 ADAPT_RESULTS Results;
00619 LINE_STATS LineStats;
00620
00621
00622 if (AdaptedTemplates == NULL)
00623 AdaptedTemplates = NewAdaptedTemplates ();
00624 EnterClassifyMode;
00625
00626 Results.BlobLength = MAX_FLOAT32;
00627 Results.NumMatches = 0;
00628 Results.BestRating = WORST_POSSIBLE_RATING;
00629 Results.BestClass = NO_CLASS;
00630 Results.BestConfig = 0;
00631 GetLineStatsFromRow(Row, &LineStats);
00632 InitMatcherRatings (Results.Ratings);
00633
00634 DoAdaptiveMatch(Blob, &LineStats, &Results);
00635 RemoveBadMatches(&Results);
00636
00637
00638 CurrentRatings = Results.Ratings;
00639 qsort ((void *) (Results.Classes), Results.NumMatches,
00640 sizeof (CLASS_ID), CompareCurrentRatings);
00641 RemoveExtraPuncs(&Results);
00642 Choices = ConvertMatchesToChoices (&Results);
00643
00644 if (MatcherDebugLevel >= 1) {
00645 cprintf ("AD Matches = ");
00646 PrintAdaptiveMatchResults(stdout, &Results);
00647 }
00648
00649 if (LargeSpeckle (Blob, Row))
00650 Choices = AddLargeSpeckleTo (Choices);
00651
00652 #ifndef GRAPHICS_DISABLED
00653 if (EnableAdaptiveDebugger)
00654 DebugAdaptiveClassifier(Blob, &LineStats, &Results);
00655 #endif
00656
00657 NumClassesOutput += count (Choices);
00658 if (Choices == NIL) {
00659 if (!bln_numericmode)
00660 printf ("Nil classification!\n");
00661 return (append_choice (NIL, "", 50.0f, -20.0f, -1));
00662 }
00663
00664 return (Choices);
00665
00666 }
00667
00668
00669
00700 void AdaptToWord(TWERD *Word,
00701 TEXTROW *Row,
00702 const char *BestChoice,
00703 const char *BestRawChoice,
00704 const char *rejmap) {
00705 TBLOB *Blob;
00706 LINE_STATS LineStats;
00707 FLOAT32 Thresholds[MAX_ADAPTABLE_WERD_SIZE];
00708 FLOAT32 *Threshold;
00709 const char *map = rejmap;
00710 char map_char = '1';
00711
00712 if (EnableLearning) {
00713 NumWordsAdaptedTo++;
00714
00715 #ifndef SECURE_NAMES
00716 if (LearningDebugLevel >= 1)
00717 cprintf ("\n\nAdapting to word = %s\n", BestChoice);
00718 #endif
00719 GetLineStatsFromRow(Row, &LineStats);
00720
00721 GetAdaptThresholds(Word,
00722 &LineStats,
00723 BestChoice,
00724 BestRawChoice,
00725 Thresholds);
00726
00727 for (Blob = Word->blobs, Threshold = Thresholds;
00728 Blob != NULL; Blob = Blob->next, BestChoice++, Threshold++) {
00729 InitIntFX();
00730
00731 if (rejmap != NULL)
00732 map_char = *map++;
00733
00734 assert (map_char == '1' || map_char == '0');
00735
00736 if (map_char == '1') {
00737
00738 if (isalnum (*BestChoice)) {
00739
00740 if ((*BestChoice == 'i'
00741 || il1_adaption_test && *BestChoice == 'I'
00742 && islower (BestChoice[1])) && (Blob == Word->blobs
00743 ||
00744 ispunct (*
00745 (BestChoice -
00746 1))
00747 || !il1_adaption_test
00748 &&
00749 NumOutlinesInBlob
00750 (Blob) != 2)) {
00751 if (LearningDebugLevel >= 1)
00752 cprintf ("Rejecting char = %c\n", *BestChoice);
00753 }
00754 else {
00755 #ifndef SECURE_NAMES
00756 if (LearningDebugLevel >= 1)
00757 cprintf ("Adapting to char = %c, thr= %g\n", *BestChoice, *Threshold);
00758 #endif
00759 AdaptToChar(Blob, &LineStats, *BestChoice, *Threshold);
00760 }
00761 }
00762 else
00763 AdaptToPunc(Blob, &LineStats, *BestChoice, *Threshold);
00764 }
00765 }
00766 if (LearningDebugLevel >= 1)
00767 cprintf ("\n");
00768 }
00769 }
00770
00771
00772
00790 void EndAdaptiveClassifier() {
00791 char Filename[256];
00792 FILE *File;
00793
00794 #ifndef SECURE_NAMES
00795 if (EnableAdaptiveMatcher && SaveAdaptedTemplates) {
00796 strcpy(Filename, imagefile);
00797 strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
00798 File = fopen (Filename, "wb");
00799 if (File == NULL)
00800 cprintf ("Unable to save adapted templates to %s!\n", Filename);
00801 else {
00802 cprintf ("\nSaving adapted templates to %s ...", Filename);
00803 fflush(stdout);
00804 WriteAdaptedTemplates(File, AdaptedTemplates);
00805 cprintf ("\n");
00806 fclose(File);
00807 }
00808 }
00809 #endif
00810
00811
00812 EndDangerousAmbigs();
00813 FreeNormProtos();
00814 free_int_templates(PreTrainedTemplates);
00815 PreTrainedTemplates = NULL;
00816 FreeBitVector(AllProtosOn);
00817 FreeBitVector(PrunedProtos);
00818 FreeBitVector(AllConfigsOn);
00819 FreeBitVector(AllProtosOff);
00820 FreeBitVector(AllConfigsOff);
00821 FreeBitVector(TempProtoMask);
00822 AllProtosOn = NULL;
00823 PrunedProtos = NULL;
00824 AllConfigsOn = NULL;
00825 AllProtosOff = NULL;
00826 AllConfigsOff = NULL;
00827 TempProtoMask = NULL;
00828 }
00829
00830
00831
00850 void InitAdaptiveClassifier() {
00851 int i;
00852 FILE *File;
00853 char Filename[1024];
00854
00855 if (!EnableAdaptiveMatcher)
00856 return;
00857
00858 strcpy(Filename, demodir);
00859 strcat(Filename, BuiltInTemplatesFile);
00860 #ifndef SECURE_NAMES
00861
00862
00863 fflush(stdout);
00864 #endif
00865
00866 #ifdef __UNIX__
00867 File = Efopen (Filename, "r");
00868 #else
00869 File = Efopen (Filename, "rb");
00870 #endif
00871 PreTrainedTemplates = ReadIntTemplates (File, TRUE);
00872 fclose(File);
00873
00874 strcpy(Filename, demodir);
00875 strcat(Filename, BuiltInCutoffsFile);
00876 #ifndef SECURE_NAMES
00877
00878
00879 fflush(stdout);
00880 #endif
00881 ReadNewCutoffs (Filename, PreTrainedTemplates->IndexFor, CharNormCutoffs);
00882
00883 GetNormProtos();
00884
00885 InitIntegerMatcher();
00886 InitIntegerFX();
00887
00888 AllProtosOn = NewBitVector (MAX_NUM_PROTOS);
00889 PrunedProtos = NewBitVector (MAX_NUM_PROTOS);
00890 AllConfigsOn = NewBitVector (MAX_NUM_CONFIGS);
00891 AllProtosOff = NewBitVector (MAX_NUM_PROTOS);
00892 AllConfigsOff = NewBitVector (MAX_NUM_CONFIGS);
00893 TempProtoMask = NewBitVector (MAX_NUM_PROTOS);
00894 set_all_bits (AllProtosOn, WordsInVectorOfSize (MAX_NUM_PROTOS));
00895 set_all_bits (PrunedProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
00896 set_all_bits (AllConfigsOn, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00897 zero_all_bits (AllProtosOff, WordsInVectorOfSize (MAX_NUM_PROTOS));
00898 zero_all_bits (AllConfigsOff, WordsInVectorOfSize (MAX_NUM_CONFIGS));
00899
00900 if (UsePreAdaptedTemplates) {
00901 strcpy(Filename, imagefile);
00902 strcat(Filename, ADAPT_TEMPLATE_SUFFIX);
00903 File = fopen (Filename, "rb");
00904 if (File == NULL)
00905 AdaptedTemplates = NewAdaptedTemplates ();
00906 else {
00907 #ifndef SECURE_NAMES
00908 cprintf ("\nReading pre-adapted templates from %s ...", Filename);
00909 fflush(stdout);
00910 #endif
00911 AdaptedTemplates = ReadAdaptedTemplates (File);
00912 cprintf ("\n");
00913 fclose(File);
00914 PrintAdaptedTemplates(stdout, AdaptedTemplates);
00915
00916 for (i = 0; i < NumClassesIn (AdaptedTemplates->Templates); i++) {
00917 BaselineCutoffs[i] =
00918 CharNormCutoffs[IndexForClassId (PreTrainedTemplates,
00919 ClassIdForIndex
00920 (AdaptedTemplates->Templates,
00921 i))];
00922 }
00923 }
00924 }
00925 else
00926 AdaptedTemplates = NewAdaptedTemplates ();
00927 old_enable_learning = EnableLearning;
00928
00929 }
00930
00934 void ResetAdaptiveClassifier() {
00935 free_adapted_templates(AdaptedTemplates);
00936 AdaptedTemplates = NULL;
00937 }
00938
00939
00940
00950 void InitAdaptiveClassifierVars() {
00951 VALUE dummy;
00952
00953 string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile",
00954 BUILT_IN_TEMPLATES_FILE);
00955 string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile",
00956 BUILT_IN_CUTOFFS_FILE);
00957
00958 MakeEnableAdaptiveMatcher();
00959 MakeUsePreAdaptedTemplates();
00960 MakeSaveAdaptedTemplates();
00961
00962 MakeEnableLearning();
00963 MakeEnableAdaptiveDebugger();
00964 MakeBadMatchPad();
00965 MakeGoodAdaptiveMatch();
00966 MakeGreatAdaptiveMatch();
00967 MakeNoiseBlobLength();
00968 MakeMinNumPermClasses();
00969 MakeReliableConfigThreshold();
00970 MakeMaxAngleDelta();
00971 MakeLearningDebugLevel();
00972 MakeMatcherDebugLevel();
00973 MakeMatchDebugFlags();
00974 MakeRatingMargin();
00975 MakePerfectRating();
00976 MakeEnableIntFX();
00977 MakeEnableNewAdaptRules();
00978 MakeRatingScale();
00979 MakeCertaintyScale();
00980
00981 InitPicoFXVars();
00982 InitOutlineFXVars();
00983
00984 }
00985
00986
00987
00998 void PrintAdaptiveStatistics(FILE *File) {
00999 #ifndef SECURE_NAMES
01000
01001 fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
01002 fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
01003 fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
01004 NumClassesOutput,
01005 ((AdaptiveMatcherCalls == 0) ? (0.0) :
01006 ((float) NumClassesOutput / AdaptiveMatcherCalls)));
01007 fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
01008 BaselineClassifierCalls,
01009 ((BaselineClassifierCalls == 0) ? (0.0) :
01010 ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
01011 fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
01012 CharNormClassifierCalls,
01013 ((CharNormClassifierCalls == 0) ? (0.0) :
01014 ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
01015 fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
01016 AmbigClassifierCalls,
01017 ((AmbigClassifierCalls == 0) ? (0.0) :
01018 ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
01019
01020 fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
01021 fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
01022 fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
01023
01024 PrintAdaptedTemplates(File, AdaptedTemplates);
01025 #endif
01026 }
01027
01028
01029
01050 void SettupPass1() {
01051 EnableLearning = old_enable_learning;
01052
01053 SettupStopperPass1();
01054
01055 }
01056
01057
01058
01072 void SettupPass2() {
01073 EnableLearning = FALSE;
01074 SettupStopperPass2();
01075
01076 }
01077
01078
01079
01096 void MakeNewAdaptedClass(TBLOB *Blob,
01097 LINE_STATS *LineStats,
01098 CLASS_ID ClassId,
01099 ADAPT_TEMPLATES Templates) {
01100 FEATURE_SET Features;
01101 int Fid, Pid;
01102 FEATURE Feature;
01103 int NumFeatures;
01104 TEMP_PROTO TempProto;
01105 PROTO Proto;
01106 ADAPT_CLASS Class;
01107 INT_CLASS IClass;
01108 CLASS_INDEX ClassIndex;
01109 TEMP_CONFIG Config;
01110
01111 NormMethod = baseline;
01112 Features = ExtractOutlineFeatures (Blob, LineStats);
01113 NumFeatures = NumFeaturesIn (Features);
01114 if (NumFeatures > UNLIKELY_NUM_FEAT) {
01115 FreeFeatureSet(Features);
01116 return;
01117 }
01118
01119 Class = NewAdaptedClass ();
01120 ClassIndex = AddAdaptedClass (Templates, Class, ClassId);
01121 Config = NewTempConfig (NumFeatures - 1);
01122 TempConfigFor (Class, 0) = Config;
01123
01124
01125 BaselineCutoffs[ClassIndex] =
01126 CharNormCutoffs[IndexForClassId (PreTrainedTemplates, ClassId)];
01127
01128 IClass = ClassForClassId (Templates->Templates, ClassId);
01129
01130 for (Fid = 0; Fid < NumFeaturesIn (Features); Fid++) {
01131 Pid = AddIntProto (IClass);
01132 assert (Pid != NO_PROTO);
01133
01134 Feature = FeatureIn (Features, Fid);
01135 TempProto = NewTempProto ();
01136 Proto = &(TempProto->Proto);
01137
01138
01139
01140
01141 ProtoAngle (Proto) = ParamOf (Feature, OutlineFeatDir);
01142 ProtoX (Proto) = ParamOf (Feature, OutlineFeatX);
01143 ProtoY (Proto) = ParamOf (Feature, OutlineFeatY) - Y_DIM_OFFSET;
01144 ProtoLength (Proto) = ParamOf (Feature, OutlineFeatLength);
01145 FillABC(Proto);
01146
01147 TempProto->ProtoId = Pid;
01148 SET_BIT (Config->Protos, Pid);
01149
01150 ConvertProto(Proto, Pid, IClass);
01151 AddProtoToProtoPruner(Proto, Pid, IClass);
01152
01153 Class->TempProtos = push (Class->TempProtos, TempProto);
01154 }
01155 FreeFeatureSet(Features);
01156
01157 AddIntConfig(IClass);
01158 ConvertConfig (AllProtosOn, 0, IClass);
01159
01160 if (LearningDebugLevel >= 1) {
01161 cprintf ("Added new class '%c' with index %d and %d protos.\n",
01162 ClassId, ClassIndex, NumFeatures);
01163 }
01164
01165 }
01166
01167
01168
01185 int GetAdaptiveFeatures(TBLOB *Blob,
01186 LINE_STATS *LineStats,
01187 INT_FEATURE_ARRAY IntFeatures,
01188 FEATURE_SET *FloatFeatures) {
01189 FEATURE_SET Features;
01190 int NumFeatures;
01191
01192 NormMethod = baseline;
01193 Features = ExtractPicoFeatures (Blob, LineStats);
01194
01195 NumFeatures = NumFeaturesIn (Features);
01196 if (NumFeatures > UNLIKELY_NUM_FEAT) {
01197 FreeFeatureSet(Features);
01198 return (0);
01199 }
01200
01201 ComputeIntFeatures(Features, IntFeatures);
01202 *FloatFeatures = Features;
01203
01204 return (NumFeatures);
01205
01206 }
01207
01208
01209
01210
01211
01224 int AdaptableWord(TWERD *Word,
01225 const char *BestChoice,
01226 const char *BestRawChoice) {
01227 int BestChoiceLength;
01228
01229 return
01230 (
01231
01232
01233 BestChoice != NULL
01234 && BestRawChoice != NULL
01235 && Word != NULL
01236 && ( BestChoiceLength = strlen (BestChoice)) > 0
01237 && BestChoiceLength == NumBlobsIn (Word)
01238 && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE
01239 && (
01240
01241 EnableNewAdaptRules
01242 && CurrentBestChoiceAdjustFactor() <= ADAPTABLE_WERD
01243 && AlternativeChoicesWorseThan(ADAPTABLE_WERD)
01244 && CurrentBestChoiceIs(BestChoice)
01245 ||
01246
01247 !EnableNewAdaptRules
01248 && BestChoiceLength == strlen (BestRawChoice)
01249 && (
01250 (
01251 valid_word (BestChoice)
01252 && case_ok (BestChoice)
01253 )
01254 || (
01255 valid_number (BestChoice)
01256 && pure_number (BestChoice)
01257 )
01258 )
01259 && punctuation_ok (BestChoice) != -1
01260 && punctuation_ok (BestChoice) <= 1
01261 )
01262 );
01263 }
01264
01265
01266
01282 void AdaptToChar(TBLOB *Blob,
01283 LINE_STATS *LineStats,
01284 CLASS_ID ClassId,
01285 FLOAT32 Threshold) {
01286 int NumFeatures;
01287 INT_FEATURE_ARRAY IntFeatures;
01288 INT_RESULT_STRUCT IntResult;
01289 CLASS_INDEX ClassIndex;
01290 INT_CLASS IClass;
01291 ADAPT_CLASS Class;
01292 TEMP_CONFIG TempConfig;
01293 FEATURE_SET FloatFeatures;
01294
01295 NumCharsAdaptedTo++;
01296 if (!LegalClassId (ClassId))
01297 return;
01298
01299 if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
01300 MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates);
01301 }
01302 else {
01303 IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
01304 ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
01305 Class = AdaptedTemplates->Class[ClassIndex];
01306
01307 NumFeatures = GetAdaptiveFeatures (Blob, LineStats,
01308 IntFeatures, &FloatFeatures);
01309 if (NumFeatures <= 0)
01310 return;
01311
01312 SetBaseLineMatch();
01313 IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
01314 NumFeatures, NumFeatures, IntFeatures, 0, 0,
01315 &IntResult, NO_DEBUG);
01316
01317 SetAdaptiveThreshold(Threshold);
01318
01319 if (IntResult.Rating <= Threshold) {
01320 if (ConfigIsPermanent (Class, IntResult.Config)) {
01321 if (LearningDebugLevel >= 1)
01322 cprintf ("Found good match to perm config %d = %4.1f%%.\n",
01323 IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01324 FreeFeatureSet(FloatFeatures);
01325 return;
01326 }
01327
01328 TempConfig = TempConfigFor (Class, IntResult.Config);
01329 IncreaseConfidence(TempConfig);
01330 if (LearningDebugLevel >= 1)
01331 cprintf ("Increasing reliability of temp config %d to %d.\n",
01332 IntResult.Config, TempConfig->NumTimesSeen);
01333
01334 if (TempConfigReliable (TempConfig))
01335 MakePermanent (AdaptedTemplates, ClassId, IntResult.Config,
01336 Blob, LineStats);
01337 }
01338 else {
01339 if (LearningDebugLevel >= 1)
01340 cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
01341 IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01342 MakeNewTemporaryConfig(AdaptedTemplates,
01343 ClassId,
01344 NumFeatures,
01345 IntFeatures,
01346 FloatFeatures);
01347 if (LearningDebugLevel >= 1) {
01348 IntegerMatcher (IClass, AllProtosOn, AllConfigsOn,
01349 NumFeatures, NumFeatures, IntFeatures, 0, 0,
01350 &IntResult, NO_DEBUG);
01351 cprintf ("Best match to temp config %d = %4.1f%%.\n",
01352 IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
01353 if (LearningDebugLevel >= 2) {
01354 UINT32 ConfigMask;
01355 ConfigMask = 1 << IntResult.Config;
01356 ShowMatchDisplay();
01357 IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask,
01358 NumFeatures, NumFeatures, IntFeatures, 0, 0,
01359 &IntResult, 6 | 0x19);
01360 UpdateMatchDisplay();
01361 GetClassToDebug ("Adapting");
01362 }
01363 }
01364 }
01365 FreeFeatureSet(FloatFeatures);
01366 }
01367 }
01368
01369
01370
01383 void AdaptToPunc(TBLOB *Blob,
01384 LINE_STATS *LineStats,
01385 CLASS_ID ClassId,
01386 FLOAT32 Threshold) {
01387 ADAPT_RESULTS Results;
01388 int i;
01389
01390 Results.BlobLength = MAX_FLOAT32;
01391 Results.NumMatches = 0;
01392 Results.BestRating = WORST_POSSIBLE_RATING;
01393 Results.BestClass = NO_CLASS;
01394 Results.BestConfig = 0;
01395 InitMatcherRatings (Results.Ratings);
01396 CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
01397 RemoveBadMatches(&Results);
01398
01399 if (Results.NumMatches != 1) {
01400 if (LearningDebugLevel >= 1) {
01401 cprintf ("Rejecting punc = %c (Alternatives = ", ClassId);
01402
01403 for (i = 0; i < Results.NumMatches; i++)
01404 cprintf ("%c", Results.Classes[i]);
01405 cprintf (")\n");
01406 }
01407 return;
01408 }
01409
01410 #ifndef SECURE_NAMES
01411 if (LearningDebugLevel >= 1)
01412 cprintf ("Adapting to punc = %c\n", ClassId);
01413 #endif
01414 AdaptToChar(Blob, LineStats, ClassId, Threshold);
01415
01416 }
01417
01418
01419
01441 void AddNewResult(ADAPT_RESULTS *Results,
01442 CLASS_ID ClassId,
01443 FLOAT32 Rating,
01444 int ConfigId) {
01445 FLOAT32 OldRating;
01446 INT_CLASS_STRUCT* CharClass = NULL;
01447
01448 OldRating = Results->Ratings[ClassId];
01449 if (Rating <= Results->BestRating + BadMatchPad && Rating < OldRating) {
01450 Results->Ratings[ClassId] = Rating;
01451 if (ClassId != NO_CLASS)
01452 CharClass = ClassForClassId(PreTrainedTemplates, ClassId);
01453 if (CharClass != NULL && NumIntConfigsIn(CharClass) == 32)
01454 Results->Configs[ClassId] = ConfigId;
01455 else
01456 Results->Configs[ClassId] = ~0;
01457
01458 if (Rating < Results->BestRating) {
01459 Results->BestRating = Rating;
01460 Results->BestClass = ClassId;
01461 Results->BestConfig = ConfigId;
01462 }
01463
01464
01465 if (OldRating == WORST_POSSIBLE_RATING)
01466 Results->Classes[Results->NumMatches++] = ClassId;
01467 }
01468 }
01469
01470
01471
01489 void AmbigClassifier(TBLOB *Blob,
01490 LINE_STATS *LineStats,
01491 INT_TEMPLATES Templates,
01492 char *Ambiguities,
01493 ADAPT_RESULTS *Results) {
01494 int IntOutlineLength;
01495 int NumFeatures;
01496 INT_FEATURE_ARRAY IntFeatures;
01497 CLASS_NORMALIZATION_ARRAY CharNormArray;
01498 INT_RESULT_STRUCT IntResult;
01499 CLASS_ID ClassId;
01500 CLASS_INDEX ClassIndex;
01501
01502 AmbigClassifierCalls++;
01503
01504 NumFeatures = GetCharNormFeatures (Blob, LineStats,
01505 Templates,
01506 IntFeatures, CharNormArray,
01507 &(Results->BlobLength));
01508 if (NumFeatures <= 0)
01509 return;
01510
01511 IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01512
01513 if (MatcherDebugLevel >= 2)
01514 cprintf ("AM Matches = ");
01515
01516 while (*Ambiguities) {
01517 ClassId = *Ambiguities;
01518 ClassIndex = IndexForClassId (Templates, ClassId);
01519
01520 SetCharNormMatch();
01521 IntegerMatcher (ClassForClassId (Templates, ClassId),
01522 AllProtosOn, AllConfigsOn,
01523 IntOutlineLength, NumFeatures, IntFeatures, 0,
01524 CharNormArray[ClassIndex], &IntResult, NO_DEBUG);
01525
01526 if (MatcherDebugLevel >= 2)
01527 cprintf ("%c-%-2d %2.0f ", ClassId, IntResult.Config,
01528 IntResult.Rating * 100.0);
01529
01530 AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01531
01532 Ambiguities++;
01533
01534 NumAmbigClassesTried++;
01535 }
01536 if (MatcherDebugLevel >= 2)
01537 cprintf ("\n");
01538
01539 }
01540
01541
01542
01559 char *BaselineClassifier(TBLOB *Blob,
01560 LINE_STATS *LineStats,
01561 ADAPT_TEMPLATES Templates,
01562 ADAPT_RESULTS *Results) {
01563 int IntOutlineLength;
01564 int NumFeatures;
01565 int NumClasses;
01566 int i;
01567 int config;
01568 float best_rating;
01569 INT_FEATURE_ARRAY IntFeatures;
01570 CLASS_NORMALIZATION_ARRAY CharNormArray;
01571 CLASS_PRUNER_RESULTS ClassPrunerResults;
01572 INT_RESULT_STRUCT IntResult;
01573 CLASS_ID ClassId;
01574 CLASS_INDEX ClassIndex;
01575 ADAPT_CLASS Class;
01576
01577 BaselineClassifierCalls++;
01578
01579 NumFeatures = GetBaselineFeatures (Blob, LineStats,
01580 Templates->Templates,
01581 IntFeatures, CharNormArray,
01582 &(Results->BlobLength));
01583 if (NumFeatures <= 0)
01584 return NULL;
01585
01586 IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01587
01588 NumClasses = ClassPruner (Templates->Templates, NumFeatures,
01589 IntFeatures, CharNormArray,
01590 BaselineCutoffs, ClassPrunerResults,
01591 MatchDebugFlags);
01592
01593 NumBaselineClassesTried += NumClasses;
01594
01595 if (MatcherDebugLevel >= 2 || display_ratings > 1)
01596 cprintf ("BL Matches = ");
01597
01598 best_rating = WORST_POSSIBLE_RATING;
01599 for (i = 0; i < NumClasses
01600 && ((newcp_ratings_on & 12) < 8
01601 || (newcp_ratings_on & 12) == 8
01602 && ClassPrunerResults[i].Rating < best_rating + BadMatchPad / 2
01603 && ClassPrunerResults[i].Rating < newcp_duff_rating
01604 && NumClasses > 1); i++) {
01605 ClassId = ClassPrunerResults[i].Class;
01606 ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01607
01608 SetBaseLineMatch();
01609 IntegerMatcher (ClassForClassId (Templates->Templates, ClassId),
01610 Templates->Class[ClassIndex]->PermProtos,
01611 Templates->Class[ClassIndex]->PermConfigs,
01612 IntOutlineLength, NumFeatures, IntFeatures, 0,
01613 CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
01614
01615 if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01616 cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f) ", ClassId, IntResult.Config,
01617 IntResult.Rating * 100.0,
01618 ClassPrunerResults[i].Rating * 100.0,
01619 ClassPrunerResults[i].Rating2 * 100.0);
01620 if (i % 4 == 3)
01621 cprintf ("\n");
01622 }
01623
01624 AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01625 }
01626 while (i < NumClasses) {
01627 ClassId = ClassPrunerResults[i].Class;
01628 ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01629 Class = Templates->Class[ClassIndex];
01630 config =
01631 NumIntConfigsIn (ClassForIndex (Templates->Templates, ClassIndex));
01632 for (config--; config >= 0 && !ConfigIsPermanent (Class, config);
01633 config--);
01634
01635 if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01636 cprintf ("%c(%d) %2.1f(%2.1f) ", ClassId, config,
01637 ClassPrunerResults[i].Rating * 200.0,
01638 ClassPrunerResults[i].Rating2 * 100.0);
01639 if (i % 4 == 3)
01640 cprintf ("\n");
01641 }
01642
01643 AddNewResult (Results, ClassId, ClassPrunerResults[i].Rating * 2,
01644 config);
01645 i++;
01646 }
01647 if (MatcherDebugLevel >= 2 || display_ratings > 1)
01648 cprintf ("\n");
01649
01650 ClassId = Results->BestClass;
01651 if (ClassId == NO_CLASS)
01652 return (NULL);
01653
01654
01655 ClassIndex = IndexForClassId (Templates->Templates, ClassId);
01656 return ((char *) (Templates->Class[ClassIndex]->
01657 Config[Results->BestConfig].Perm));
01658
01659 }
01660
01661
01662
01679 void make_config_pruner(INT_TEMPLATES templates,
01680 CONFIG_PRUNER *config_pruner) {
01681 int classid;
01682 int x;
01683 int word_index;
01684 int bit_index;
01685 UINT32 XFeatureAddress;
01686 UINT32 YFeatureAddress;
01687 UINT32 ThetaFeatureAddress;
01688 INT_CLASS ClassTemplate;
01689 int ProtoSetIndex;
01690 PROTO_SET ProtoSet;
01691 UINT32 *ProtoPrunerPtr;
01692 UINT32 ProtoNum;
01693 INT32 proto_offset;
01694 UINT32 ConfigWord;
01695 UINT32 ProtoWord;
01696 INT_PROTO Proto;
01697 UINT32 x_config_mask;
01698 UINT32 y_config_mask;
01699 UINT32 th_config_mask;
01700
01701 for (classid = 0; classid < NumClassesIn (templates); classid++) {
01702 ClassTemplate = ClassForIndex (templates, classid);
01703 for (x = 0; x < NUM_PP_BUCKETS; x++) {
01704 XFeatureAddress = (x << 1);
01705 YFeatureAddress = (NUM_PP_BUCKETS << 1) + (x << 1);
01706 ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + (x << 1);
01707 x_config_mask = 0;
01708 y_config_mask = 0;
01709 th_config_mask = 0;
01710 for (ProtoSetIndex = 0;
01711 ProtoSetIndex < NumProtoSetsIn (ClassTemplate);
01712 ProtoSetIndex++) {
01713 ProtoSet = ProtoSetIn (ClassTemplate, ProtoSetIndex);
01714 ProtoPrunerPtr = (UINT32 *) ((*ProtoSet).ProtoPruner);
01715 for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
01716 ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ProtoPrunerPtr++) {
01717
01718 ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
01719 for (proto_offset = 0; ProtoWord != 0;
01720 proto_offset++, ProtoWord >>= 1) {
01721 if (ProtoWord & 1) {
01722 Proto =
01723 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01724 ConfigWord = Proto->Configs[0];
01725 x_config_mask |= ConfigWord;
01726 }
01727 }
01728
01729 ProtoWord = *(ProtoPrunerPtr + YFeatureAddress);
01730 for (proto_offset = 0; ProtoWord != 0;
01731 proto_offset++, ProtoWord >>= 1) {
01732 if (ProtoWord & 1) {
01733 Proto =
01734 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01735 ConfigWord = Proto->Configs[0];
01736 y_config_mask |= ConfigWord;
01737 }
01738 }
01739
01740 ProtoWord = *(ProtoPrunerPtr + ThetaFeatureAddress);
01741 for (proto_offset = 0; ProtoWord != 0;
01742 proto_offset++, ProtoWord >>= 1) {
01743 if (ProtoWord & 1) {
01744 Proto =
01745 &(ProtoSet->Protos[ProtoNum + proto_offset]);
01746 ConfigWord = Proto->Configs[0];
01747 th_config_mask |= ConfigWord;
01748 }
01749 }
01750 }
01751 }
01752 for (word_index = 0; word_index < 4; word_index++) {
01753 ConfigWord = 0;
01754 for (bit_index = 0; bit_index < 8; bit_index++) {
01755 if (x_config_mask & 1)
01756 ConfigWord |= 1 << (bit_index * 4);
01757 x_config_mask >>= 1;
01758 }
01759 config_pruner[classid][0][x][word_index] = ConfigWord;
01760
01761 ConfigWord = 0;
01762 for (bit_index = 0; bit_index < 8; bit_index++) {
01763 if (y_config_mask & 1)
01764 ConfigWord |= 1 << (bit_index * 4);
01765 y_config_mask >>= 1;
01766 }
01767 config_pruner[classid][1][x][word_index] = ConfigWord;
01768
01769 ConfigWord = 0;
01770 for (bit_index = 0; bit_index < 8; bit_index++) {
01771 if (th_config_mask & 1)
01772 ConfigWord |= 1 << (bit_index * 4);
01773 th_config_mask >>= 1;
01774 }
01775 config_pruner[classid][2][x][word_index] = ConfigWord;
01776 }
01777 }
01778 }
01779 }
01780
01781
01782
01802 void CharNormClassifier(TBLOB *Blob,
01803 LINE_STATS *LineStats,
01804 INT_TEMPLATES Templates,
01805 ADAPT_RESULTS *Results) {
01806 int IntOutlineLength;
01807 int NumFeatures;
01808 int NumClasses;
01809 int i;
01810 INT32 min_misses;
01811 float best_rating;
01812 INT_FEATURE_ARRAY IntFeatures;
01813 CLASS_NORMALIZATION_ARRAY CharNormArray;
01814 CLASS_PRUNER_RESULTS ClassPrunerResults;
01815 INT_RESULT_STRUCT IntResult;
01816 CLASS_ID ClassId;
01817 CLASS_INDEX ClassIndex;
01818
01819 CharNormClassifierCalls++;
01820
01821 NumFeatures = GetCharNormFeatures (Blob, LineStats,
01822 Templates,
01823 IntFeatures, CharNormArray,
01824 &(Results->BlobLength));
01825 if (NumFeatures <= 0)
01826 return;
01827
01828 IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
01829
01830 NumClasses = ClassPruner (Templates, NumFeatures,
01831 IntFeatures, CharNormArray,
01832 CharNormCutoffs, ClassPrunerResults,
01833 MatchDebugFlags);
01834
01835 if (feature_prune_percentile > 0) {
01836 min_misses = feature_pruner (Templates, NumFeatures,
01837 IntFeatures, NumClasses,
01838 ClassPrunerResults);
01839 NumClasses =
01840 prune_configs(Templates,
01841 min_misses,
01842 NumFeatures,
01843 IntFeatures,
01844 CharNormArray,
01845 NumClasses,
01846 IntOutlineLength,
01847 ClassPrunerResults,
01848 MatchDebugFlags);
01849 }
01850 else
01851 min_misses = 0;
01852 if (tessedit_single_match && NumClasses > 1)
01853 NumClasses = 1;
01854 NumCharNormClassesTried += NumClasses;
01855
01856 if (MatcherDebugLevel >= 2 || display_ratings > 1)
01857 cprintf ("CN Matches = ");
01858
01859 best_rating = WORST_POSSIBLE_RATING;
01860 for (i = 0; i < NumClasses
01861 && ((newcp_ratings_on & 3) < 2
01862 || (newcp_ratings_on & 3) == 2
01863 && ClassPrunerResults[i].Rating < best_rating + BadMatchPad / 2
01864 && ClassPrunerResults[i].Rating < newcp_duff_rating
01865 && NumClasses > 1); i++) {
01866 ClassId = ClassPrunerResults[i].Class;
01867 ClassIndex = IndexForClassId (Templates, ClassId);
01868
01869 SetCharNormMatch();
01870
01871 if (feature_prune_percentile > 0)
01872
01873 config_mask_to_proto_mask (ClassForClassId (Templates, ClassId), (BIT_VECTOR) & ClassPrunerResults[i].config_mask,
01874 PrunedProtos);
01875
01876 IntegerMatcher (ClassForClassId (Templates, ClassId), PrunedProtos, (BIT_VECTOR) & ClassPrunerResults[i].config_mask,
01877 IntOutlineLength, NumFeatures, IntFeatures, 0,
01878 CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
01879
01880 if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01881 cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f) ", ClassId, IntResult.Config,
01882 IntResult.Rating * 100.0,
01883 ClassPrunerResults[i].Rating * 100.0,
01884 ClassPrunerResults[i].Rating2 * 100.0);
01885 if (i % 4 == 3)
01886 cprintf ("\n");
01887 }
01888
01889 AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
01890 if (IntResult.Rating < best_rating)
01891 best_rating = IntResult.Rating;
01892 }
01893 while (i < NumClasses) {
01894 ClassId = ClassPrunerResults[i].Class;
01895 ClassIndex = IndexForClassId (Templates, ClassId);
01896
01897 if (MatcherDebugLevel >= 2 || display_ratings > 1) {
01898 cprintf ("%c %2.1f(%2.1f) ", ClassId,
01899 ClassPrunerResults[i].Rating * 200.0,
01900 ClassPrunerResults[i].Rating2 * 100.0);
01901 if (i % 4 == 3)
01902 cprintf ("\n");
01903 }
01904
01905 AddNewResult (Results, ClassId, ClassPrunerResults[i].Rating * 2, 0);
01906 i++;
01907 }
01908 if (MatcherDebugLevel >= 2 || display_ratings > 1)
01909 cprintf ("\n");
01910
01911 }
01912
01913
01914
01929 void ClassifyAsNoise(TBLOB *Blob,
01930 LINE_STATS *LineStats,
01931 ADAPT_RESULTS *Results) {
01932 register FLOAT32 Rating;
01933
01934 Rating = Results->BlobLength / NoiseBlobLength;
01935 Rating *= Rating;
01936 Rating /= 1.0 + Rating;
01937
01938 AddNewResult (Results, NO_CLASS, Rating, 0);
01939
01940 }
01941
01942
01943
01960 int CompareCurrentRatings(
01961 const void *arg1,
01962 const void *arg2) {
01963 FLOAT32 Rating1, Rating2;
01964 CLASS_ID *Class1 = (CLASS_ID *) arg1;
01965 CLASS_ID *Class2 = (CLASS_ID *) arg2;
01966
01967 Rating1 = CurrentRatings[*Class1];
01968 Rating2 = CurrentRatings[*Class2];
01969
01970 if (Rating1 < Rating2)
01971 return (-1);
01972 else if (Rating1 > Rating2)
01973 return (1);
01974 else
01975 return (0);
01976
01977 }
01978
01979
01980
01998 LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) {
01999 char ChoiceString[2];
02000 int i;
02001 LIST Choices;
02002 CLASS_ID NextMatch;
02003 FLOAT32 Rating;
02004 FLOAT32 Certainty;
02005
02006 ChoiceString[1] = '\0';
02007 if (Results->NumMatches > MAX_MATCHES)
02008 Results->NumMatches = MAX_MATCHES;
02009
02010 for (Choices = NIL, i = 0; i < Results->NumMatches; i++) {
02011 NextMatch = Results->Classes[i];
02012 ChoiceString[0] = NextMatch;
02013 Rating = Certainty = Results->Ratings[NextMatch];
02014 Rating *= RatingScale * Results->BlobLength;
02015 Certainty *= -CertaintyScale;
02016 Choices = append_choice (Choices, ChoiceString, Rating, Certainty,
02017 Results->Configs[NextMatch]);
02018 }
02019 return (Choices);
02020
02021 }
02022
02023
02024
02036 #ifndef GRAPHICS_DISABLED
02037 void DebugAdaptiveClassifier(TBLOB *Blob,
02038 LINE_STATS *LineStats,
02039 ADAPT_RESULTS *Results) {
02040 const char *Prompt =
02041 "\nType class id (or CTRL-A,B,C) in IntegerMatch Window ...";
02042 const char *DebugMode = "All Templates";
02043 CLASS_ID LastClass = Results->BestClass;
02044 CLASS_ID ClassId;
02045 BOOL8 AdaptiveOn = TRUE;
02046 BOOL8 PreTrainedOn = TRUE;
02047
02048 ShowMatchDisplay();
02049 cprintf ("\nDebugging class = %c (%s) ...\n", LastClass, DebugMode);
02050 ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
02051 UpdateMatchDisplay();
02052
02053 while ((ClassId = GetClassToDebug (Prompt)) != 0) {
02054 switch (ClassId) {
02055 case 'b':
02056 AdaptiveOn = TRUE;
02057 PreTrainedOn = FALSE;
02058 DebugMode = "Adaptive Templates Only";
02059 break;
02060
02061 case 'c':
02062 AdaptiveOn = FALSE;
02063 PreTrainedOn = TRUE;
02064 DebugMode = "PreTrained Templates Only";
02065 break;
02066
02067 case 'a':
02068 AdaptiveOn = TRUE;
02069 PreTrainedOn = TRUE;
02070 DebugMode = "All Templates";
02071 break;
02072
02073 default:
02074 LastClass = ClassId;
02075 break;
02076 }
02077
02078 ShowMatchDisplay();
02079 cprintf ("\nDebugging class = %c (%s) ...\n", LastClass, DebugMode);
02080 ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
02081 UpdateMatchDisplay();
02082 }
02083 }
02084 #endif
02085
02086
02110 void DoAdaptiveMatch(TBLOB *Blob,
02111 LINE_STATS *LineStats,
02112 ADAPT_RESULTS *Results) {
02113 char *Ambiguities;
02114
02115 AdaptiveMatcherCalls++;
02116 InitIntFX();
02117
02118 if (AdaptedTemplates->NumPermClasses < MinNumPermClasses
02119 || tess_cn_matching) {
02120 CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
02121 }
02122 else {
02123 Ambiguities = BaselineClassifier (Blob, LineStats,
02124 AdaptedTemplates, Results);
02125
02126 if (Results->NumMatches > 0 && MarginalMatch (Results->BestRating)
02127 && !tess_bn_matching) {
02128 CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
02129 }
02130 else if (Ambiguities && *Ambiguities) {
02131 AmbigClassifier(Blob,
02132 LineStats,
02133 PreTrainedTemplates,
02134 Ambiguities,
02135 Results);
02136 }
02137 }
02138
02139 if (Results->NumMatches == 0)
02140 ClassifyAsNoise(Blob, LineStats, Results);
02141 }
02142
02143
02175 void GetAdaptThresholds (TWERD * Word,
02176 LINE_STATS * LineStats,
02177 const char *BestChoice,
02178 const char *BestRawChoice, FLOAT32 Thresholds[]) {
02179 TBLOB *Blob;
02180
02181 if (EnableNewAdaptRules) {
02182 FindClassifierErrors(PerfectRating,
02183 GoodAdaptiveMatch,
02184 RatingMargin,
02185 Thresholds);
02186 }
02187 else {
02188 for (Blob = Word->blobs;
02189 Blob != NULL;
02190 Blob = Blob->next, BestChoice++, BestRawChoice++, Thresholds++)
02191 if (*BestChoice == *BestRawChoice)
02192 *Thresholds = GoodAdaptiveMatch;
02193 else {
02194
02195 *Thresholds = GetBestRatingFor (Blob, LineStats, *BestChoice);
02196 *Thresholds *= (1.0 - RatingMargin);
02197 if (*Thresholds > GoodAdaptiveMatch)
02198 *Thresholds = GoodAdaptiveMatch;
02199 if (*Thresholds < PerfectRating)
02200 *Thresholds = PerfectRating;
02201 }
02202 }
02203 }
02204
02205
02220 char *GetAmbiguities(TBLOB *Blob,
02221 LINE_STATS *LineStats,
02222 CLASS_ID CorrectClass) {
02223 ADAPT_RESULTS Results;
02224 char *Ambiguities;
02225 int i;
02226
02227 EnterClassifyMode;
02228
02229 Results.NumMatches = 0;
02230 Results.BestRating = WORST_POSSIBLE_RATING;
02231 Results.BestClass = NO_CLASS;
02232 Results.BestConfig = 0;
02233 InitMatcherRatings (Results.Ratings);
02234
02235 CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results);
02236 RemoveBadMatches(&Results);
02237
02238
02239 CurrentRatings = Results.Ratings;
02240 qsort ((void *) (Results.Classes), Results.NumMatches,
02241 sizeof (CLASS_ID), CompareCurrentRatings);
02242
02243
02244
02245 Ambiguities = (char *) Emalloc (sizeof (char) * (Results.NumMatches + 1));
02246 if (Results.NumMatches > 1 ||
02247 Results.NumMatches == 1 && Results.Classes[0] != CorrectClass) {
02248 for (i = 0; i < Results.NumMatches; i++)
02249 Ambiguities[i] = Results.Classes[i];
02250 Ambiguities[i] = '\0';
02251 }
02252 else
02253 Ambiguities[0] = '\0';
02254
02255 return (Ambiguities);
02256
02257 }
02258
02259
02281 int GetBaselineFeatures(TBLOB *Blob,
02282 LINE_STATS *LineStats,
02283 INT_TEMPLATES Templates,
02284 INT_FEATURE_ARRAY IntFeatures,
02285 CLASS_NORMALIZATION_ARRAY CharNormArray,
02286 FLOAT32 *BlobLength) {
02287 FEATURE_SET Features;
02288 int NumFeatures;
02289
02290 if (EnableIntFX)
02291 return (GetIntBaselineFeatures (Blob, LineStats, Templates,
02292 IntFeatures, CharNormArray, BlobLength));
02293
02294 NormMethod = baseline;
02295 Features = ExtractPicoFeatures (Blob, LineStats);
02296
02297 NumFeatures = NumFeaturesIn (Features);
02298 *BlobLength = NumFeatures * GetPicoFeatureLength ();
02299 if (NumFeatures > UNLIKELY_NUM_FEAT) {
02300 FreeFeatureSet(Features);
02301 return (0);
02302 }
02303
02304 ComputeIntFeatures(Features, IntFeatures);
02305 ClearCharNormArray(Templates, CharNormArray);
02306
02307 FreeFeatureSet(Features);
02308 return (NumFeatures);
02309
02310 }
02311
02312
02329 FLOAT32 GetBestRatingFor(TBLOB *Blob,
02330 LINE_STATS *LineStats,
02331 CLASS_ID ClassId) {
02332 int CNOutlineLength, BLOutlineLength;
02333 int NumCNFeatures, NumBLFeatures;
02334 INT_FEATURE_ARRAY CNFeatures, BLFeatures;
02335 INT_RESULT_STRUCT CNResult, BLResult;
02336 CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
02337 CLASS_INDEX ClassIndex;
02338 FLOAT32 BlobLength;
02339
02340 CNResult.Rating = BLResult.Rating = 1.0;
02341
02342 if (!LegalClassId (ClassId))
02343 return (1.0);
02344
02345 if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) {
02346 NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
02347 PreTrainedTemplates,
02348 CNFeatures, CNAdjust, &BlobLength);
02349 if (NumCNFeatures > 0) {
02350 CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
02351 ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
02352
02353 SetCharNormMatch();
02354 IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
02355 AllProtosOn, AllConfigsOn,
02356 CNOutlineLength, NumCNFeatures, CNFeatures, 0,
02357 CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
02358 }
02359 }
02360
02361 if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) {
02362 NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
02363 AdaptedTemplates->Templates,
02364 BLFeatures, BLAdjust, &BlobLength);
02365 if (NumBLFeatures > 0) {
02366 BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
02367 ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
02368
02369 SetBaseLineMatch();
02370 IntegerMatcher (ClassForClassId
02371 (AdaptedTemplates->Templates, ClassId),
02372 AdaptedTemplates->Class[ClassIndex]->PermProtos,
02373 AdaptedTemplates->Class[ClassIndex]->PermConfigs,
02374 BLOutlineLength, NumBLFeatures, BLFeatures, 0,
02375 BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
02376 }
02377 }
02378
02379 return (MIN (BLResult.Rating, CNResult.Rating));
02380
02381 }
02382
02383
02407 int GetCharNormFeatures(TBLOB *Blob,
02408 LINE_STATS *LineStats,
02409 INT_TEMPLATES Templates,
02410 INT_FEATURE_ARRAY IntFeatures,
02411 CLASS_NORMALIZATION_ARRAY CharNormArray,
02412 FLOAT32 *BlobLength) {
02413 return (GetIntCharNormFeatures (Blob, LineStats, Templates,
02414 IntFeatures, CharNormArray, BlobLength));
02415 }
02416
02417
02444 int GetIntBaselineFeatures(TBLOB *Blob,
02445 LINE_STATS *LineStats,
02446 INT_TEMPLATES Templates,
02447 INT_FEATURE_ARRAY IntFeatures,
02448 CLASS_NORMALIZATION_ARRAY CharNormArray,
02449 FLOAT32 *BlobLength) {
02450 register INT_FEATURE Src, Dest, End;
02451
02452 if (!FeaturesHaveBeenExtracted) {
02453 FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
02454 CharNormFeatures, &FXInfo);
02455 FeaturesHaveBeenExtracted = TRUE;
02456 }
02457
02458 if (!FeaturesOK) {
02459 *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02460 return (0);
02461 }
02462
02463 for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures;
02464 Src < End; *Dest++ = *Src++);
02465
02466 ClearCharNormArray(Templates, CharNormArray);
02467 *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02468 return (FXInfo.NumBL);
02469
02470 }
02471
02472
02499 int GetIntCharNormFeatures(TBLOB *Blob,
02500 LINE_STATS *LineStats,
02501 INT_TEMPLATES Templates,
02502 INT_FEATURE_ARRAY IntFeatures,
02503 CLASS_NORMALIZATION_ARRAY CharNormArray,
02504 FLOAT32 *BlobLength) {
02505 register INT_FEATURE Src, Dest, End;
02506 FEATURE NormFeature;
02507 FLOAT32 Baseline, Scale;
02508
02509 if (!FeaturesHaveBeenExtracted) {
02510 FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures,
02511 CharNormFeatures, &FXInfo);
02512 FeaturesHaveBeenExtracted = TRUE;
02513 }
02514
02515 if (!FeaturesOK) {
02516 *BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
02517 return (0);
02518 }
02519
02520 for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
02521 Src < End; *Dest++ = *Src++);
02522
02523 NormFeature = NewFeature (&CharNormDesc);
02524 Baseline = BaselineAt (LineStats, FXInfo.Xmean);
02525 Scale = ComputeScaleFactor (LineStats);
02526 ParamOf (NormFeature, CharNormY) = (FXInfo.Ymean - Baseline) * Scale;
02527 ParamOf (NormFeature, CharNormLength) =
02528 FXInfo.Length * Scale / LENGTH_COMPRESSION;
02529 ParamOf (NormFeature, CharNormRx) = FXInfo.Rx * Scale;
02530 ParamOf (NormFeature, CharNormRy) = FXInfo.Ry * Scale;
02531 ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
02532 FreeFeature(NormFeature);
02533
02534 *BlobLength = FXInfo.Length * Scale;
02535 return (FXInfo.NumCN);
02536
02537 }
02538
02539
02550 void InitMatcherRatings(register FLOAT32 *Rating) {
02551 register FLOAT32 *LastRating;
02552 register FLOAT32 WorstRating = WORST_POSSIBLE_RATING;
02553
02554 for (LastRating = Rating + MAX_CLASS_ID;
02555 Rating <= LastRating; *Rating++ = WorstRating);
02556
02557 }
02558
02559
02576 void MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
02577 CLASS_ID ClassId,
02578 int NumFeatures,
02579 INT_FEATURE_ARRAY Features,
02580 FEATURE_SET FloatFeatures) {
02581 CLASS_INDEX ClassIndex;
02582 INT_CLASS IClass;
02583 ADAPT_CLASS Class;
02584 PROTO_ID OldProtos[MAX_NUM_PROTOS];
02585 FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
02586 int NumOldProtos;
02587 int NumBadFeatures;
02588 int MaxProtoId, OldMaxProtoId;
02589 int BlobLength = 0;
02590 int MaskSize;
02591 int ConfigId;
02592 TEMP_CONFIG Config;
02593 int i;
02594 int debug_level = NO_DEBUG;
02595
02596 if (LearningDebugLevel >= 3)
02597 debug_level =
02598 PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES;
02599
02600 ClassIndex = IndexForClassId (Templates->Templates, ClassId);
02601 IClass = ClassForClassId (Templates->Templates, ClassId);
02602 Class = Templates->Class[ClassIndex];
02603
02604 if (NumIntConfigsIn (IClass) >= MAX_NUM_CONFIGS)
02605 return;
02606
02607 OldMaxProtoId = NumIntProtosIn (IClass) - 1;
02608
02609 NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff,
02610 BlobLength, NumFeatures, Features,
02611 OldProtos, debug_level);
02612 NumOldProtos = 0;
02613
02614 MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS);
02615 zero_all_bits(TempProtoMask, MaskSize);
02616 for (i = 0; i < NumOldProtos; i++)
02617 SET_BIT (TempProtoMask, OldProtos[i]);
02618
02619 NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn,
02620 BlobLength, NumFeatures, Features,
02621 BadFeatures, debug_level);
02622
02623 MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures, BadFeatures,
02624 IClass, Class, TempProtoMask);
02625 if (MaxProtoId == NO_PROTO)
02626 return;
02627
02628 ConfigId = AddIntConfig (IClass);
02629 ConvertConfig(TempProtoMask, ConfigId, IClass);
02630 Config = NewTempConfig (MaxProtoId);
02631 TempConfigFor (Class, ConfigId) = Config;
02632 copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize);
02633
02634 if (LearningDebugLevel >= 1)
02635 cprintf ("Making new temp config %d using %d old and %d new protos.\n",
02636 ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);
02637
02638 }
02639
02640
02663 PROTO_ID
02664 MakeNewTempProtos (FEATURE_SET Features,
02665 int NumBadFeat,
02666 FEATURE_ID BadFeat[],
02667 INT_CLASS IClass,
02668 ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) {
02669 FEATURE_ID *ProtoStart;
02670 FEATURE_ID *ProtoEnd;
02671 FEATURE_ID *LastBad;
02672 TEMP_PROTO TempProto;
02673 PROTO Proto;
02674 FEATURE F1, F2;
02675 FLOAT32 X1, X2, Y1, Y2;
02676 FLOAT32 A1, A2, AngleDelta;
02677 FLOAT32 SegmentLength;
02678 PROTO_ID Pid;
02679
02680 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
02681 ProtoStart < LastBad; ProtoStart = ProtoEnd) {
02682 F1 = FeatureIn (Features, *ProtoStart);
02683 X1 = ParamOf (F1, PicoFeatX);
02684 Y1 = ParamOf (F1, PicoFeatY);
02685 A1 = ParamOf (F1, PicoFeatDir);
02686
02687 for (ProtoEnd = ProtoStart + 1,
02688 SegmentLength = GetPicoFeatureLength ();
02689 ProtoEnd < LastBad;
02690 ProtoEnd++, SegmentLength += GetPicoFeatureLength ()) {
02691 F2 = FeatureIn (Features, *ProtoEnd);
02692 X2 = ParamOf (F2, PicoFeatX);
02693 Y2 = ParamOf (F2, PicoFeatY);
02694 A2 = ParamOf (F2, PicoFeatDir);
02695
02696 AngleDelta = fabs (A1 - A2);
02697 if (AngleDelta > 0.5)
02698 AngleDelta = 1.0 - AngleDelta;
02699
02700 if (AngleDelta > MaxAngleDelta ||
02701 fabs (X1 - X2) > SegmentLength ||
02702 fabs (Y1 - Y2) > SegmentLength)
02703 break;
02704 }
02705
02706 F2 = FeatureIn (Features, *(ProtoEnd - 1));
02707 X2 = ParamOf (F2, PicoFeatX);
02708 Y2 = ParamOf (F2, PicoFeatY);
02709 A2 = ParamOf (F2, PicoFeatDir);
02710
02711 Pid = AddIntProto (IClass);
02712 if (Pid == NO_PROTO)
02713 return (NO_PROTO);
02714
02715 TempProto = NewTempProto ();
02716 Proto = &(TempProto->Proto);
02717
02718
02719 ProtoLength (Proto) = SegmentLength;
02720 ProtoAngle (Proto) = A1;
02721 ProtoX (Proto) = (X1 + X2) / 2.0;
02722 ProtoY (Proto) = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
02723 FillABC(Proto);
02724
02725 TempProto->ProtoId = Pid;
02726 SET_BIT(TempProtoMask, Pid);
02727
02728 ConvertProto(Proto, Pid, IClass);
02729 AddProtoToProtoPruner(Proto, Pid, IClass);
02730
02731 Class->TempProtos = push (Class->TempProtos, TempProto);
02732 }
02733 return (NumIntProtosIn (IClass) - 1);
02734 }
02735
02736
02750 void MakePermanent(ADAPT_TEMPLATES Templates,
02751 CLASS_ID ClassId,
02752 int ConfigId,
02753 TBLOB *Blob,
02754 LINE_STATS *LineStats) {
02755 char *Ambigs;
02756 TEMP_CONFIG Config;
02757 CLASS_INDEX ClassIndex;
02758 ADAPT_CLASS Class;
02759 PROTO_KEY ProtoKey;
02760
02761 ClassIndex = IndexForClassId (Templates->Templates, ClassId);
02762 Class = Templates->Class[ClassIndex];
02763 Config = TempConfigFor (Class, ConfigId);
02764
02765 MakeConfigPermanent(Class, ConfigId);
02766 if (Class->NumPermConfigs == 0)
02767 Templates->NumPermClasses++;
02768 Class->NumPermConfigs++;
02769
02770 ProtoKey.Templates = Templates;
02771 ProtoKey.ClassId = ClassId;
02772 ProtoKey.ConfigId = ConfigId;
02773 Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey,
02774 MakeTempProtoPerm);
02775 FreeTempConfig(Config);
02776
02777 Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
02778 PermConfigFor (Class, ConfigId) = Ambigs;
02779
02780 if (LearningDebugLevel >= 1)
02781 cprintf ("Making config %d permanent with ambiguities '%s'.\n",
02782 ConfigId, Ambigs);
02783
02784 }
02785
02786
02799 int MakeTempProtoPerm(void *item1,
02800 void *item2) {
02801 CLASS_INDEX ClassIndex;
02802 ADAPT_CLASS Class;
02803 TEMP_CONFIG Config;
02804 TEMP_PROTO TempProto;
02805 PROTO_KEY *ProtoKey;
02806
02807 TempProto = (TEMP_PROTO) item1;
02808 ProtoKey = (PROTO_KEY *) item2;
02809
02810 ClassIndex = IndexForClassId (ProtoKey->Templates->Templates,
02811 ProtoKey->ClassId);
02812 Class = ProtoKey->Templates->Class[ClassIndex];
02813 Config = TempConfigFor (Class, ProtoKey->ConfigId);
02814
02815 if (TempProto->ProtoId > Config->MaxProtoId ||
02816 !test_bit (Config->Protos, TempProto->ProtoId))
02817 return (FALSE);
02818
02819 MakeProtoPermanent (Class, TempProto->ProtoId);
02820 AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId,
02821 ProtoKey->Templates->Templates);
02822 FreeTempProto(TempProto);
02823
02824 return (TRUE);
02825
02826 }
02827
02828
02838 int NumBlobsIn(TWERD *Word) {
02839 register TBLOB *Blob;
02840 register int NumBlobs;
02841
02842 if (Word == NULL)
02843 return (0);
02844
02845 for (Blob = Word->blobs, NumBlobs = 0;
02846 Blob != NULL; Blob = Blob->next, NumBlobs++);
02847
02848 return (NumBlobs);
02849
02850 }
02851
02852
02862 int NumOutlinesInBlob(TBLOB *Blob) {
02863 register TESSLINE *Outline;
02864 register int NumOutlines;
02865
02866 if (Blob == NULL)
02867 return (0);
02868
02869 for (Outline = Blob->outlines, NumOutlines = 0;
02870 Outline != NULL; Outline = Outline->next, NumOutlines++);
02871
02872 return (NumOutlines);
02873
02874 }
02875
02876
02887 void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
02888 int i;
02889
02890 if (Results->NumMatches > 0) {
02891 cprintf ("%c(%d) %4.1f ", Results->Classes[0], Results->Classes[0],
02892 Results->Ratings[Results->Classes[0]] * 100.0);
02893
02894 for (i = 1; i < Results->NumMatches; i++) {
02895 cprintf ("%c(%d) %4.1f ", Results->Classes[i],
02896 Results->Classes[i],
02897 Results->Ratings[Results->Classes[i]] * 100.0);
02898 }
02899 }
02900 }
02901
02902
02915 void RemoveBadMatches(ADAPT_RESULTS *Results) {
02916 int Next, NextGood;
02917 FLOAT32 *Rating = Results->Ratings;
02918 CLASS_ID *Match = Results->Classes;
02919 FLOAT32 BadMatchThreshold;
02920 static const char* romans = "ivxIVX";
02921 BadMatchThreshold = Results->BestRating + BadMatchPad;
02922
02923 if (bln_numericmode) {
02924 for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
02925 if (Rating[Match[Next]] <= BadMatchThreshold) {
02926 if (!isalpha(Match[Next]) || strchr(romans, Match[Next]) != NULL) {
02927 Match[NextGood++] = Match[Next];
02928 } else if (Match[Next] == 'l' && Rating['1'] >= BadMatchThreshold) {
02929 Match[NextGood++] = '1';
02930 Rating['1'] = Rating['l'];
02931 } else if (Match[Next] == 'O' && Rating['0'] >= BadMatchThreshold) {
02932 Match[NextGood++] = '0';
02933 Rating['0'] = Rating['O'];
02934 }
02935 }
02936 }
02937 }
02938 else {
02939 for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
02940 if (Rating[Match[Next]] <= BadMatchThreshold)
02941 Match[NextGood++] = Match[Next];
02942 }
02943 }
02944
02945 Results->NumMatches = NextGood;
02946
02947 }
02948
02949
02962 void RemoveExtraPuncs(ADAPT_RESULTS *Results) {
02963 int Next, NextGood;
02964 int punc_count;
02965 int digit_count;
02966 CLASS_ID *Match = Results->Classes;
02967
02968 static char punc_chars[] = ".,;:/`~'-=\\|\"!_^";
02969 static char digit_chars[] = "0123456789";
02970
02971 punc_count = 0;
02972 digit_count = 0;
02973 for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
02974 if (strchr (punc_chars, Match[Next]) == NULL) {
02975 if (strchr (digit_chars, Match[Next]) == NULL) {
02976 Match[NextGood++] = Match[Next];
02977 }
02978 else {
02979 if (digit_count < 1)
02980 Match[NextGood++] = Match[Next];
02981 digit_count++;
02982 }
02983 }
02984 else {
02985 if (punc_count < 2)
02986 Match[NextGood++] = Match[Next];
02987 punc_count++;
02988 }
02989 }
02990 Results->NumMatches = NextGood;
02991 }
02992
02993
03004 void SetAdaptiveThreshold(FLOAT32 Threshold) {
03005 if (Threshold == GoodAdaptiveMatch) {
03006
03007
03008 SetProtoThresh (0.9);
03009 SetFeatureThresh (0.9);
03010 }
03011 else {
03012
03013 SetProtoThresh (1.0 - Threshold);
03014 SetFeatureThresh (1.0 - Threshold);
03015 }
03016 }
03017
03018
03039 void ShowBestMatchFor(TBLOB *Blob,
03040 LINE_STATS *LineStats,
03041 CLASS_ID ClassId,
03042 BOOL8 AdaptiveOn,
03043 BOOL8 PreTrainedOn) {
03044 int CNOutlineLength = 0, BLOutlineLength = 0;
03045 int NumCNFeatures = 0, NumBLFeatures = 0;
03046 INT_FEATURE_ARRAY CNFeatures, BLFeatures;
03047 INT_RESULT_STRUCT CNResult, BLResult;
03048 CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
03049 CLASS_INDEX ClassIndex;
03050 FLOAT32 BlobLength;
03051 UINT32 ConfigMask;
03052 static int next_config = -1;
03053
03054 if (PreTrainedOn) next_config = -1;
03055
03056 CNResult.Rating = BLResult.Rating = 2.0;
03057
03058 if (!LegalClassId (ClassId)) {
03059 cprintf ("%c is not a legal class!!\n", ClassId);
03060 return;
03061 }
03062
03063 if (PreTrainedOn)
03064 if (UnusedClassIdIn (PreTrainedTemplates, ClassId))
03065 cprintf ("No built-in templates for class '%c'\n", ClassId);
03066 else {
03067 NumCNFeatures = GetCharNormFeatures (Blob, LineStats,
03068 PreTrainedTemplates,
03069 CNFeatures, CNAdjust,
03070 &BlobLength);
03071 if (NumCNFeatures <= 0)
03072 cprintf ("Illegal blob (char norm features)!\n");
03073 else {
03074 CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
03075 ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
03076
03077 SetCharNormMatch();
03078 IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
03079 AllProtosOn, AllConfigsOn,
03080 CNOutlineLength, NumCNFeatures, CNFeatures, 0,
03081 CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
03082
03083 cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
03084 CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]);
03085 }
03086 }
03087
03088 if (AdaptiveOn)
03089 if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId))
03090 cprintf ("No AD templates for class '%c'\n", ClassId);
03091 else {
03092 NumBLFeatures = GetBaselineFeatures (Blob, LineStats,
03093 AdaptedTemplates->Templates,
03094 BLFeatures, BLAdjust,
03095 &BlobLength);
03096 if (NumBLFeatures <= 0)
03097 cprintf ("Illegal blob (baseline features)!\n");
03098 else {
03099 BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
03100 ClassIndex =
03101 IndexForClassId (AdaptedTemplates->Templates, ClassId);
03102
03103 SetBaseLineMatch();
03104 IntegerMatcher (ClassForClassId
03105 (AdaptedTemplates->Templates, ClassId),
03106 AllProtosOn, AllConfigsOn,
03107
03108
03109 BLOutlineLength, NumBLFeatures, BLFeatures, 0,
03110 BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
03111
03112 #ifndef SECURE_NAMES
03113 cprintf ("Best adaptive template match is config %2d (%4.1f)\n",
03114 BLResult.Config, BLResult.Rating * 100.0);
03115 #endif
03116 }
03117 }
03118
03119 cprintf ("\n");
03120 if (BLResult.Rating < CNResult.Rating) {
03121 ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
03122 if (next_config < 0) {
03123 ConfigMask = 1 << BLResult.Config;
03124 next_config = 0;
03125 } else {
03126 ConfigMask = 1 << next_config;
03127 ++next_config;
03128 }
03129 NormMethod = baseline;
03130
03131 SetBaseLineMatch();
03132 IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId),
03133 AllProtosOn,
03134
03135 (BIT_VECTOR) & ConfigMask,
03136 BLOutlineLength, NumBLFeatures, BLFeatures, 0,
03137 BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
03138 cprintf ("Adaptive template match for config %2d is %4.1f\n",
03139 BLResult.Config, BLResult.Rating * 100.0);
03140 }
03141 else {
03142 ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
03143 ConfigMask = 1 << CNResult.Config;
03144 NormMethod = character;
03145
03146 SetCharNormMatch();
03147
03148 IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask,
03149 CNOutlineLength, NumCNFeatures, CNFeatures, 0,
03150 CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
03151 }
03152 }