dict/stopper.cpp File Reference

#include "stopper.h"
#include "emalloc.h"
#include "matchdefs.h"
#include "debug.h"
#include "callcpp.h"
#include "permute.h"
#include "context.h"
#include "permnum.h"
#include "danerror.h"
#include "const.h"
#include "freelist.h"
#include "efio.h"
#include "globals.h"
#include "scanutils.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

Go to the source code of this file.

Classes

Defines

Typedefs

Functions

Variables


Define Documentation

#define AmbigThreshold ( F1,
F2   ) 

Value:

(((F2) - (F1)) * AmbigThresholdGain - \
            AmbigThresholdOffset)

Definition at line 107 of file stopper.cpp.

Referenced by FreeBadChoice(), and LogNewWordChoice().

#define BestCertainty ( Choices   )     (((VIABLE_CHOICE) first (Choices))->Certainty)

Definition at line 103 of file stopper.cpp.

Referenced by LogNewWordChoice().

#define BestFactor ( Choices   )     (((VIABLE_CHOICE) first (Choices))->AdjustFactor)

Definition at line 105 of file stopper.cpp.

Referenced by LogNewWordChoice().

#define BestRating ( Choices   )     (((VIABLE_CHOICE) first (Choices))->Rating)

Definition at line 104 of file stopper.cpp.

Referenced by AddNewResult(), and LogNewWordChoice().

#define DANGEROUS_AMBIGS   "tessdata/DangAmbigs"

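Path, relative to demodir, of the file listing the letter patterns that are dangerously ambiguous for the recognizer. InitStopperVars() passes it to string_variable for the DangerousAmbigs variable.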

Definition at line 51 of file stopper.cpp.

Referenced by InitStopperVars().

#define MAX_AMBIG_SIZE   3

Definition at line 49 of file stopper.cpp.

Referenced by FillAmbigTable().

#define MAX_WERD_SIZE   100

Definition at line 48 of file stopper.cpp.

Referenced by NoDangerousAmbig().


Typedef Documentation

AMBIG_TABLE

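Entry type of the dangerous-ambiguity table built by FillAmbigTable(): the table is an array indexed by class id, and each entry is a list of ambiguity strings. Related to stopping criteria for word classifier.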

Definition at line 57 of file stopper.cpp.

VIABLE_CHOICE

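Pointer to a VIABLE_CHOICE_STRUCT describing one candidate word choice (its Rating, Certainty, AdjustFactor, Length and per-blob Blob[] entries). Related to stopping criteria for word classifier.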

Definition at line 87 of file stopper.cpp.


Function Documentation

int AcceptableChoice ( CHOICES_LIST  Choices,
A_CHOICE BestChoice,
A_CHOICE RawChoice,
DANGERR fixpt 
)

Return TRUE if the results from this segmentation are good enough to stop; else FALSE.

Parameters:
Choices Choices for current segmentation
BestChoice Best choice for current segmentation
RawChoice Best raw choice for current segmentation
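fixpt Optional (may be NULL) danger record; its index is reset to -1 here and it is then passed on to NoDangerousAmbig()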
Note:
Globals:
  • NonDictCertainty certainty for a non-dict word
  • SmallWordSize size of word to be treated as non-word
  • CertaintyPerChar certainty to add for each dict char
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Mon Apr 29 14:57:32 1991, DSJ, Created.

Definition at line 226 of file stopper.cpp.

References case_ok(), CertaintyPerChar, class_certainty, class_string, cprintf(), FALSE, DANGERR::index, LengthOfShortestAlphaRun(), NoDangerousAmbig(), NonDictCertainty, NULL, punctuation_ok(), StopperDebugLevel, TRUE, UniformCertainties(), valid_number(), and valid_word().

Referenced by chop_word_main(), evaluate_state(), and improve_by_chopping().

00229                                      {
00230   float CertaintyThreshold = NonDictCertainty;
00231   int WordSize;
00232 
00233   if (fixpt != NULL)
00234     fixpt->index = -1;
00235   if ((BestChoice == NULL) || (class_string (BestChoice) == NULL))
00236     return (FALSE);
00237 
00238   if (StopperDebugLevel >= 1)
00239     cprintf ("\nStopper:  %s (word=%c, case=%c, punct=%c)\n",
00240       class_string (BestChoice),
00241       (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
00242     (case_ok (class_string (BestChoice)) ? 'y' : 'n'),
00243     ((punctuation_ok (class_string (BestChoice)) !=
00244     -1) ? 'y' : 'n'));
00245 
00246   if (valid_word (class_string (BestChoice)) &&
00247     case_ok (class_string (BestChoice)) &&
00248   punctuation_ok (class_string (BestChoice)) != -1) {
00249     WordSize = LengthOfShortestAlphaRun (class_string (BestChoice));
00250     WordSize -= SmallWordSize;
00251     if (WordSize < 0)
00252       WordSize = 0;
00253     CertaintyThreshold += WordSize * CertaintyPerChar;
00254   }
00255   else if (stopper_numbers_on && valid_number (class_string (BestChoice))) {
00256     CertaintyThreshold += stopper_numbers_on * CertaintyPerChar;
00257   }
00258 
00259   if (StopperDebugLevel >= 1)
00260     cprintf ("Stopper:  Certainty = %4.1f, Threshold = %4.1f\n",
00261       class_certainty (BestChoice), CertaintyThreshold);
00262 
00263   if (NoDangerousAmbig (class_string (BestChoice), fixpt)
00264     && class_certainty (BestChoice) > CertaintyThreshold &&
00265     UniformCertainties (Choices, BestChoice))
00266     return (TRUE);
00267   else
00268     return (FALSE);
00269 
00270 }                                /* AcceptableChoice */

int AcceptableResult ( A_CHOICE BestChoice,
A_CHOICE RawChoice 
)

Return FALSE if the best choice for the current word is questionable and should be tried again on the second pass or should be flagged to the user.

Parameters:
BestChoice Best choice for current word
RawChoice Best raw choice for current word
Note:
Globals:
  • NonDictCertainty Certainty for a non-dict word
  • SmallWordSize Size of word to be treated as non-word
  • CertaintyPerChar Certainty to add for each dict char
  • BestChoices List of all good choices found
  • RejectOffset Allowed offset before a word is rejected
  • StopperDebugLevel Debugging level
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Thu May 9 14:05:05 1991, DSJ, Created.

Definition at line 292 of file stopper.cpp.

References BestChoices, case_ok(), CertaintyPerChar, class_certainty, class_string, cprintf(), CurrentWordAmbig(), FALSE, LengthOfShortestAlphaRun(), NIL, NonDictCertainty, NULL, punctuation_ok(), RejectOffset, rest, StopperDebugLevel, TRUE, and valid_word().

Referenced by tess_acceptable_word().

00292                                                                 { 
00293   float CertaintyThreshold = NonDictCertainty - RejectOffset;
00294   int WordSize;
00295 
00296   if (StopperDebugLevel >= 1)
00297     cprintf ("\nRejecter: %s (word=%c, case=%c, punct=%c, unambig=%c)\n",
00298       class_string (BestChoice),
00299       (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
00300     (case_ok (class_string (BestChoice)) ? 'y' : 'n'),
00301     ((punctuation_ok (class_string (BestChoice)) != -1) ? 'y' : 'n'),
00302     ((rest (BestChoices) != NIL) ? 'n' : 'y'));
00303 
00304   if ((BestChoice == NULL) ||
00305     (class_string (BestChoice) == NULL) || CurrentWordAmbig ())
00306     return (FALSE);
00307 
00308   if (valid_word (class_string (BestChoice)) &&
00309     case_ok (class_string (BestChoice)) &&
00310   punctuation_ok (class_string (BestChoice)) != -1) {
00311     WordSize = LengthOfShortestAlphaRun (class_string (BestChoice));
00312     WordSize -= SmallWordSize;
00313     if (WordSize < 0)
00314       WordSize = 0;
00315     CertaintyThreshold += WordSize * CertaintyPerChar;
00316   }
00317 
00318   if (StopperDebugLevel >= 1)
00319     cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f   ",
00320       class_certainty (BestChoice), CertaintyThreshold);
00321 
00322   if (class_certainty (BestChoice) > CertaintyThreshold) {
00323     if (StopperDebugLevel >= 1)
00324       cprintf ("ACCEPTED\n");
00325     return (TRUE);
00326   }
00327   else {
00328     if (StopperDebugLevel >= 1)
00329       cprintf ("REJECTED\n");
00330     return (FALSE);
00331   }
00332 }                                /* AcceptableResult */

void AddNewChunk ( VIABLE_CHOICE  Choice,
int  Blob 
)

Increments the chunk count of the character in Choice which corresponds to Blob.

Parameters:
Choice choice to add a new chunk to
Blob index of blob being split
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Mon May 20 11:43:27 1991, DSJ, Created.

Definition at line 858 of file stopper.cpp.

References assert(), VIABLE_CHOICE_STRUCT::Blob, cprintf(), FALSE, VIABLE_CHOICE_STRUCT::Length, mem_tidy(), and CHAR_CHOICE::NumChunks.

Referenced by LogNewSplit().

00858                                                  { 
00859   int i, LastChunk;
00860 
00861   for (i = 0, LastChunk = 0; i < Choice->Length; i++) {
00862     LastChunk += Choice->Blob[i].NumChunks;
00863     if (Blob < LastChunk) {
00864       (Choice->Blob[i].NumChunks)++;
00865       return;
00866     }
00867   }
00868   mem_tidy (1);
00869   cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n",
00870     Choice->Length, LastChunk, Blob);
00871   assert(FALSE);  /* this should never get executed */
00872 
00873 }                                /* AddNewChunk */

int AlternativeChoicesWorseThan ( FLOAT32  Threshold  ) 

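Returns TRUE if there are no alternative choices for the current word OR if all alternatives have an adjust factor worse than (i.e. strictly greater than) Threshold. Returns FALSE as soon as one alternative has an adjust factor less than or equal to Threshold.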

Parameters:
Threshold Minimum adjust factor for alternative choices
Note:
Globals: BestChoices Alternative choices for current word
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Mon Jun 3 09:36:31 1991, DSJ, Created.

Definition at line 347 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, BestChoices, FALSE, first, iterate, rest, and TRUE.

Referenced by AdaptableWord().

00347                                                    { 
00348   LIST Alternatives;
00349   VIABLE_CHOICE Choice;
00350 
00351   Alternatives = rest (BestChoices);
00352   iterate(Alternatives) {
00353     Choice = (VIABLE_CHOICE) first (Alternatives);
00354     if (Choice->AdjustFactor <= Threshold)
00355       return (FALSE);
00356   }
00357 
00358   return (TRUE);
00359 
00360 }                                /* AlternativeChoicesWorseThan */

int AmbigsFound ( char *  Word,
char *  CurrentChar,
const char *  Tail,
LIST  Ambigs,
DANGERR fixpt 
)

For each ambiguity in Ambigs, see if the remainder of the test string matches the start of Tail.

Parameters:
Word Word being tested for ambiguities
CurrentChar Position in Word to put ambig replacement
Tail End of word to place after ambiguity
Ambigs List of ambiguities to test at this position
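fixpt If not NULL, receives good_length (length of the replacement string) and bad_length (one plus the number of matched Tail characters) when an ambiguous dictionary word is found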
Note:
Globals: StopperDebugLevel Debugging level (controls the debug message printed when an ambiguous word is found)
Returns:
TRUE if the Word is ambiguous at the specified position
If an ambiguity matches the start of Tail, construct a word consisting of the contents of Word up to, but not including, CurrentChar, followed by the replacement string for the ambiguity, followed by the unmatched contents of Tail. Then test this word to see if it is a dictionary word. If it is, return TRUE. If none of the ambiguities results in a dictionary word, return FALSE.
Note:
Exceptions: none
Date:
Thu May 9 10:10:28 1991, DSJ, Created.

Definition at line 898 of file stopper.cpp.

References DANGERR::bad_length, cprintf(), FALSE, first, DANGERR::good_length, iterate, NULL, StopperDebugLevel, TRUE, and valid_word().

Referenced by NoDangerousAmbig().

00902                                 {
00903   char *AmbigSpec;
00904   const char *UnmatchedTail;
00905   int Matches;
00906   int bad_length;
00907 
00908   iterate(Ambigs) {
00909     AmbigSpec = (char *) first (Ambigs);
00910     bad_length = 1;
00911     UnmatchedTail = Tail;
00912     Matches = TRUE;
00913 
00914     while (*AmbigSpec != ' ' && Matches)
00915     if (*AmbigSpec == *UnmatchedTail) {
00916       AmbigSpec++;
00917       UnmatchedTail++;
00918       bad_length++;
00919     }
00920     else
00921       Matches = FALSE;
00922 
00923     if (Matches) {
00924       AmbigSpec++;               /* skip over the space */
00925                                  /* insert replacement string */
00926       strcpy(CurrentChar, AmbigSpec);
00927                                  /* add tail */
00928       strcat(Word, UnmatchedTail);
00929       if (valid_word (Word)) {
00930         if (StopperDebugLevel >= 1)
00931           cprintf ("Stopper:  Possible ambiguous word = %s\n", Word);
00932         if (fixpt != NULL) {
00933           fixpt->good_length = strlen (AmbigSpec);
00934           fixpt->bad_length = bad_length;
00935         }
00936         return (TRUE);
00937       }
00938     }
00939   }
00940   return (FALSE);
00941 
00942 }                                /* AmbigsFound */

int ChoiceSameAs ( A_CHOICE Choice,
VIABLE_CHOICE  ViableChoice 
)

Compares the corresponding strings of Choice and ViableChoice and returns TRUE if they are the same, else FALSE.

Parameters:
Choice Choice to compare to ViableChoice
ViableChoice Viable choice to compare to Choice
Note:
Globals: none
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Fri May 17 08:48:04 1991, DSJ, Created.

Definition at line 957 of file stopper.cpp.

References class_string, and StringSameAs().

Referenced by LogNewRawChoice(), and LogNewWordChoice().

00957                                                                { 
00958   return (StringSameAs (class_string (Choice), ViableChoice));
00959 
00960 }                                /* ChoiceSameAs */

int CmpChoiceRatings ( void *  arg1,
void *  arg2 
)

Return -1 if the rating for Choice1 is less than the rating for Choice2; otherwise (including when the ratings are equal) return 1.

Parameters:
arg1 Choice to compare ratings for
arg2 Choice to compare ratings for
Note:
Globals: none
Returns:
-1 or 1
Note:
Exceptions: none
Date:
Wed May 15 13:02:37 1991, DSJ, Created.

Definition at line 975 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::Rating.

Referenced by LogNewWordChoice().

00976                                  {
00977   float R1, R2;
00978   VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1;
00979   VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2;
00980 
00981   R1 = Choice1->Rating;
00982   R2 = Choice2->Rating;
00983 
00984   if (R1 < R2)
00985     return (-1);
00986   else
00987     return (1);
00988 
00989 }                                /* CmpChoiceRatings */

FLOAT32 CurrentBestChoiceAdjustFactor (  ) 

Return the adjustment factor for the best choice for the current word.

Parameters:
none 
Note:
Globals: BestChoices Set of best choices for current word
Returns:
Adjust factor for current best choice.
Note:
Exceptions: none
Date:
Thu May 30 14:48:24 1991, DSJ, Created.

Definition at line 392 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, BestChoices, first, MAX_FLOAT32, and NIL.

Referenced by AdaptableWord().

00392                                         { 
00393   VIABLE_CHOICE BestChoice;
00394 
00395   if (BestChoices == NIL)
00396     return (MAX_FLOAT32);
00397 
00398   BestChoice = (VIABLE_CHOICE) first (BestChoices);
00399   return (BestChoice->AdjustFactor);
00400 
00401 }                                /* CurrentBestChoiceAdjustFactor */

int CurrentBestChoiceIs ( const char *  Word  ) 

Returns TRUE if Word is the same as the current best choice, FALSE otherwise.

Parameters:
Word String to compare to current best choice
Note:
Globals: BestChoices Set of best choices for current word
Returns:
TRUE or FALSE
Note:
Exceptions: none
Date:
Thu May 30 14:44:22 1991, DSJ, Created.

Definition at line 374 of file stopper.cpp.

References BestChoices, first, NIL, and StringSameAs().

Referenced by AdaptableWord().

00374                                           { 
00375   return (BestChoices != NIL &&
00376     StringSameAs (Word, (VIABLE_CHOICE) first (BestChoices)));
00377 
00378 }                                /* CurrentBestChoiceIs */

int CurrentWordAmbig (  ) 

Returns TRUE if there are multiple good choices for the current word and FALSE otherwise.

Parameters:
none 
Note:
Globals: BestChoices Set of best choices for current word
Returns:
TRUE or FALSE
Note:
Exceptions: none
Date:
Wed May 22 15:38:38 1991, DSJ, Created.

Definition at line 415 of file stopper.cpp.

References BestChoices, NIL, and rest.

Referenced by AcceptableResult(), and add_document_word().

00415                        { 
00416   return (rest (BestChoices) != NIL);
00417 
00418 }                                /* CurrentWordAmbig */

void DebugWordChoices (  ) 

Print the current choices for this word to stdout.

Parameters:
none 
Note:
Globals: BestRawChoice, BestChoices
Returns:
none
Note:
Exceptions: none
Date:
Wed May 15 13:52:08 1991, DSJ, Created.

Definition at line 431 of file stopper.cpp.

References BestChoices, BestRawChoice, cprintf(), first, iterate, PrintViableChoice(), StopperDebugLevel, StringSameAs(), and WordToDebug.

Referenced by cc_recog().

00431                         { 
00432   LIST Choices;
00433   int i;
00434   char LabelString[80];
00435 
00436   if (StopperDebugLevel >= 1 ||
00437     WordToDebug && BestChoices &&
00438   StringSameAs (WordToDebug, (VIABLE_CHOICE) first (BestChoices))) {
00439     if (BestRawChoice)
00440       PrintViableChoice (stdout, "\nBest Raw Choice:   ", BestRawChoice);
00441 
00442     i = 1;
00443     Choices = BestChoices;
00444     if (Choices)
00445       cprintf ("\nBest Cooked Choices:\n");
00446     iterate(Choices) {
00447       sprintf (LabelString, "Cooked Choice #%d:  ", i);
00448       PrintViableChoice (stdout, LabelString,
00449         (VIABLE_CHOICE) first (Choices));
00450       i++;
00451     }
00452   }
00453 }                                /* DebugWordChoices */

void EndDangerousAmbigs (  ) 

Definition at line 801 of file stopper.cpp.

References AmbigFor, destroy_nodes(), Efree(), MAX_CLASS_ID, and NULL.

Referenced by EndAdaptiveClassifier().

00801                           {
00802   if (AmbigFor != NULL) {
00803     for (int i = 0; i <= MAX_CLASS_ID; ++i) {
00804       destroy_nodes(AmbigFor[i], Efree);
00805     }
00806     Efree(AmbigFor);
00807     AmbigFor = NULL;
00808   }
00809 }

void ExpandChoice ( VIABLE_CHOICE  Choice,
EXPANDED_CHOICE ExpandedChoice 
)

Expands Choice and places the results in ExpandedChoice.

Parameters:
Choice Choice to be expanded
ExpandedChoice Place to put resulting expanded choice
Note:
Globals: none
Returns:
none (results are placed in ExpandedChoice)
The primary function of expansion is to create two arrays, one which holds the corresponding certainty for each chunk in Choice, and one which holds the class for each chunk.
Note:
Exceptions: none
Date:
Fri May 31 15:21:57 1991, DSJ, Created.

Definition at line 1007 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Certainty, EXPANDED_CHOICE::Choice, EXPANDED_CHOICE::ChunkCertainty, EXPANDED_CHOICE::ChunkClass, CHAR_CHOICE::Class, VIABLE_CHOICE_STRUCT::Length, and CHAR_CHOICE::NumChunks.

Referenced by FilterWordChoices(), and FindClassifierErrors().

01007                                                                          { 
01008   int i, j, Chunk;
01009 
01010   ExpandedChoice->Choice = Choice;
01011   for (i = 0, Chunk = 0; i < Choice->Length; i++)
01012   for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) {
01013     ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty;
01014     ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class;
01015   }
01016 }                                /* ExpandChoice */

AMBIG_TABLE * FillAmbigTable (  ) 

Allocates a new ambiguity table and fills it in from the file specified by DangerousAmbigs.

Parameters:
none 
Note:
Globals: DangerousAmbigs Filename of dangerous ambig info
Returns:
Pointer to new ambiguity table.
An ambiguity table is an array of lists. The array is indexed by a class id. Therefore, each entry in the table provides a list of potential ambiguities which can start with the corresponding character. Each potential ambiguity is described by a string which contains the remainder of the test string, followed by a space, followed by the replacement string. For example, the ambiguity "rn -> m" would be located in the table at index 'r'. The string corresponding to this ambiguity would be "n m".
Note:
Exceptions: none
Date:
Thu May 9 09:20:57 1991, DSJ, Created.

Definition at line 1040 of file stopper.cpp.

References DangerousAmbigs, demodir, DoError(), Efopen(), Emalloc(), fscanf(), MAX_AMBIG_SIZE, MAX_CLASS_ID, NIL, and push_last().

Referenced by NoDangerousAmbig().

01040                               { 
01041   FILE *AmbigFile;
01042   AMBIG_TABLE *NewTable;
01043   int i;
01044   char TestString[256];
01045   char ReplacementString[256];
01046   char name[1024];
01047   char *AmbigSpec;
01048   int AmbigSize;
01049 
01050   strcpy(name, demodir);
01051   strcat(name, DangerousAmbigs);
01052   AmbigFile = Efopen (name, "r");
01053   NewTable = (AMBIG_TABLE *) Emalloc (sizeof (LIST) * (MAX_CLASS_ID + 1));
01054 
01055   for (i = 0; i <= MAX_CLASS_ID; i++)
01056     NewTable[i] = NIL;
01057 
01058   while (fscanf (AmbigFile, "%s", TestString) == 1 &&
01059   fscanf (AmbigFile, "%s", ReplacementString) == 1) {
01060     if (strlen (TestString) > MAX_AMBIG_SIZE ||
01061       strlen (ReplacementString) > MAX_AMBIG_SIZE)
01062       DoError (0, "Illegal ambiguity specification!");
01063 
01064     AmbigSize = strlen (TestString) + strlen (ReplacementString) + 1;
01065     AmbigSpec = (char *) Emalloc (sizeof (char) * AmbigSize);
01066 
01067     strcpy (AmbigSpec, &(TestString[1]));
01068     strcat (AmbigSpec, " ");
01069     strcat(AmbigSpec, ReplacementString);
01070     NewTable[TestString[0]] =
01071       push_last (NewTable[TestString[0]], AmbigSpec);
01072   }
01073   fclose(AmbigFile);
01074   return (NewTable);
01075 
01076 }                                /* FillAmbigTable */

void FilterWordChoices (  ) 

Removes from BestChoices all choices which are not within a reasonable range of the best choice.

Parameters:
none 
Note:
Globals: BestChoices Set of choices for current word
Returns:
none
Note:
Exceptions: none
Date:
Wed May 15 13:08:24 1991, DSJ, Created.

Definition at line 467 of file stopper.cpp.

References BestChoices, delete_d(), ExpandChoice(), first, FreeBadChoice(), NIL, rest, second, and set_rest.

Referenced by chop_word_main().

00467                          { 
00468   EXPANDED_CHOICE BestChoice;
00469 
00470   if (BestChoices == NIL || second (BestChoices) == NIL)
00471     return;
00472 
00473   /* compute certainties and class for each chunk in best choice */
00474   ExpandChoice ((VIABLE_CHOICE_STRUCT *) first (BestChoices), &BestChoice);
00475 
00476   set_rest (BestChoices, delete_d (rest (BestChoices),
00477     &BestChoice, FreeBadChoice));
00478 
00479 }                                /* FilterWordChoices */

void FindClassifierErrors ( FLOAT32  MinRating,
FLOAT32  MaxRating,
FLOAT32  RatingMargin,
FLOAT32  Thresholds[] 
)

Compares the best choice for the current word to the best raw choice to determine which characters were classified incorrectly by the classifier; places a separate threshold into Thresholds for each character in the word.

Parameters:
MinRating Limits how tight to make a template
MaxRating Limits how loose to make a template
RatingMargin Amount of margin to put in template
Thresholds[] Place to put error thresholds
Note:
Globals: none
Returns:
none (results are placed in Thresholds)
If the classifier was correct, MaxRating is placed into Thresholds. If the classifier was incorrect, the avg. match rating (error percentage) of the classifier's incorrect choice minus some margin is placed into Thresholds.

This can then be used by the caller to try to create a new template for the desired class that will classify the character with a rating better than the threshold value. The match rating placed into Thresholds is never allowed to be below MinRating in order to prevent trying to make overly tight templates.

Note:
Exceptions: none
Date:
Fri May 31 16:02:57 1991, DSJ, Created.

Definition at line 511 of file stopper.cpp.

References assert(), BestChoices, BestRawChoice, VIABLE_CHOICE_STRUCT::Blob, CertaintyScale, EXPANDED_CHOICE::ChunkCertainty, EXPANDED_CHOICE::ChunkClass, CHAR_CHOICE::Class, ExpandChoice(), first, VIABLE_CHOICE_STRUCT::Length, NIL, NULL, and CHAR_CHOICE::NumChunks.

Referenced by GetAdaptThresholds().

00513                                             {
00514   EXPANDED_CHOICE BestRaw;
00515   VIABLE_CHOICE Choice;
00516   int i, j, Chunk;
00517   FLOAT32 AvgRating;
00518   int NumErrorChunks;
00519 
00520   assert (BestChoices != NIL);
00521   assert (BestRawChoice != NULL);
00522 
00523   ExpandChoice(BestRawChoice, &BestRaw);
00524   Choice = (VIABLE_CHOICE) first (BestChoices);
00525 
00526   for (i = 0, Chunk = 0; i < Choice->Length; i++, Thresholds++) {
00527     AvgRating = 0.0;
00528     NumErrorChunks = 0;
00529 
00530     for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++)
00531     if (Choice->Blob[i].Class != BestRaw.ChunkClass[Chunk]) {
00532       AvgRating += BestRaw.ChunkCertainty[Chunk];
00533       NumErrorChunks++;
00534     }
00535 
00536     if (NumErrorChunks > 0) {
00537       AvgRating /= NumErrorChunks;
00538       *Thresholds = (AvgRating / -CertaintyScale) * (1.0 - RatingMargin);
00539     }
00540     else
00541       *Thresholds = MaxRating;
00542 
00543     if (*Thresholds > MaxRating)
00544       *Thresholds = MaxRating;
00545     if (*Thresholds < MinRating)
00546       *Thresholds = MinRating;
00547   }
00548 }                                /* FindClassifierErrors */

int FreeBadChoice ( void *  item1,
void *  item2 
)

If the certainty of any chunk in Choice is not ambiguous with the corresponding chunk in the best choice, free Choice and return TRUE, else FALSE.

Parameters:
item1 Choice to be tested
item2 Expanded best choice (an EXPANDED_CHOICE *) that item1 is compared against
Note:
Globals:
  • AmbigThresholdGain
  • AmbigThresholdOffset
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Wed May 15 13:20:26 1991, DSJ, Created.

Definition at line 1094 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, AmbigThreshold, VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Certainty, EXPANDED_CHOICE::Choice, EXPANDED_CHOICE::ChunkCertainty, EXPANDED_CHOICE::ChunkClass, CHAR_CHOICE::Class, VIABLE_CHOICE_STRUCT::Length, memfree(), CHAR_CHOICE::NumChunks, and TRUE.

Referenced by FilterWordChoices().

01095                                {  //EXPANDED_CHOICE     *BestChoice)
01096   int i, j, Chunk;
01097   FLOAT32 Threshold;
01098   VIABLE_CHOICE Choice;
01099   EXPANDED_CHOICE *BestChoice;
01100 
01101   Choice = (VIABLE_CHOICE) item1;
01102   BestChoice = (EXPANDED_CHOICE *) item2;
01103 
01104   Threshold = AmbigThreshold (BestChoice->Choice->AdjustFactor,
01105     Choice->AdjustFactor);
01106 
01107   for (i = 0, Chunk = 0; i < Choice->Length; i++)
01108     for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++)
01109       if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] &&
01110     Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] <
01111       Threshold) {
01112         memfree(Choice);
01113     return (TRUE);
01114   }
01115 
01116   return (FALSE);
01117 
01118 }                                /* FreeBadChoice */

void InitChoiceAccum (  ) 

Initializes the data structures used to keep track of the good word choices found for a word.

Parameters:
none 
Note:
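Globals: BestRawChoice and BestChoices (freed if set, then reset to NULL / NIL), CurrentSegmentation (all MAX_NUM_CHUNKS entries set to 1)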
Returns:
none
Note:
Exceptions: none
Date:
Fri May 17 07:59:00 1991, DSJ, Created.

Definition at line 589 of file stopper.cpp.

References BestChoices, BestRawChoice, CurrentSegmentation, destroy_nodes(), EnableChoiceAccum, MAX_NUM_CHUNKS, memfree(), NIL, and NULL.

Referenced by cc_recog(), and program_editdown().

00589                        { 
00590   BLOB_WIDTH *BlobWidth, *End;
00591 
00592   if (BestRawChoice)
00593     memfree(BestRawChoice);
00594 
00595   if (BestChoices)
00596     destroy_nodes(BestChoices, memfree);
00597 
00598   BestRawChoice = NULL;
00599   BestChoices = NIL;
00600   EnableChoiceAccum();
00601 
00602   for (BlobWidth = CurrentSegmentation,
00603     End = CurrentSegmentation + MAX_NUM_CHUNKS;
00604     BlobWidth < End; *BlobWidth++ = 1);
00605 
00606 }                                /* InitChoiceAccum */

void InitStopperVars (  ) 

Initializes the control variables used by the stopper.

Parameters:
none 
Note:
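Globals: DangerousAmbigs and WordToDebug (registered via string_variable), plus the stopper control variables set up by the Make...() calls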
Returns:
none
Note:
Exceptions: none
Date:
Thu May 9 10:06:04 1991, DSJ, Created.

Definition at line 561 of file stopper.cpp.

References DANGEROUS_AMBIGS, DangerousAmbigs, dummy, string_variable, and WordToDebug.

Referenced by init_dj_debug().

00561                        { 
00562   VALUE dummy;
00563 
00564   string_variable (DangerousAmbigs, "DangerousAmbigs", DANGEROUS_AMBIGS);
00565   string_variable (WordToDebug, "WordToDebug", "");
00566 
00567   MakeNonDictCertainty();
00568   MakeRejectCertaintyOffset();
00569   MakeSmallWordSize();
00570   MakeCertaintyPerChar();
00571   MakeCertaintyVariation();
00572   MakeStopperDebugLevel();
00573   MakeAmbigThresholdGain();
00574   MakeAmbigThresholdOffset();
00575 }                                /* InitStopperVars */

int LengthOfShortestAlphaRun ( register char *  Word  ) 

Return the length of the shortest alpha run in Word.

Parameters:
Word Word to be tested
Note:
Globals: none
Returns:
Return the length of the shortest alpha run in Word, or 0 if Word contains no alphabetic characters.
Note:
Exceptions: none
Date:
Tue May 14 07:50:45 1991, DSJ, Created.

Definition at line 1131 of file stopper.cpp.

References MAXINT.

Referenced by AcceptableChoice(), and AcceptableResult().

01131                                                   { 
01132   register int Shortest = MAXINT;
01133   register int Length;
01134 
01135   for (; *Word; Word++)
01136   if (isalpha (*Word)) {
01137     for (Length = 1, Word++; isalpha (*Word); Word++, Length++);
01138     if (Length < Shortest)
01139       Shortest = Length;
01140 
01141     if (*Word == 0)
01142       break;
01143   }
01144   if (Shortest == MAXINT)
01145     Shortest = 0;
01146 
01147   return (Shortest);
01148 
01149 }                                /* LengthOfShortestAlphaRun */

void LogNewRawChoice ( A_CHOICE Choice,
FLOAT32  AdjustFactor,
float  Certainties[] 
)

Compares Choice to the best raw (non-dict) choice so far; if new choice is better, best raw choice is updated.

Parameters:
Choice New raw choice for current word
AdjustFactor Adjustment factor which was applied to choice
Certainties Certainties for each char in new choice
Note:
Globals: BestRawChoice Best raw choice so far for current word
Returns:
none
Note:
Exceptions: none
Date:
Wed May 15 09:57:19 1991, DSJ, Created.

Definition at line 623 of file stopper.cpp.

References BestRawChoice, ChoiceSameAs(), class_probability, KeepWordChoices, memfree(), NewViableChoice(), VIABLE_CHOICE_STRUCT::Rating, and ReplaceDuplicateChoice().

Referenced by permute_top_choice().

00623                                                                                {
00624   if (!KeepWordChoices)
00625     return;
00626 
00627   if (!BestRawChoice)
00628     BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
00629   else if (class_probability (Choice) < BestRawChoice->Rating) {
00630     if (ChoiceSameAs (Choice, BestRawChoice))
00631       ReplaceDuplicateChoice(BestRawChoice, Choice, AdjustFactor, Certainties);
00632     else {
00633       memfree(BestRawChoice);
00634       BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
00635     }
00636   }
00637 }                                /* LogNewRawChoice */

void LogNewSegmentation ( PIECES_STATE  BlobWidth  ) 

Updates the blob widths in CurrentSegmentation to be the same as provided in BlobWidth.

Parameters:
BlobWidth number of chunks in each blob in segmentation
Note:
Globals: CurrentSegmentation blob widths for current segmentation
Returns:
none
Note:
Exceptions: none
Date:
Mon May 20 11:52:26 1991, DSJ, Created.

Definition at line 650 of file stopper.cpp.

References CurrentSegmentation.

Referenced by evaluate_state().

00650                                                 { 
00651   BLOB_WIDTH *Segmentation;
00652 
00653   for (Segmentation = CurrentSegmentation; *BlobWidth != 0;
00654     BlobWidth++, Segmentation++)
00655   *Segmentation = *BlobWidth;
00656   *Segmentation = 0;
00657 
00658 }                                /* LogNewSegmentation */

void LogNewSplit ( int  Blob  ) 

Adds one chunk to the specified blob for each choice in BestChoices and for the BestRawChoice.

Parameters:
Blob index of blob that was split
Note:
Globals:
  • BestRawChoice current best raw choice
  • BestChoices list of best choices found so far
Returns:
none
Note:
Exceptions: none
Date:
Mon May 20 11:38:56 1991, DSJ, Created.

Definition at line 674 of file stopper.cpp.

References AddNewChunk(), BestChoices, BestRawChoice, first, and iterate.

Referenced by improve_by_chopping().

00674                            { 
00675   LIST Choices;
00676 
00677   if (BestRawChoice) {
00678     AddNewChunk(BestRawChoice, Blob);
00679   }
00680 
00681   Choices = BestChoices;
00682   iterate(Choices) {
00683     AddNewChunk ((VIABLE_CHOICE) first (Choices), Blob);
00684   }
00685 
00686 }                                /* LogNewSplit */

void LogNewWordChoice ( A_CHOICE Choice,
FLOAT32  AdjustFactor,
float  Certainties[] 
)

Adds Choice to BestChoices if the adjusted certainty for Choice is within a reasonable range of the best choice in BestChoices.

Parameters:
Choice new choice for current word
AdjustFactor adjustment factor which was applied to choice
Certainties certainties for each char in new choice
Note:
Globals: BestChoices best choices so far for current word
Returns:
none
The BestChoices list is kept in sorted order by rating. Duplicates are removed.
Note:
Exceptions: none
Date:
Wed May 15 09:57:19 1991, DSJ, Created.

Definition at line 706 of file stopper.cpp.

References AmbigThreshold, BestCertainty, BestChoices, BestFactor, BestRating, ChoiceSameAs(), class_certainty, class_probability, CmpChoiceRatings(), count(), delete_d(), destroy_nodes(), Efree(), first, is_same_node(), iterate, KeepWordChoices, NewViableChoice(), NIL, nth_cell(), NULL, PrintViableChoice(), ReplaceDuplicateChoice(), rest, s_adjoin(), set_rest, and StopperDebugLevel.

Referenced by adjust_non_word(), adjust_number(), and adjust_word().

00707                                            {
00708   VIABLE_CHOICE NewChoice;
00709   LIST Choices;
00710   FLOAT32 Threshold;
00711 
00712   if (!KeepWordChoices)
00713     return;
00714 
00715   /* throw out obviously bad choices to save some work */
00716   if (BestChoices != NIL) {
00717     Threshold = AmbigThreshold (BestFactor (BestChoices), AdjustFactor);
00718     if (Threshold > -AmbigThresholdOffset)
00719       Threshold = -AmbigThresholdOffset;
00720     if (class_certainty (Choice) - BestCertainty (BestChoices) < Threshold)
00721       return;
00722   }
00723 
00724   /* see if a choice with the same text string has already been found */
00725   NewChoice = NULL;
00726   Choices = BestChoices;
00727   iterate(Choices) {
00728     if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first (Choices)))
00729       if (class_probability (Choice) < BestRating (Choices))
00730         NewChoice = (VIABLE_CHOICE) first (Choices);
00731     else
00732       return;
00733   }
00734 
00735   if (NewChoice) {
00736     ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties);
00737     BestChoices = delete_d (BestChoices, NewChoice, is_same_node);
00738   }
00739   else {
00740     NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties);
00741   }
00742 
00743   BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings);
00744   if (StopperDebugLevel >= 2)
00745     PrintViableChoice (stdout, "New Word Choice:  ", NewChoice);
00746   if (count (BestChoices) > tessedit_truncate_wordchoice_log) {
00747     Choices =
00748       (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log);
00749     destroy_nodes (rest (Choices), Efree);
00750     set_rest(Choices, NIL);
00751   }
00752 
00753 }                                /* LogNewWordChoice */

VIABLE_CHOICE NewViableChoice ( A_CHOICE Choice,
FLOAT32  AdjustFactor,
float  Certainties[] 
)

Allocate a new viable choice data structure, copy Choice, Certainties, and CurrentSegmentation into it, and return a pointer to it.

Parameters:
Choice Choice to be converted to a viable choice
AdjustFactor Factor used to adjust ratings for Choice
Certainties Certainty for each character in Choice
Note:
Globals: CurrentSegmentation Segmentation corresponding to Choice
Returns:
Ptr to new viable choice.
Note:
Exceptions: none
Date:
Thu May 16 15:28:29 1991, DSJ, Created.

Definition at line 1167 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, assert(), VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Certainty, VIABLE_CHOICE_STRUCT::Certainty, CHAR_CHOICE::Class, class_certainty, class_probability, class_string, CurrentSegmentation, Emalloc(), VIABLE_CHOICE_STRUCT::Length, CHAR_CHOICE::NumChunks, and VIABLE_CHOICE_STRUCT::Rating.

Referenced by LogNewRawChoice(), and LogNewWordChoice().

01167                                                                                {
01168   VIABLE_CHOICE NewChoice;
01169   int Length;
01170   char *Word;
01171   CHAR_CHOICE *NewChar;
01172   BLOB_WIDTH *BlobWidth;
01173 
01174   Length = strlen (class_string (Choice));
01175   assert (Length <= MAX_NUM_CHUNKS && Length > 0);
01176 
01177   NewChoice = (VIABLE_CHOICE) Emalloc (sizeof (VIABLE_CHOICE_STRUCT) +
01178     (Length - 1) * sizeof (CHAR_CHOICE));
01179 
01180   NewChoice->Rating = class_probability (Choice);
01181   NewChoice->Certainty = class_certainty (Choice);
01182   NewChoice->AdjustFactor = AdjustFactor;
01183   NewChoice->Length = Length;
01184 
01185   for (Word = class_string (Choice),
01186     NewChar = &(NewChoice->Blob[0]),
01187     BlobWidth = CurrentSegmentation;
01188   *Word; Word++, NewChar++, Certainties++, BlobWidth++) {
01189     NewChar->Class = *Word;
01190     NewChar->NumChunks = *BlobWidth;
01191     NewChar->Certainty = *Certainties;
01192   }
01193 
01194   return (NewChoice);
01195 
01196 }                                /* NewViableChoice */

int NoDangerousAmbig ( const char *  Word,
DANGERR fixpt 
)

Definition at line 776 of file stopper.cpp.

References AmbigFor, AmbigsFound(), FALSE, FillAmbigTable(), DANGERR::index, MAX_WERD_SIZE, NULL, and TRUE.

Referenced by AcceptableChoice(), and word_adaptable().

00776                                                        {
00777 
00778   char NewWord[MAX_WERD_SIZE];
00779   char *NextNewChar;
00780   int bad_index = 0;
00781 
00782   if (!AmbigFor)
00783     AmbigFor = FillAmbigTable ();
00784 
00785   NextNewChar = NewWord;
00786   while (*Word)
00787   if (AmbigsFound (NewWord, NextNewChar, Word + 1, AmbigFor[*Word], fixpt)) {
00788     if (fixpt != NULL)
00789       fixpt->index = bad_index;
00790     return (FALSE);
00791   }
00792   else {
00793     *NextNewChar++ = *Word++;
00794     bad_index++;
00795   }
00796 
00797   return (TRUE);
00798 
00799 }                                /* NoDangerousAmbig */

void PrintViableChoice ( FILE *  File,
const char *  Label,
VIABLE_CHOICE  Choice 
)

Dumps a text representation of the specified Choice to File.

Parameters:
File Open text file to print Choice to
Label Text label to be printed with Choice
Choice Choice to be printed
Note:
Globals: none
Returns:
none
Note:
Exceptions: none
Date:
Mon May 20 11:16:44 1991, DSJ, Created.

Definition at line 1211 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Certainty, VIABLE_CHOICE_STRUCT::Certainty, CHAR_CHOICE::Class, VIABLE_CHOICE_STRUCT::Length, CHAR_CHOICE::NumChunks, and VIABLE_CHOICE_STRUCT::Rating.

Referenced by DebugWordChoices(), and LogNewWordChoice().

01211                                                                             { 
01212   int i, j;
01213 
01214   fprintf (File, "%s", Label);
01215 
01216   fprintf (File, "(R=%5.1f, C=%4.1f, F=%4.2f)  ",
01217     Choice->Rating, Choice->Certainty, Choice->AdjustFactor);
01218 
01219   for (i = 0; i < Choice->Length; i++)
01220     fprintf (File, "%c", Choice->Blob[i].Class);
01221   fprintf (File, "\n");
01222 
01223   for (i = 0; i < Choice->Length; i++) {
01224     fprintf (File, "  %c", Choice->Blob[i].Class);
01225     for (j = 0; j < Choice->Blob[i].NumChunks - 1; j++)
01226       fprintf (File, "   ");
01227   }
01228   fprintf (File, "\n");
01229 
01230   for (i = 0; i < Choice->Length; i++) {
01231     for (j = 0; j < Choice->Blob[i].NumChunks; j++)
01232       fprintf (File, "%3d", (int) (Choice->Blob[i].Certainty * -10.0));
01233   }
01234   fprintf (File, "\n");
01235 
01236 }                                /* PrintViableChoice */

void ReplaceDuplicateChoice ( VIABLE_CHOICE  OldChoice,
A_CHOICE NewChoice,
FLOAT32  AdjustFactor,
float  Certainties[] 
)

Updates OldChoice with relevant information from the new choice whenever a better segmentation (or contextual interpretation) is found for a word which already exists.

Parameters:
OldChoice Existing viable choice to be replaced
NewChoice Choice to replace OldChoice with
AdjustFactor Factor used to adjust ratings for OldChoice
Certainties Certainty for each character in OldChoice
Note:
Globals: CurrentSegmentation Segmentation for NewChoice
Returns:
none
The text string itself does not need to be copied since, by definition, it has not changed.
Note:
Exceptions: none
Date:
Fri May 17 13:35:58 1991, DSJ, Created.

Definition at line 1258 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::AdjustFactor, VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Certainty, VIABLE_CHOICE_STRUCT::Certainty, class_certainty, class_probability, class_string, CurrentSegmentation, CHAR_CHOICE::NumChunks, and VIABLE_CHOICE_STRUCT::Rating.

Referenced by LogNewRawChoice(), and LogNewWordChoice().

01260                                            {
01261   char *Word;
01262   CHAR_CHOICE *NewChar;
01263   BLOB_WIDTH *BlobWidth;
01264 
01265   OldChoice->Rating = class_probability (NewChoice);
01266   OldChoice->Certainty = class_certainty (NewChoice);
01267   OldChoice->AdjustFactor = AdjustFactor;
01268 
01269   for (Word = class_string (NewChoice),
01270     NewChar = &(OldChoice->Blob[0]),
01271     BlobWidth = CurrentSegmentation;
01272   *Word; Word++, NewChar++, Certainties++, BlobWidth++) {
01273     NewChar->NumChunks = *BlobWidth;
01274     NewChar->Certainty = *Certainties;
01275   }
01276 }                                /* ReplaceDuplicateChoice */

void SettupStopperPass1 (  ) 

Performs any setup of stopper variables that is needed in preparation for the first pass.

Parameters:
none 
Note:
Globals: RejectOffset Offset allowed before word is rejected
Returns:
none
Note:
Exceptions: none
Date:
Mon Jun 3 12:32:00 1991, DSJ, Created.

Definition at line 822 of file stopper.cpp.

References RejectOffset.

Referenced by SettupPass1().

00822                           { 
00823   RejectOffset = 0.0;
00824 }                                /* SettupStopperPass1 */

void SettupStopperPass2 (  ) 

Performs any setup of stopper variables that is needed in preparation for the second pass.

Parameters:
none 
Note:
Globals: RejectOffset Offset allowed before word is rejected
Returns:
none
Note:
Exceptions: none
Date:
Mon Jun 3 12:32:00 1991, DSJ, Created.

Definition at line 838 of file stopper.cpp.

References RejectCertaintyOffset, and RejectOffset.

Referenced by SettupPass2().

00838                           { 
00839   RejectOffset = RejectCertaintyOffset;
00840 }                                /* SettupStopperPass2 */

int StringSameAs ( const char *  String,
VIABLE_CHOICE  ViableChoice 
)

Compares String to ViableChoice and returns TRUE if they are the same, FALSE otherwise.

Parameters:
String String to compare to ViableChoice
ViableChoice Viable choice to compare to String
Note:
Globals: none
Returns:
TRUE or FALSE.
Note:
Exceptions: none
Date:
Fri May 17 08:48:04 1991, DSJ, Created.

Definition at line 1291 of file stopper.cpp.

References VIABLE_CHOICE_STRUCT::Blob, CHAR_CHOICE::Class, FALSE, and TRUE.

Referenced by ChoiceSameAs(), CurrentBestChoiceIs(), and DebugWordChoices().

01291                                                                  { 
01292   CHAR_CHOICE *Char;
01293   int i;
01294 
01295   for (Char = &(ViableChoice->Blob[0]), i = 0;
01296     i < ViableChoice->Length; String++, Char++, i++)
01297   if (*String != Char->Class)
01298     return (FALSE);
01299 
01300   if (*String == 0)
01301     return (TRUE);
01302   else
01303     return (FALSE);
01304 
01305 }                                /* StringSameAs */

int UniformCertainties ( CHOICES_LIST  Choices,
A_CHOICE BestChoice 
)

Returns TRUE if the certainty of the BestChoice word is within a reasonable range of the average certainties for the best choices for each character in the segmentation.

Parameters:
Choices Choices for current segmentation
BestChoice Best choice for current segmentation
Note:
Globals: CertaintyVariation Max allowed certainty variation
Returns:
TRUE or FALSE.
This test is used to catch words in which one character is much worse than the other characters in the word (i.e. FALSE will be returned in that case). The algorithm computes the mean and standard deviation of the per-character certainties after the worst certainty has been excluded. Words with fewer than three characters always pass the test.
Note:
Exceptions: none
Date:
Tue May 14 08:23:21 1991, DSJ, Created.

Definition at line 1327 of file stopper.cpp.

References array_count, array_index, best_certainty, class_certainty, cprintf(), FALSE, for_each_choice, MAX_FLOAT32, Mean(), NonDictCertainty, StopperDebugLevel, and TRUE.

Referenced by AcceptableChoice().

01327                                                                    { 
01328   int i;
01329   CHOICES CharChoices;
01330   float Certainty;
01331   float WorstCertainty = MAX_FLOAT32;
01332   float CertaintyThreshold;
01333   FLOAT64 TotalCertainty;
01334   FLOAT64 TotalCertaintySquared;
01335   FLOAT64 Variance;
01336   FLOAT32 Mean, StdDev;
01337   int WordLength;
01338 
01339   WordLength = array_count (Choices);
01340   if (WordLength < 3)
01341     return (TRUE);
01342 
01343   TotalCertainty = TotalCertaintySquared = 0.0;
01344   for_each_choice(Choices, i) {
01345     CharChoices = (CHOICES) array_index (Choices, i);
01346     Certainty = best_certainty (CharChoices);
01347     TotalCertainty += Certainty;
01348     TotalCertaintySquared += Certainty * Certainty;
01349     if (Certainty < WorstCertainty)
01350       WorstCertainty = Certainty;
01351   }
01352 
01353   /* subtract off worst certainty from statistics */
01354   WordLength--;
01355   TotalCertainty -= WorstCertainty;
01356   TotalCertaintySquared -= WorstCertainty * WorstCertainty;
01357 
01358   Mean = TotalCertainty / WordLength;
01359   Variance = ((WordLength * TotalCertaintySquared -
01360     TotalCertainty * TotalCertainty) /
01361     (WordLength * (WordLength - 1)));
01362   if (Variance < 0.0)
01363     Variance = 0.0;
01364   StdDev = sqrt (Variance);
01365 
01366   CertaintyThreshold = Mean - CertaintyVariation * StdDev;
01367   if (CertaintyThreshold > NonDictCertainty)
01368     CertaintyThreshold = NonDictCertainty;
01369 
01370   if (class_certainty (BestChoice) < CertaintyThreshold) {
01371     if (StopperDebugLevel >= 1)
01372       cprintf
01373         ("Stopper:  Non-uniform certainty = %4.1f (m=%4.1f, s=%4.1f, t=%4.1f)\n",
01374         class_certainty (BestChoice), Mean, StdDev, CertaintyThreshold);
01375     return (FALSE);
01376   }
01377   else
01378     return (TRUE);
01379 
01380 }                                /* UniformCertainties */


Variable Documentation

AMBIG_TABLE* AmbigFor = NULL [static]

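Table of dangerous ambiguities; it is filled on first use by NoDangerousAmbig(). The description that follows documents NoDangerousAmbig(), which checks each letter in Word against a list of potentially ambiguous characters.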

Parameters:
Word Word to check for dangerous ambiguities
fixpt Point to fix
Note:
Globals: none
Returns:
TRUE if Word contains no dangerous ambiguities.
If a match is found that letter is replaced with its ambiguity and tested in the dictionary. If the ambiguous word is found in the dictionary, FALSE is returned. Otherwise, the search continues for other ambiguities. If no ambiguities that match in the dictionary are found, TRUE is returned.
Note:
Exceptions: none
Date:
Mon May 6 16:28:56 1991, DSJ, Created.

Definition at line 774 of file stopper.cpp.

Referenced by EndDangerousAmbigs(), and NoDangerousAmbig().

LIST BestChoices = NIL [static]

Definition at line 166 of file stopper.cpp.

Referenced by AcceptableResult(), AlternativeChoicesWorseThan(), CurrentBestChoiceAdjustFactor(), CurrentBestChoiceIs(), CurrentWordAmbig(), DebugWordChoices(), FilterWordChoices(), FindClassifierErrors(), InitChoiceAccum(), LogNewSplit(), and LogNewWordChoice().

VIABLE_CHOICE BestRawChoice = NULL [static]

structures to keep track of viable word choices

Definition at line 165 of file stopper.cpp.

Referenced by DebugWordChoices(), FindClassifierErrors(), InitChoiceAccum(), LogNewRawChoice(), and LogNewSplit().

float CertaintyScale

Note:
File: stopper.cpp
Stopping criteria for word classifier.
Author:
Dan Johnson
Date:
Mon Apr 29 14:56:49 1991, DSJ, Created.
(c) Copyright Hewlett-Packard Company, 1988.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Referenced by ConvertMatchesToChoices(), and FindClassifierErrors().

PIECES_STATE CurrentSegmentation [static]

Definition at line 167 of file stopper.cpp.

Referenced by InitChoiceAccum(), LogNewSegmentation(), NewViableChoice(), and ReplaceDuplicateChoice().

const char* DangerousAmbigs = DANGEROUS_AMBIGS [static]

Name of file containing potentially dangerous ambiguities

Definition at line 152 of file stopper.cpp.

Referenced by FillAmbigTable(), and InitStopperVars().

BOOL8 KeepWordChoices = TRUE

flag used to disable accumulation of word choices during compound word permutation

Definition at line 159 of file stopper.cpp.

Referenced by LogNewRawChoice(), and LogNewWordChoice().

FLOAT32 RejectOffset = 0.0 [static]

additional certainty padding allowed before a word is rejected

Definition at line 162 of file stopper.cpp.

Referenced by AcceptableResult(), SettupStopperPass1(), and SettupStopperPass2().

char* WordToDebug = NULL [static]

Word for which stopper debug information should be printed to stdout

Definition at line 155 of file stopper.cpp.

Referenced by DebugWordChoices(), and InitStopperVars().


Generated on Wed Feb 28 19:49:23 2007 for Tesseract by  doxygen 1.5.1