#include "choicearr.h"
#include "states.h"
Go to the source code of this file.
#define DisableChoiceAccum() (KeepWordChoices = FALSE)
#define EnableChoiceAccum() (KeepWordChoices = TRUE)
BLOB_WIDTH: type of UINT8; holds the width of a blob, in units of chunks.
Used as follows:
for (BlobWidth = CurrentSegmentation, End = CurrentSegmentation + MAX_NUM_CHUNKS; BlobWidth < End; *BlobWidth++ = 1)
int AcceptableChoice(CHOICES_LIST Choices, A_CHOICE *BestChoice, A_CHOICE *RawChoice, DANGERR *fixpt)
Return TRUE if the results from this segmentation are good enough to stop; else FALSE.
Parameters:
Choices: Choices for current segmentation
BestChoice: Best choice for current segmentation
RawChoice: Best raw choice for current segmentation
fixpt: point to fix
Definition at line 226 of file stopper.cpp.
References case_ok(), CertaintyPerChar, class_certainty, class_string, cprintf(), FALSE, DANGERR::index, LengthOfShortestAlphaRun(), NoDangerousAmbig(), NonDictCertainty, NULL, punctuation_ok(), StopperDebugLevel, TRUE, UniformCertainties(), valid_number(), and valid_word().
Referenced by chop_word_main(), evaluate_state(), and improve_by_chopping().
00229 { 00230 float CertaintyThreshold = NonDictCertainty; 00231 int WordSize; 00232 00233 if (fixpt != NULL) 00234 fixpt->index = -1; 00235 if ((BestChoice == NULL) || (class_string (BestChoice) == NULL)) 00236 return (FALSE); 00237 00238 if (StopperDebugLevel >= 1) 00239 cprintf ("\nStopper: %s (word=%c, case=%c, punct=%c)\n", 00240 class_string (BestChoice), 00241 (valid_word (class_string (BestChoice)) ? 'y' : 'n'), 00242 (case_ok (class_string (BestChoice)) ? 'y' : 'n'), 00243 ((punctuation_ok (class_string (BestChoice)) != 00244 -1) ? 'y' : 'n')); 00245 00246 if (valid_word (class_string (BestChoice)) && 00247 case_ok (class_string (BestChoice)) && 00248 punctuation_ok (class_string (BestChoice)) != -1) { 00249 WordSize = LengthOfShortestAlphaRun (class_string (BestChoice)); 00250 WordSize -= SmallWordSize; 00251 if (WordSize < 0) 00252 WordSize = 0; 00253 CertaintyThreshold += WordSize * CertaintyPerChar; 00254 } 00255 else if (stopper_numbers_on && valid_number (class_string (BestChoice))) { 00256 CertaintyThreshold += stopper_numbers_on * CertaintyPerChar; 00257 } 00258 00259 if (StopperDebugLevel >= 1) 00260 cprintf ("Stopper: Certainty = %4.1f, Threshold = %4.1f\n", 00261 class_certainty (BestChoice), CertaintyThreshold); 00262 00263 if (NoDangerousAmbig (class_string (BestChoice), fixpt) 00264 && class_certainty (BestChoice) > CertaintyThreshold && 00265 UniformCertainties (Choices, BestChoice)) 00266 return (TRUE); 00267 else 00268 return (FALSE); 00269 00270 } /* AcceptableChoice */
Return FALSE if the best choice for the current word is questionable and should be tried again on the second pass or should be flagged to the user.
Parameters:
BestChoice: Best choice for current word
RawChoice: Best raw choice for current word
Definition at line 292 of file stopper.cpp.
References BestChoices, case_ok(), CertaintyPerChar, class_certainty, class_string, cprintf(), CurrentWordAmbig(), FALSE, LengthOfShortestAlphaRun(), NIL, NonDictCertainty, NULL, punctuation_ok(), RejectOffset, rest, StopperDebugLevel, TRUE, and valid_word().
Referenced by tess_acceptable_word().
00292 { 00293 float CertaintyThreshold = NonDictCertainty - RejectOffset; 00294 int WordSize; 00295 00296 if (StopperDebugLevel >= 1) 00297 cprintf ("\nRejecter: %s (word=%c, case=%c, punct=%c, unambig=%c)\n", 00298 class_string (BestChoice), 00299 (valid_word (class_string (BestChoice)) ? 'y' : 'n'), 00300 (case_ok (class_string (BestChoice)) ? 'y' : 'n'), 00301 ((punctuation_ok (class_string (BestChoice)) != -1) ? 'y' : 'n'), 00302 ((rest (BestChoices) != NIL) ? 'n' : 'y')); 00303 00304 if ((BestChoice == NULL) || 00305 (class_string (BestChoice) == NULL) || CurrentWordAmbig ()) 00306 return (FALSE); 00307 00308 if (valid_word (class_string (BestChoice)) && 00309 case_ok (class_string (BestChoice)) && 00310 punctuation_ok (class_string (BestChoice)) != -1) { 00311 WordSize = LengthOfShortestAlphaRun (class_string (BestChoice)); 00312 WordSize -= SmallWordSize; 00313 if (WordSize < 0) 00314 WordSize = 0; 00315 CertaintyThreshold += WordSize * CertaintyPerChar; 00316 } 00317 00318 if (StopperDebugLevel >= 1) 00319 cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f ", 00320 class_certainty (BestChoice), CertaintyThreshold); 00321 00322 if (class_certainty (BestChoice) > CertaintyThreshold) { 00323 if (StopperDebugLevel >= 1) 00324 cprintf ("ACCEPTED\n"); 00325 return (TRUE); 00326 } 00327 else { 00328 if (StopperDebugLevel >= 1) 00329 cprintf ("REJECTED\n"); 00330 return (FALSE); 00331 } 00332 } /* AcceptableResult */
int AlternativeChoicesWorseThan(FLOAT32 Threshold)
Returns TRUE if there are no alternative choices for the current word OR if all alternatives have an adjust factor worse than Threshold.
Parameters:
Threshold: Minimum adjust factor for alternative choices
Definition at line 347 of file stopper.cpp.
References VIABLE_CHOICE_STRUCT::AdjustFactor, BestChoices, FALSE, first, iterate, rest, and TRUE.
Referenced by AdaptableWord().
00347 { 00348 LIST Alternatives; 00349 VIABLE_CHOICE Choice; 00350 00351 Alternatives = rest (BestChoices); 00352 iterate(Alternatives) { 00353 Choice = (VIABLE_CHOICE) first (Alternatives); 00354 if (Choice->AdjustFactor <= Threshold) 00355 return (FALSE); 00356 } 00357 00358 return (TRUE); 00359 00360 } /* AlternativeChoicesWorseThan */
FLOAT32 CurrentBestChoiceAdjustFactor()
Return the adjustment factor for the best choice for the current word.
Parameters: none
Definition at line 392 of file stopper.cpp.
References VIABLE_CHOICE_STRUCT::AdjustFactor, BestChoices, first, MAX_FLOAT32, and NIL.
Referenced by AdaptableWord().
00392 { 00393 VIABLE_CHOICE BestChoice; 00394 00395 if (BestChoices == NIL) 00396 return (MAX_FLOAT32); 00397 00398 BestChoice = (VIABLE_CHOICE) first (BestChoices); 00399 return (BestChoice->AdjustFactor); 00400 00401 } /* CurrentBestChoiceAdjustFactor */
int CurrentBestChoiceIs(const char *Word)
Returns TRUE if Word is the same as the current best choice, FALSE otherwise.
Parameters:
Word: String to compare to current best choice
Definition at line 374 of file stopper.cpp.
References BestChoices, first, NIL, and StringSameAs().
Referenced by AdaptableWord().
00374 { 00375 return (BestChoices != NIL && 00376 StringSameAs (Word, (VIABLE_CHOICE) first (BestChoices))); 00377 00378 } /* CurrentBestChoiceIs */
int CurrentWordAmbig()
Returns TRUE if there are multiple good choices for the current word and FALSE otherwise.
Parameters: none
Definition at line 415 of file stopper.cpp.
References BestChoices, NIL, and rest.
Referenced by AcceptableResult(), and add_document_word().
00415 { 00416 return (rest (BestChoices) != NIL); 00417 00418 } /* CurrentWordAmbig */
void DebugWordChoices()
Print the current choices for this word to stdout.
Parameters: none
Definition at line 431 of file stopper.cpp.
References BestChoices, BestRawChoice, cprintf(), first, iterate, PrintViableChoice(), StopperDebugLevel, StringSameAs(), and WordToDebug.
Referenced by cc_recog().
00431 { 00432 LIST Choices; 00433 int i; 00434 char LabelString[80]; 00435 00436 if (StopperDebugLevel >= 1 || 00437 WordToDebug && BestChoices && 00438 StringSameAs (WordToDebug, (VIABLE_CHOICE) first (BestChoices))) { 00439 if (BestRawChoice) 00440 PrintViableChoice (stdout, "\nBest Raw Choice: ", BestRawChoice); 00441 00442 i = 1; 00443 Choices = BestChoices; 00444 if (Choices) 00445 cprintf ("\nBest Cooked Choices:\n"); 00446 iterate(Choices) { 00447 sprintf (LabelString, "Cooked Choice #%d: ", i); 00448 PrintViableChoice (stdout, LabelString, 00449 (VIABLE_CHOICE) first (Choices)); 00450 i++; 00451 } 00452 } 00453 } /* DebugWordChoices */
void EndDangerousAmbigs()
Definition at line 801 of file stopper.cpp.
References AmbigFor, destroy_nodes(), Efree(), MAX_CLASS_ID, and NULL.
Referenced by EndAdaptiveClassifier().
00801 { 00802 if (AmbigFor != NULL) { 00803 for (int i = 0; i <= MAX_CLASS_ID; ++i) { 00804 destroy_nodes(AmbigFor[i], Efree); 00805 } 00806 Efree(AmbigFor); 00807 AmbigFor = NULL; 00808 } 00809 }
void FilterWordChoices()
Removes from BestChoices all choices which are not within a reasonable range of the best choice.
Parameters: none
Definition at line 467 of file stopper.cpp.
References BestChoices, delete_d(), ExpandChoice(), first, FreeBadChoice(), NIL, rest, second, and set_rest.
Referenced by chop_word_main().
00467 { 00468 EXPANDED_CHOICE BestChoice; 00469 00470 if (BestChoices == NIL || second (BestChoices) == NIL) 00471 return; 00472 00473 /* compute certainties and class for each chunk in best choice */ 00474 ExpandChoice ((VIABLE_CHOICE_STRUCT *) first (BestChoices), &BestChoice); 00475 00476 set_rest (BestChoices, delete_d (rest (BestChoices), 00477 &BestChoice, FreeBadChoice)); 00478 00479 } /* FilterWordChoices */
void FindClassifierErrors(FLOAT32 MinRating, FLOAT32 MaxRating, FLOAT32 RatingMargin, FLOAT32 Thresholds[])
Compares the best choice for the current word to the best raw choice to determine which characters were classified incorrectly by the classifier; places a separate threshold into Thresholds for each character in the word.
Parameters:
MinRating: Limits how tight to make a template
MaxRating: Limits how loose to make a template
RatingMargin: Amount of margin to put in template
Thresholds[]: Place to put error thresholds
This can then be used by the caller to try to create a new template for the desired class that will classify the character with a rating better than the threshold value. The match rating placed into Thresholds is never allowed to be below MinRating in order to prevent trying to make overly tight templates.
Definition at line 511 of file stopper.cpp.
References assert(), BestChoices, BestRawChoice, VIABLE_CHOICE_STRUCT::Blob, CertaintyScale, EXPANDED_CHOICE::ChunkCertainty, EXPANDED_CHOICE::ChunkClass, CHAR_CHOICE::Class, ExpandChoice(), first, VIABLE_CHOICE_STRUCT::Length, NIL, NULL, and CHAR_CHOICE::NumChunks.
Referenced by GetAdaptThresholds().
00513 { 00514 EXPANDED_CHOICE BestRaw; 00515 VIABLE_CHOICE Choice; 00516 int i, j, Chunk; 00517 FLOAT32 AvgRating; 00518 int NumErrorChunks; 00519 00520 assert (BestChoices != NIL); 00521 assert (BestRawChoice != NULL); 00522 00523 ExpandChoice(BestRawChoice, &BestRaw); 00524 Choice = (VIABLE_CHOICE) first (BestChoices); 00525 00526 for (i = 0, Chunk = 0; i < Choice->Length; i++, Thresholds++) { 00527 AvgRating = 0.0; 00528 NumErrorChunks = 0; 00529 00530 for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) 00531 if (Choice->Blob[i].Class != BestRaw.ChunkClass[Chunk]) { 00532 AvgRating += BestRaw.ChunkCertainty[Chunk]; 00533 NumErrorChunks++; 00534 } 00535 00536 if (NumErrorChunks > 0) { 00537 AvgRating /= NumErrorChunks; 00538 *Thresholds = (AvgRating / -CertaintyScale) * (1.0 - RatingMargin); 00539 } 00540 else 00541 *Thresholds = MaxRating; 00542 00543 if (*Thresholds > MaxRating) 00544 *Thresholds = MaxRating; 00545 if (*Thresholds < MinRating) 00546 *Thresholds = MinRating; 00547 } 00548 } /* FindClassifierErrors */
void InitChoiceAccum()
Initializes the data structures used to keep track of the good word choices found for a word.
Parameters: none
Definition at line 589 of file stopper.cpp.
References BestChoices, BestRawChoice, CurrentSegmentation, destroy_nodes(), EnableChoiceAccum, MAX_NUM_CHUNKS, memfree(), NIL, and NULL.
Referenced by cc_recog(), and program_editdown().
00589 { 00590 BLOB_WIDTH *BlobWidth, *End; 00591 00592 if (BestRawChoice) 00593 memfree(BestRawChoice); 00594 00595 if (BestChoices) 00596 destroy_nodes(BestChoices, memfree); 00597 00598 BestRawChoice = NULL; 00599 BestChoices = NIL; 00600 EnableChoiceAccum(); 00601 00602 for (BlobWidth = CurrentSegmentation, 00603 End = CurrentSegmentation + MAX_NUM_CHUNKS; 00604 BlobWidth < End; *BlobWidth++ = 1); 00605 00606 } /* InitChoiceAccum */
void InitStopperVars()
Initializes the control variables used by the stopper.
Parameters: none
Definition at line 561 of file stopper.cpp.
References DANGEROUS_AMBIGS, DangerousAmbigs, dummy, string_variable, and WordToDebug.
Referenced by init_dj_debug().
00561 { 00562 VALUE dummy; 00563 00564 string_variable (DangerousAmbigs, "DangerousAmbigs", DANGEROUS_AMBIGS); 00565 string_variable (WordToDebug, "WordToDebug", ""); 00566 00567 MakeNonDictCertainty(); 00568 MakeRejectCertaintyOffset(); 00569 MakeSmallWordSize(); 00570 MakeCertaintyPerChar(); 00571 MakeCertaintyVariation(); 00572 MakeStopperDebugLevel(); 00573 MakeAmbigThresholdGain(); 00574 MakeAmbigThresholdOffset(); 00575 } /* InitStopperVars */
Compares Choice to the best raw (non-dict) choice so far; if new choice is better, best raw choice is updated.
Parameters:
Choice: New raw choice for current word
AdjustFactor: Adjustment factor which was applied to choice
Certainties: Certainties for each char in new choice
Definition at line 623 of file stopper.cpp.
References BestRawChoice, ChoiceSameAs(), class_probability, KeepWordChoices, memfree(), NewViableChoice(), VIABLE_CHOICE_STRUCT::Rating, and ReplaceDuplicateChoice().
Referenced by permute_top_choice().
00623 { 00624 if (!KeepWordChoices) 00625 return; 00626 00627 if (!BestRawChoice) 00628 BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); 00629 else if (class_probability (Choice) < BestRawChoice->Rating) { 00630 if (ChoiceSameAs (Choice, BestRawChoice)) 00631 ReplaceDuplicateChoice(BestRawChoice, Choice, AdjustFactor, Certainties); 00632 else { 00633 memfree(BestRawChoice); 00634 BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); 00635 } 00636 } 00637 } /* LogNewRawChoice */
void LogNewSegmentation(PIECES_STATE BlobWidth)
Updates the blob widths in CurrentSegmentation to be the same as provided in BlobWidth.
Parameters:
BlobWidth: number of chunks in each blob in segmentation
Definition at line 650 of file stopper.cpp.
References CurrentSegmentation.
Referenced by evaluate_state().
00650 { 00651 BLOB_WIDTH *Segmentation; 00652 00653 for (Segmentation = CurrentSegmentation; *BlobWidth != 0; 00654 BlobWidth++, Segmentation++) 00655 *Segmentation = *BlobWidth; 00656 *Segmentation = 0; 00657 00658 } /* LogNewSegmentation */
void LogNewSplit(int Blob)
Adds one chunk to the specified blob for each choice in BestChoices and for the BestRawChoice.
Parameters:
Blob: index of blob that was split
Definition at line 674 of file stopper.cpp.
References AddNewChunk(), BestChoices, BestRawChoice, first, and iterate.
Referenced by improve_by_chopping().
00674 { 00675 LIST Choices; 00676 00677 if (BestRawChoice) { 00678 AddNewChunk(BestRawChoice, Blob); 00679 } 00680 00681 Choices = BestChoices; 00682 iterate(Choices) { 00683 AddNewChunk ((VIABLE_CHOICE) first (Choices), Blob); 00684 } 00685 00686 } /* LogNewSplit */
Adds Choice to BestChoices if the adjusted certainty for Choice is within a reasonable range of the best choice in BestChoices.
Parameters:
Choice: new choice for current word
AdjustFactor: adjustment factor which was applied to choice
Certainties: certainties for each char in new choice
Definition at line 706 of file stopper.cpp.
References AmbigThreshold, BestCertainty, BestChoices, BestFactor, BestRating, ChoiceSameAs(), class_certainty, class_probability, CmpChoiceRatings(), count(), delete_d(), destroy_nodes(), Efree(), first, is_same_node(), iterate, KeepWordChoices, NewViableChoice(), NIL, nth_cell(), NULL, PrintViableChoice(), ReplaceDuplicateChoice(), rest, s_adjoin(), set_rest, and StopperDebugLevel.
Referenced by adjust_non_word(), adjust_number(), and adjust_word().
00707 { 00708 VIABLE_CHOICE NewChoice; 00709 LIST Choices; 00710 FLOAT32 Threshold; 00711 00712 if (!KeepWordChoices) 00713 return; 00714 00715 /* throw out obviously bad choices to save some work */ 00716 if (BestChoices != NIL) { 00717 Threshold = AmbigThreshold (BestFactor (BestChoices), AdjustFactor); 00718 if (Threshold > -AmbigThresholdOffset) 00719 Threshold = -AmbigThresholdOffset; 00720 if (class_certainty (Choice) - BestCertainty (BestChoices) < Threshold) 00721 return; 00722 } 00723 00724 /* see if a choice with the same text string has already been found */ 00725 NewChoice = NULL; 00726 Choices = BestChoices; 00727 iterate(Choices) { 00728 if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first (Choices))) 00729 if (class_probability (Choice) < BestRating (Choices)) 00730 NewChoice = (VIABLE_CHOICE) first (Choices); 00731 else 00732 return; 00733 } 00734 00735 if (NewChoice) { 00736 ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties); 00737 BestChoices = delete_d (BestChoices, NewChoice, is_same_node); 00738 } 00739 else { 00740 NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties); 00741 } 00742 00743 BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings); 00744 if (StopperDebugLevel >= 2) 00745 PrintViableChoice (stdout, "New Word Choice: ", NewChoice); 00746 if (count (BestChoices) > tessedit_truncate_wordchoice_log) { 00747 Choices = 00748 (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log); 00749 destroy_nodes (rest (Choices), Efree); 00750 set_rest(Choices, NIL); 00751 } 00752 00753 } /* LogNewWordChoice */
int NoDangerousAmbig(const char *Word, DANGERR *fixpt)
Definition at line 776 of file stopper.cpp.
References AmbigFor, AmbigsFound(), FALSE, FillAmbigTable(), DANGERR::index, MAX_WERD_SIZE, NULL, and TRUE.
Referenced by AcceptableChoice(), and word_adaptable().
00776 { 00777 00778 char NewWord[MAX_WERD_SIZE]; 00779 char *NextNewChar; 00780 int bad_index = 0; 00781 00782 if (!AmbigFor) 00783 AmbigFor = FillAmbigTable (); 00784 00785 NextNewChar = NewWord; 00786 while (*Word) 00787 if (AmbigsFound (NewWord, NextNewChar, Word + 1, AmbigFor[*Word], fixpt)) { 00788 if (fixpt != NULL) 00789 fixpt->index = bad_index; 00790 return (FALSE); 00791 } 00792 else { 00793 *NextNewChar++ = *Word++; 00794 bad_index++; 00795 } 00796 00797 return (TRUE); 00798 00799 } /* NoDangerousAmbig */
void SettupStopperPass1()
Performs any setup of stopper variables that is needed in preparation for the first pass.
Parameters: none
Definition at line 822 of file stopper.cpp.
References RejectOffset.
Referenced by SettupPass1().
00822 { 00823 RejectOffset = 0.0; 00824 } /* SettupStopperPass1 */
void SettupStopperPass2()
Performs any setup of stopper variables that is needed in preparation for the second pass.
Parameters: none
Definition at line 838 of file stopper.cpp.
References RejectCertaintyOffset, and RejectOffset.
Referenced by SettupPass2().
00838 { 00839 RejectOffset = RejectCertaintyOffset; 00840 } /* SettupStopperPass2 */
float CertaintyPerChar
Certainty to add for each dict char above SmallWordSize (2), defaults to -0.50
Referenced by AcceptableChoice(), AcceptableResult(), and set_tess_tweak_vars().
KeepWordChoices: flag used to disable accumulation of word choices during compound word permutation
Definition at line 159 of file stopper.cpp.
Referenced by LogNewRawChoice(), and LogNewWordChoice().
float NonDictCertainty
Certainty threshold for non-dict words, defaults to -2.50
Referenced by AcceptableChoice(), AcceptableResult(), set_tess_tweak_vars(), and UniformCertainties().
float RejectCertaintyOffset
Reject certainty offset, defaults to 1.0
Referenced by set_tess_tweak_vars(), and SettupStopperPass2().
StopperDebugLevel: Stopper debug level, defaults to 0
Referenced by AcceptableChoice(), AcceptableResult(), AmbigsFound(), DebugWordChoices(), LogNewWordChoice(), and UniformCertainties().