dict/permnum.cpp File Reference

#include "const.h"
#include "permnum.h"
#include "debug.h"
#include "permute.h"
#include "dawg.h"
#include "tordvars.h"
#include "stopper.h"
#include <math.h>
#include <ctype.h>

Go to the source code of this file.

Defines

Functions

Variables


Define Documentation

#define isleading ( ch   ) 

Value:

((ch == '{'  ) ||       \
   (ch == '['  ) ||       \
   (ch == '('  ) ||       \
   (ch == '#'  ) ||       \
   (ch == '@'  ) ||       \
   (ch == '$'  ))
Return non-zero if this is a leading type punctuation mark for the numeric grammar.

Definition at line 117 of file permnum.cpp.

Referenced by number_character_type().

#define isoperator ( ch   ) 

Value:

((ch == '*'  ) ||       \
   (ch == '+'  ) ||       \
   (ch == '-'  ) ||       \
   (ch == '/'  ) ||       \
   (ch == '.'  ) ||       \
   (ch == ':'  ) ||       \
   (ch == ','  ))
Return non-zero if this is an operator type punctuation mark for the numeric grammar.

Definition at line 143 of file permnum.cpp.

Referenced by number_character_type().

#define istrailing ( ch   ) 

Value:

((ch == '}'  ) ||       \
   (ch == ']'  ) ||       \
   (ch == ')'  ) ||       \
   (ch == ';'  ) ||       \
   (ch == ':'  ) ||       \
   (ch == ','  ) ||       \
   (ch == '.'  ) ||       \
   (ch == '%'  ))
Return non-zero if this is a trailing type punctuation mark for the numeric grammar.

Definition at line 129 of file permnum.cpp.

Referenced by number_character_type().


Function Documentation

void adjust_number ( A_CHOICE best_choice,
float *  certainty_array 
)

Assign an adjusted value to a string that is a number. The rating is scaled by good_number when the string is a pure number (see pure_number()) and by ok_number otherwise.

Definition at line 159 of file permnum.cpp.

References adjust_debug, class_probability, class_string, cprintf(), good_number, LogNewWordChoice(), ok_number, pure_number(), and RATING_PAD.

Referenced by append_number_choices().

00159                                                                   { 
00160   float adjust_factor;
00161 
00162   if (adjust_debug)
00163     cprintf ("Number: %s %4.2f ",
00164       class_string (best_choice), class_probability (best_choice));
00165 
00166   class_probability (best_choice) += RATING_PAD;
00167   if (pure_number (class_string (best_choice))) {
00168     class_probability (best_choice) *= good_number;
00169     adjust_factor = good_number;
00170     if (adjust_debug)
00171       cprintf (", %4.2f ", good_number);
00172   }
00173   else {
00174     class_probability (best_choice) *= ok_number;
00175     adjust_factor = ok_number;
00176     if (adjust_debug)
00177       cprintf (", N, %4.2f ", ok_number);
00178   }
00179 
00180   class_probability (best_choice) -= RATING_PAD;
00181   LogNewWordChoice(best_choice, adjust_factor, certainty_array);
00182   if (adjust_debug)
00183     cprintf (" --> %4.2f\n", class_probability (best_choice));
00184 }

void append_number_choices ( int  state,
char *  word,
CHOICES_LIST  choices,
int  char_index,
A_CHOICE this_choice,
float *  limit,
float  rating,
float  certainty,
float *  certainty_array,
CHOICES result 
)

Check to see whether or not the next choice is worth appending to the string being generated; if so then keep going deeper into the word.

Definition at line 192 of file permnum.cpp.

References adjust_number(), array_count, best_probability, class_certainty, class_probability, class_string, cprintf(), FALSE, first, free_choice(), JOIN_ON, kStateShift, min, new_choice(), number_debug, NUMBER_PERM, number_permute(), number_state_change(), pop_off, push_on, and TRUE.

Referenced by number_permute().

00201                                             {
00202   int word_ending = FALSE;
00203   int x;
00204 
00205   if (char_index == (array_count (choices) - 1))
00206     word_ending = TRUE;
00207 
00208   word[char_index] = class_string (this_choice)[0];
00209   word[char_index + 1] = '\0';
00210   if (word[char_index] == '\0')
00211     word[char_index] = ' ';
00212   certainty_array[char_index] = class_certainty (this_choice);
00213 
00214   rating += class_probability (this_choice);
00215   certainty = min (class_certainty (this_choice), certainty);
00216 
00217   if (rating < *limit) {
00218 
00219     state = number_state_change (state, word + char_index);
00220     if (number_debug)
00221       cprintf ("%-20s prob=%4.2f  state=%d\n", word, rating, state);
00222 
00223     if (state != -1) {
00224 
00225       if ((state >> kStateShift) == 3 &&
00226           char_index + 3 < array_count (choices)) {
00227         return;
00228       }
00229 
00230       if (word_ending) {
00231         for (x = 0; x <= char_index; x++) {
00232           if (isdigit (word[x])) {
00233             if (number_debug)
00234               cprintf ("new choice = %s\n", word);
00235             push_on (*result, new_choice (word, rating, certainty,
00236               -1, NUMBER_PERM));
00237             adjust_number ((A_CHOICE *) first (*result),
00238               certainty_array);
00239             if (best_probability (*result) > *limit) {
00240               free_choice (first (*result));
00241               pop_off(*result);
00242             }
00243             else {
00244               *limit = best_probability (*result);
00245               break;
00246             }
00247           }
00248         }
00249       }
00250       else {
00251         JOIN_ON (*result,
00252           number_permute (state, choices, char_index + 1, limit,
00253           word, rating, certainty,
00254           certainty_array));
00255       }
00256     }
00257   }
00258   else {
00259     if (number_debug)
00260       cprintf ("pruned word (%s, rating=%4.2f, limit=%4.2f)\n",
00261         word, rating, *limit);
00262   }
00263 }

void init_permnum (  ) 

Initialize anything that needs to be set up for the permute functions.

Definition at line 270 of file permnum.cpp.

Referenced by init_permute().

00270                     { 
00271   make_good_number(); 
00272   make_ok_number(); 
00273   make_number_debug(); 
00274   make_number_depth(); 
00275 }

int number_character_type ( char  ch,
int  state 
)

Decide which type of a character (with regard to the numeric state table) we are looking at.

Definition at line 283 of file permnum.cpp.

References allowed_char_strs, isleading, isoperator, istrailing, and NULL.

Referenced by number_state_change().

00285                                      {
00286   char lower_char = tolower (ch);
00287 
00288   if (isalpha (ch)) {
00289     if (state < 4 && strchr (allowed_char_strs[0], lower_char) != NULL)
00290       return 5;
00291     else if (state == 4
00292       && strchr (allowed_char_strs[1], lower_char) != NULL)
00293       return 6;
00294     else if (state == 5
00295       && strchr (allowed_char_strs[2], lower_char) != NULL)
00296       return 7;
00297     return 3;
00298   }
00299   else if (isdigit (ch))
00300     return (1);
00301   else if (isoperator (ch))
00302     return (2);
00303   else if (istrailing (ch))
00304     return (4);
00305   else if (isleading (ch))
00306     return (0);
00307   else
00308     return (-1);
00309 }

CHOICES number_permute ( int  state,
CHOICES_LIST  choices,
int  char_index,
float *  limit,
char *  word,
float  rating,
float  certainty,
float *  certainty_array 
)

Permute all the valid strings that match the 'grammar' of numbers.

The valid syntax for numbers is encoded in a state table. The permuter uses this state table to enumerate all the strings that can be produced using the input choices.

Definition at line 370 of file permnum.cpp.

References append_number_choices(), array_count, array_index, cprintf(), first, iterate_list, NIL, number_debug, and print_choices().

Referenced by append_number_choices(), and number_permute_and_select().

00377                                                {
00378   CHOICES result = NIL;
00379   CHOICES c;
00380   int depth = 0;
00381 
00382   if (number_debug) {
00383     cprintf ("number_permute (state=%d, char_index=%d, limit=%4.2f, ",
00384       state, char_index, *limit);
00385     cprintf ("word=%s, rating=%4.2f, certainty=%4.2f)\n",
00386       word, rating, certainty);
00387   }
00388   if (char_index < array_count (choices)) {
00389     iterate_list (c, (CHOICES) array_index (choices, char_index)) {
00390       if (depth++ < number_depth)
00391         append_number_choices (state, word, choices, char_index,
00392           (A_CHOICE *) first (c), limit, rating,
00393           certainty, certainty_array, &result);
00394     }
00395   }
00396   if (result && number_debug == 1)
00397     print_choices ("number_permute:", result);
00398   return (result);
00399 }

A_CHOICE* number_permute_and_select ( CHOICES_LIST  char_choices,
float  rating_limit 
)

Permute all the possible valid numbers and adjust their ratings; save the best rating.

Definition at line 406 of file permnum.cpp.

References array_count, best_probability, class_probability, clone_choice, display_ratings, first, free_choice(), MAX_WERD_LENGTH, MAXFLOAT, new_choice(), NIL, NO_PERM, NULL, number_permute(), pop_off, and print_choices().

Referenced by permute_all().

00407                                                         {
00408   CHOICES result = NIL;
00409   char word[MAX_WERD_LENGTH + 1];
00410   float certainty_array[MAX_WERD_LENGTH + 1];
00411   float rating = rating_limit;
00412   A_CHOICE *best_choice;
00413 
00414   best_choice = new_choice (NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
00415 
00416   if (array_count (char_choices) <= MAX_WERD_LENGTH) {
00417     word[0] = '\0';
00418     result = number_permute (0, char_choices, 0, &rating,
00419       word, 0.0, 0.0, certainty_array);
00420 
00421     if (display_ratings && result)
00422       print_choices ("number_permuter", result);
00423 
00424     while (result != NIL) {
00425       if (best_probability (result) < class_probability (best_choice)) {
00426         clone_choice (best_choice, first (result));
00427       }
00428       free_choice (first (result));
00429       pop_off(result);
00430     }
00431   }
00432   return (best_choice);
00433 }

int number_state_change ( int  state,
const char *  word 
)

Execute a state transition according to the state table and additional rules.

Returns:
>0 if matched in number_state_table, -1 if not a good word, <0 if last state-3 chars in word make an allowable word

Definition at line 321 of file permnum.cpp.

References allowed_alpha_strs, kMaxRepeats, kRepeatMask, kStateShift, new_state(), NULL, number_character_type(), and number_state_table.

Referenced by append_number_choices(), and valid_number().

00322                                           {  //current char
00323   int char_type;                 //type of char
00324   int new_state;                 //state to return
00325   int old_state = state >> kStateShift;
00326   int repeats = state & kRepeatMask;
00327   int index;
00328   char copy_word[4];             //tolowered chars
00329 
00330   char_type = number_character_type (*word, old_state);
00331   if (char_type == -1)
00332     return -1;
00333   new_state = number_state_table[old_state][char_type];
00334   if (new_state == old_state) {
00335     ++repeats;
00336     if (repeats >= kMaxRepeats[old_state])
00337       return -1;
00338   } else {
00339     repeats = 0;
00340   }
00341   if (new_state >= 0)
00342     return (new_state << kStateShift) | repeats;
00343   if (new_state == -99)
00344     return -1;
00345 
00346   //now check to see if the last state-3 chars in the word
00347   //make an allowable word. For now only 3 letter words
00348   //are allowed
00349   if (old_state != 6)
00350     return -1;                   //only 3 letters now
00351   copy_word[0] = tolower (word[-3]);
00352   copy_word[1] = tolower (word[-2]);
00353   copy_word[2] = tolower (word[-1]);
00354   copy_word[3] = '\0';
00355   for (index = 0; allowed_alpha_strs[index] != NULL; index++) {
00356     if (strcmp (copy_word, allowed_alpha_strs[index]) == 0)
00357       return (-new_state) << kStateShift;
00358   }
00359   return -1;                     //not a good word
00360 }

int pure_number ( const char *  string  ) 

Check to see if this string is a pure number (one that does not end with alphabetic characters).

Definition at line 440 of file permnum.cpp.

References FALSE, and TRUE.

Referenced by AdaptableWord(), and adjust_number().

00440                                     { 
00441   int x;
00442 
00443   for (x = strlen (string) - 1; x >= 0; x--) {
00444     if (isdigit (string[x])) {
00445       return (TRUE);
00446     }
00447     else if (isalpha (string[x]))
00448       return (FALSE);
00449   }
00450   return (FALSE);
00451 }

int valid_number ( const char *  string  ) 

Determine if this string contains a valid number.

Returns:
TRUE if it is

Definition at line 459 of file permnum.cpp.

References FALSE, and number_state_change().

Referenced by AcceptableChoice(), and AdaptableWord().

00459                                      { 
00460   int state = 0;
00461   int char_index;
00462   int num_chars = strlen (string);
00463   int num_digits = 0;
00464 
00465   for (char_index = 0; char_index < num_chars; char_index++) {
00466 
00467     state = number_state_change (state, string + char_index);
00468     if (state == -1)
00469       return (FALSE);
00470     if (isdigit (string[char_index]))
00471       num_digits++;
00472   }
00473   return num_digits > num_chars - num_digits;
00474 }


Variable Documentation

const char* allowed_alpha_strs[] [static]

Initial value:

 {
  "jan", "feb", "mar", "apr", "may", "jun",
  "jul", "aug", "sep", "oct", "nov", "dec", NULL
}
Note:
File: permnum.cpp (Formerly permnum.c)
Author:
Mark Seaman, OCR Technology
Date:
Created Oct 16 14:37:00 1987; modified Jul 2 14:12:43 1991 (Mark Seaman) marks
 * (c) Copyright 1987, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.

Definition at line 36 of file permnum.cpp.

Referenced by number_state_change().

const char* allowed_char_strs[] [static]

Initial value:

 {
  "adfjmnos", "aceopu", "bcglnrptvy"
}
FIX: do combinations of letters from the 3 groups make plausible words?

Is 'i' missing from this list because it has its own problems? And similar applies for 'w' because it looks like 2 'v's? (if so, why are 'm' & 'l' here?)

Used in number_character_type()

Definition at line 51 of file permnum.cpp.

Referenced by number_character_type().

const int kMaxRepeats[kNumStates]

Initial value:

 {
  3, 10, 3, 3, 3, 3, 3
}

Definition at line 91 of file permnum.cpp.

Referenced by number_state_change().

const int kNumStates = 7

FIX: Why 7? Why 'ldoat123'?

Definition at line 58 of file permnum.cpp.

const int kRepeatMask = (1 << kStateShift) - 1

Definition at line 89 of file permnum.cpp.

Referenced by number_state_change().

const int kStateShift = 4

Definition at line 88 of file permnum.cpp.

Referenced by append_number_choices(), and number_state_change().

int number_state_table[kNumStates][8] [static]

Initial value:

 { {
                                 
  
    0, 1, 1, -99, -99, 4, -99, -99
  },
  {                              
    -99, 1, 1, 3, 2, 4, 3, 3
  },
  {                              
    -99, -99, 1, -99, 2, -99, -99, -99
  },
  {                              
    -99, -99, 3, 3, 2, 3, 3, 3
  },
  {                              
    -99, -1, -1, -99, -2, -99, 5, -99
  },
  {                              
    -99, -1, -1, -99, -2, -99, -99, 6
  },
  {                              
    -99, -1, -1, -99, -2, -99, -99, -99
  }
}

Definition at line 60 of file permnum.cpp.

Referenced by number_state_change().


Generated on Wed Feb 28 19:49:22 2007 for Tesseract by  doxygen 1.5.1