#include "const.h"
#include "permnum.h"
#include "debug.h"
#include "permute.h"
#include "dawg.h"
#include "tordvars.h"
#include "stopper.h"
#include <math.h>
#include <ctype.h>
Go to the source code of this file.
/* Nonzero when ch is one of the characters number_character_type() reports as
 * leading punctuation (type 0): '{', '[', '(', '#', '@' or '$'.
 * The argument is parenthesized in every comparison so that any expression can
 * be passed safely; it is evaluated several times, so avoid side effects. */
#define isleading(ch) \
  (((ch) == '{') || \
   ((ch) == '[') || \
   ((ch) == '(') || \
   ((ch) == '#') || \
   ((ch) == '@') || \
   ((ch) == '$'))
Definition at line 117 of file permnum.cpp.
Referenced by number_character_type().
/* Nonzero when ch is one of the characters number_character_type() reports as
 * an operator (type 2): '*', '+', '-', '/', '.', ':' or ','.
 * The argument is parenthesized in every comparison so that any expression can
 * be passed safely; it is evaluated several times, so avoid side effects. */
#define isoperator(ch) \
  (((ch) == '*') || \
   ((ch) == '+') || \
   ((ch) == '-') || \
   ((ch) == '/') || \
   ((ch) == '.') || \
   ((ch) == ':') || \
   ((ch) == ','))
Definition at line 143 of file permnum.cpp.
Referenced by number_character_type().
/* Nonzero when ch is one of the characters number_character_type() reports as
 * trailing punctuation (type 4): '}', ']', ')', ';', ':', ',', '.' or '%'.
 * The argument is parenthesized in every comparison so that any expression can
 * be passed safely; it is evaluated several times, so avoid side effects. */
#define istrailing(ch) \
  (((ch) == '}') || \
   ((ch) == ']') || \
   ((ch) == ')') || \
   ((ch) == ';') || \
   ((ch) == ':') || \
   ((ch) == ',') || \
   ((ch) == '.') || \
   ((ch) == '%'))
Definition at line 129 of file permnum.cpp.
Referenced by number_character_type().
void adjust_number(A_CHOICE *best_choice, float *certainty_array)
Assign an adjusted rating to a word choice that is a number: the rating is scaled by good_number when the string is a pure number (see pure_number()) and by ok_number otherwise.
Definition at line 159 of file permnum.cpp.
References adjust_debug, class_probability, class_string, cprintf(), good_number, LogNewWordChoice(), ok_number, pure_number(), and RATING_PAD.
Referenced by append_number_choices().
00159 { 00160 float adjust_factor; 00161 00162 if (adjust_debug) 00163 cprintf ("Number: %s %4.2f ", 00164 class_string (best_choice), class_probability (best_choice)); 00165 00166 class_probability (best_choice) += RATING_PAD; 00167 if (pure_number (class_string (best_choice))) { 00168 class_probability (best_choice) *= good_number; 00169 adjust_factor = good_number; 00170 if (adjust_debug) 00171 cprintf (", %4.2f ", good_number); 00172 } 00173 else { 00174 class_probability (best_choice) *= ok_number; 00175 adjust_factor = ok_number; 00176 if (adjust_debug) 00177 cprintf (", N, %4.2f ", ok_number); 00178 } 00179 00180 class_probability (best_choice) -= RATING_PAD; 00181 LogNewWordChoice(best_choice, adjust_factor, certainty_array); 00182 if (adjust_debug) 00183 cprintf (" --> %4.2f\n", class_probability (best_choice)); 00184 }
void append_number_choices(int state, char *word, CHOICES_LIST choices, int char_index, A_CHOICE *this_choice, float *limit, float rating, float certainty, float *certainty_array, CHOICES *result)
Check to see whether or not the next choice is worth appending to the string being generated; if so then keep going deeper into the word.
Definition at line 192 of file permnum.cpp.
References adjust_number(), array_count, best_probability, class_certainty, class_probability, class_string, cprintf(), FALSE, first, free_choice(), JOIN_ON, kStateShift, min, new_choice(), number_debug, NUMBER_PERM, number_permute(), number_state_change(), pop_off, push_on, and TRUE.
Referenced by number_permute().
00201 { 00202 int word_ending = FALSE; 00203 int x; 00204 00205 if (char_index == (array_count (choices) - 1)) 00206 word_ending = TRUE; 00207 00208 word[char_index] = class_string (this_choice)[0]; 00209 word[char_index + 1] = '\0'; 00210 if (word[char_index] == '\0') 00211 word[char_index] = ' '; 00212 certainty_array[char_index] = class_certainty (this_choice); 00213 00214 rating += class_probability (this_choice); 00215 certainty = min (class_certainty (this_choice), certainty); 00216 00217 if (rating < *limit) { 00218 00219 state = number_state_change (state, word + char_index); 00220 if (number_debug) 00221 cprintf ("%-20s prob=%4.2f state=%d\n", word, rating, state); 00222 00223 if (state != -1) { 00224 00225 if ((state >> kStateShift) == 3 && 00226 char_index + 3 < array_count (choices)) { 00227 return; 00228 } 00229 00230 if (word_ending) { 00231 for (x = 0; x <= char_index; x++) { 00232 if (isdigit (word[x])) { 00233 if (number_debug) 00234 cprintf ("new choice = %s\n", word); 00235 push_on (*result, new_choice (word, rating, certainty, 00236 -1, NUMBER_PERM)); 00237 adjust_number ((A_CHOICE *) first (*result), 00238 certainty_array); 00239 if (best_probability (*result) > *limit) { 00240 free_choice (first (*result)); 00241 pop_off(*result); 00242 } 00243 else { 00244 *limit = best_probability (*result); 00245 break; 00246 } 00247 } 00248 } 00249 } 00250 else { 00251 JOIN_ON (*result, 00252 number_permute (state, choices, char_index + 1, limit, 00253 word, rating, certainty, 00254 certainty_array)); 00255 } 00256 } 00257 } 00258 else { 00259 if (number_debug) 00260 cprintf ("pruned word (%s, rating=%4.2f, limit=%4.2f)\n", 00261 word, rating, *limit); 00262 } 00263 }
void init_permnum()
Initialize anything that needs to be set up for the permute functions.
Definition at line 270 of file permnum.cpp.
Referenced by init_permute().
00270 { 00271 make_good_number(); 00272 make_ok_number(); 00273 make_number_debug(); 00274 make_number_depth(); 00275 }
int number_character_type(char ch, int state)
Decide which type of a character (with regard to the numeric state table) we are looking at.
Definition at line 283 of file permnum.cpp.
References allowed_char_strs, isleading, isoperator, istrailing, and NULL.
Referenced by number_state_change().
00285 { 00286 char lower_char = tolower (ch); 00287 00288 if (isalpha (ch)) { 00289 if (state < 4 && strchr (allowed_char_strs[0], lower_char) != NULL) 00290 return 5; 00291 else if (state == 4 00292 && strchr (allowed_char_strs[1], lower_char) != NULL) 00293 return 6; 00294 else if (state == 5 00295 && strchr (allowed_char_strs[2], lower_char) != NULL) 00296 return 7; 00297 return 3; 00298 } 00299 else if (isdigit (ch)) 00300 return (1); 00301 else if (isoperator (ch)) 00302 return (2); 00303 else if (istrailing (ch)) 00304 return (4); 00305 else if (isleading (ch)) 00306 return (0); 00307 else 00308 return (-1); 00309 }
CHOICES number_permute(int state, CHOICES_LIST choices, int char_index, float *limit, char *word, float rating, float certainty, float *certainty_array)
Permute all the valid strings that match the 'grammar' of numbers.
The valid syntax for numbers is encoded in a state table. The permuter uses this state table to enumerate all the strings that can be produced using the input choices.
Definition at line 370 of file permnum.cpp.
References append_number_choices(), array_count, array_index, cprintf(), first, iterate_list, NIL, number_debug, and print_choices().
Referenced by append_number_choices(), and number_permute_and_select().
00377 { 00378 CHOICES result = NIL; 00379 CHOICES c; 00380 int depth = 0; 00381 00382 if (number_debug) { 00383 cprintf ("number_permute (state=%d, char_index=%d, limit=%4.2f, ", 00384 state, char_index, *limit); 00385 cprintf ("word=%s, rating=%4.2f, certainty=%4.2f)\n", 00386 word, rating, certainty); 00387 } 00388 if (char_index < array_count (choices)) { 00389 iterate_list (c, (CHOICES) array_index (choices, char_index)) { 00390 if (depth++ < number_depth) 00391 append_number_choices (state, word, choices, char_index, 00392 (A_CHOICE *) first (c), limit, rating, 00393 certainty, certainty_array, &result); 00394 } 00395 } 00396 if (result && number_debug == 1) 00397 print_choices ("number_permute:", result); 00398 return (result); 00399 }
A_CHOICE *number_permute_and_select(CHOICES_LIST char_choices, float rating_limit)
Permute all the possible valid numbers and adjust their ratings; save the best rating.
Definition at line 406 of file permnum.cpp.
References array_count, best_probability, class_probability, clone_choice, display_ratings, first, free_choice(), MAX_WERD_LENGTH, MAXFLOAT, new_choice(), NIL, NO_PERM, NULL, number_permute(), pop_off, and print_choices().
Referenced by permute_all().
00407 { 00408 CHOICES result = NIL; 00409 char word[MAX_WERD_LENGTH + 1]; 00410 float certainty_array[MAX_WERD_LENGTH + 1]; 00411 float rating = rating_limit; 00412 A_CHOICE *best_choice; 00413 00414 best_choice = new_choice (NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM); 00415 00416 if (array_count (char_choices) <= MAX_WERD_LENGTH) { 00417 word[0] = '\0'; 00418 result = number_permute (0, char_choices, 0, &rating, 00419 word, 0.0, 0.0, certainty_array); 00420 00421 if (display_ratings && result) 00422 print_choices ("number_permuter", result); 00423 00424 while (result != NIL) { 00425 if (best_probability (result) < class_probability (best_choice)) { 00426 clone_choice (best_choice, first (result)); 00427 } 00428 free_choice (first (result)); 00429 pop_off(result); 00430 } 00431 } 00432 return (best_choice); 00433 }
int number_state_change(int state, const char *word)
Execute a state transition according to the state table and additional rules.
Definition at line 321 of file permnum.cpp.
References allowed_alpha_strs, kMaxRepeats, kRepeatMask, kStateShift, new_state(), NULL, number_character_type(), and number_state_table.
Referenced by append_number_choices(), and valid_number().
00322 { //current char 00323 int char_type; //type of char 00324 int new_state; //state to return 00325 int old_state = state >> kStateShift; 00326 int repeats = state & kRepeatMask; 00327 int index; 00328 char copy_word[4]; //tolowered chars 00329 00330 char_type = number_character_type (*word, old_state); 00331 if (char_type == -1) 00332 return -1; 00333 new_state = number_state_table[old_state][char_type]; 00334 if (new_state == old_state) { 00335 ++repeats; 00336 if (repeats >= kMaxRepeats[old_state]) 00337 return -1; 00338 } else { 00339 repeats = 0; 00340 } 00341 if (new_state >= 0) 00342 return (new_state << kStateShift) | repeats; 00343 if (new_state == -99) 00344 return -1; 00345 00346 //now check to see if the last state-3 chars in the word 00347 //make an allowable word. For now only 3 letter words 00348 //are allowed 00349 if (old_state != 6) 00350 return -1; //only 3 letters now 00351 copy_word[0] = tolower (word[-3]); 00352 copy_word[1] = tolower (word[-2]); 00353 copy_word[2] = tolower (word[-1]); 00354 copy_word[3] = '\0'; 00355 for (index = 0; allowed_alpha_strs[index] != NULL; index++) { 00356 if (strcmp (copy_word, allowed_alpha_strs[index]) == 0) 00357 return (-new_state) << kStateShift; 00358 } 00359 return -1; //not a good word 00360 }
int pure_number(const char *string)
Check to see if this string is a pure number (one that does not end with alphabetic characters).
Definition at line 440 of file permnum.cpp.
Referenced by AdaptableWord(), and adjust_number().
00440 { 00441 int x; 00442 00443 for (x = strlen (string) - 1; x >= 0; x--) { 00444 if (isdigit (string[x])) { 00445 return (TRUE); 00446 } 00447 else if (isalpha (string[x])) 00448 return (FALSE); 00449 } 00450 return (FALSE); 00451 }
int valid_number(const char *string)
Determine if this string contains a valid number.
Definition at line 459 of file permnum.cpp.
References FALSE, and number_state_change().
Referenced by AcceptableChoice(), and AdaptableWord().
00459 { 00460 int state = 0; 00461 int char_index; 00462 int num_chars = strlen (string); 00463 int num_digits = 0; 00464 00465 for (char_index = 0; char_index < num_chars; char_index++) { 00466 00467 state = number_state_change (state, string + char_index); 00468 if (state == -1) 00469 return (FALSE); 00470 if (isdigit (string[char_index])) 00471 num_digits++; 00472 } 00473 return num_digits > num_chars - num_digits; 00474 }
/* Lower-case three-letter words (month abbreviations) that
 * number_state_change() accepts inside a number. The list is terminated by
 * NULL. */
static const char *allowed_alpha_strs[] = {
  "jan", "feb", "mar", "apr", "may", "jun",
  "jul", "aug", "sep", "oct", "nov", "dec",
  NULL
};
* (c) Copyright 1987, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License.
Definition at line 36 of file permnum.cpp.
Referenced by number_state_change().
/* Letters that number_character_type() treats specially, by position:
 * index 0 is consulted while the state is below 4, index 1 in state 4 and
 * index 2 in state 5. The three strings are the sets of first, second and
 * third letters of the words in allowed_alpha_strs. */
static const char *allowed_char_strs[] = {
  "adfjmnos",    /* [0] first letters  */
  "aceopu",      /* [1] second letters */
  "bcglnrptvy"   /* [2] third letters  */
};
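Is 'i' missing from this list because it has its own problems? And does the same apply to 'w', because it looks like two 'v's? (If so, why are 'm' and 'l' here?) Note: the three strings are exactly the sets of first, second and third letters of the month abbreviations in allowed_alpha_strs, so 'i' and 'w' are absent simply because no abbreviation contains them.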
Used in number_character_type()
Definition at line 51 of file permnum.cpp.
Referenced by number_character_type().
/* Per-state limit on consecutive self-transitions: number_state_change()
 * rejects the character once the repeat count for the state reaches the
 * value stored here. Indexed by state-table row. */
const int kMaxRepeats[kNumStates] = { 3, 10, 3, 3, 3, 3, 3 };
Definition at line 91 of file permnum.cpp.
Referenced by number_state_change().
/* Number of states: the row count of number_state_table and the length of
 * kMaxRepeats. */
const int kNumStates = 7;
/* Mask selecting the repeat count, which occupies the low kStateShift bits of
 * a packed state. */
const int kRepeatMask = (1 << kStateShift) - 1;
/* Number of low bits of a packed state that hold the repeat count; the
 * state-table row is stored above them (row = packed >> kStateShift). */
const int kStateShift = 4;
Definition at line 88 of file permnum.cpp.
Referenced by append_number_choices(), and number_state_change().
/* Transition table of the number state machine, used by
 * number_state_change(). Rows are the current state; columns are the
 * character type returned by number_character_type():
 *   0 leading, 1 digit, 2 operator, 3 other letter, 4 trailing,
 *   5/6/7 letter found in allowed_char_strs[0]/[1]/[2].
 * Entries:
 *   >= 0   next state;
 *   -99    character not allowed;
 *   -1/-2  honoured only in row 6 and only when the three preceding letters
 *          form a word in allowed_alpha_strs, in which case the next state is
 *          1 or 2 respectively; in rows 4 and 5 they reject the character. */
static int number_state_table[kNumStates][8] = {
  /*  lead digit  oper alpha trail  ch0  ch1  ch2 */
  {     0,    1,    1,  -99,  -99,    4, -99, -99 },  /* state 0 */
  {   -99,    1,    1,    3,    2,    4,   3,   3 },  /* state 1 */
  {   -99,  -99,    1,  -99,    2,  -99, -99, -99 },  /* state 2 */
  {   -99,  -99,    3,    3,    2,    3,   3,   3 },  /* state 3 */
  {   -99,   -1,   -1,  -99,   -2,  -99,   5, -99 },  /* state 4 */
  {   -99,   -1,   -1,  -99,   -2,  -99, -99,   6 },  /* state 5 */
  {   -99,   -1,   -1,  -99,   -2,  -99, -99, -99 }   /* state 6 */
};
Definition at line 60 of file permnum.cpp.
Referenced by number_state_change().