00001
00019
00020
00021
00022 #include "const.h"
00023 #include "permnum.h"
00024 #include "debug.h"
00025 #include "permute.h"
00026 #include "dawg.h"
00027 #include "tordvars.h"
00028 #include "stopper.h"
00029
00030 #include <math.h>
00031 #include <ctype.h>
00032
00033
00034
00035
/**
 * Lower-case three-letter month abbreviations.
 *
 * number_state_change() compares the three characters preceding the current
 * one (lower-cased) against these entries.  The list is NULL-terminated so it
 * can be walked without a separate length.
 */
static const char *allowed_alpha_strs[] = {
  "jan", "feb", "mar",
  "apr", "may", "jun",
  "jul", "aug", "sep",
  "oct", "nov", "dec",
  NULL
};
00040
/**
 * Per-position letter sets for the entries of allowed_alpha_strs.
 *
 * number_character_type() looks a lower-cased letter up in one of these
 * strings, chosen by the current state, to decide whether it can be the
 * first, second or third letter of an allowed alphabetic string.
 */
static const char *allowed_char_strs[] = {
  "adfjmnos",     /* [0] letters accepted in the first position  */
  "aceopu",       /* [1] letters accepted in the second position */
  "bcglnrptvy"    /* [2] letters accepted in the third position  */
};
00054
/** Number of rows (states) in number_state_table. */
const int kNumStates = 7;

/**
 * State transition table used by number_state_change().
 *
 * Rows are indexed by the current (unshifted) state, columns by the value
 * returned from number_character_type():
 *   0 = leading punctuation, 1 = digit, 2 = operator, 3 = other letter,
 *   4 = trailing punctuation, 5/6/7 = letter accepted as the 1st/2nd/3rd
 *   character of an allowed alphabetic string.
 *
 * Entry values:
 *   >= 0    next state;
 *   -99     illegal transition, the word is rejected;
 *   -1, -2  conditional transition to state 1 or 2: number_state_change()
 *           only takes it when the current state is 6 and the three
 *           preceding characters spell an entry of allowed_alpha_strs,
 *           otherwise the word is rejected.
 *
 * State 0 is the start state; states 4, 5 and 6 are reached after the 1st,
 * 2nd and 3rd letter of a candidate alphabetic string respectively.
 */
static int number_state_table[kNumStates][8] = {
  /*            lead digit oper alpha trail   A1   A2   A3 */
  /* state 0 */ {   0,    1,   1, -99,  -99,   4, -99, -99 },
  /* state 1 */ { -99,    1,   1,   3,    2,   4,   3,   3 },
  /* state 2 */ { -99,  -99,   1, -99,    2, -99, -99, -99 },
  /* state 3 */ { -99,  -99,   3,   3,    2,   3,   3,   3 },
  /* state 4 */ { -99,   -1,  -1, -99,   -2, -99,   5, -99 },
  /* state 5 */ { -99,   -1,  -1, -99,   -2, -99, -99,   6 },
  /* state 6 */ { -99,   -1,  -1, -99,   -2, -99, -99, -99 }
};
00084
00085
00086
00087
00088 const int kStateShift = 4;
00089 const int kRepeatMask = (1 << kStateShift) - 1;
00090
00091 const int kMaxRepeats[kNumStates] = {
00092 3, 10, 3, 3, 3, 3, 3
00093 };
00094
00097 make_float_var (good_number, GOOD_NUMBER, make_good_number,
00098 8, 15, set_good_number, "Good number adjustment");
00099
00100 make_float_var (ok_number, OK_NUMBER, make_ok_number,
00101 8, 16, set_ok_number, "Bad number adjustment");
00102
00103 make_toggle_var (number_debug, 0, make_number_debug,
00104 8, 23, set_number_debug, "Number debug");
00105
00106 make_int_var (number_depth, 3, make_number_depth,
00107 8, 24, set_number_depth, "Number depth");
00110
00111
00112
/**
 * Non-zero when ch is one of the punctuation characters classified as
 * "leading" by number_character_type(): { [ ( # @ $
 *
 * The argument is parenthesised so that expression arguments (for example a
 * conditional expression) expand correctly.  It is still evaluated up to six
 * times, so do not pass an expression with side effects.
 */
#define isleading(ch)   \
  (((ch) == '{') ||     \
   ((ch) == '[') ||     \
   ((ch) == '(') ||     \
   ((ch) == '#') ||     \
   ((ch) == '@') ||     \
   ((ch) == '$'))
00124
/**
 * Non-zero when ch is one of the punctuation characters classified as
 * "trailing" by number_character_type(): } ] ) ; : , . %
 *
 * The argument is parenthesised so that expression arguments (for example a
 * conditional expression) expand correctly.  It is still evaluated up to
 * eight times, so do not pass an expression with side effects.
 */
#define istrailing(ch)  \
  (((ch) == '}') ||     \
   ((ch) == ']') ||     \
   ((ch) == ')') ||     \
   ((ch) == ';') ||     \
   ((ch) == ':') ||     \
   ((ch) == ',') ||     \
   ((ch) == '.') ||     \
   ((ch) == '%'))
00138
/**
 * Non-zero when ch is one of the characters classified as an "operator" by
 * number_character_type(): * + - / . : ,
 *
 * The argument is parenthesised so that expression arguments (for example a
 * conditional expression) expand correctly.  It is still evaluated up to
 * seven times, so do not pass an expression with side effects.
 */
#define isoperator(ch)  \
  (((ch) == '*') ||     \
   ((ch) == '+') ||     \
   ((ch) == '-') ||     \
   ((ch) == '/') ||     \
   ((ch) == '.') ||     \
   ((ch) == ':') ||     \
   ((ch) == ','))
00151
00152
00153
00154
00159 void adjust_number(A_CHOICE *best_choice, float *certainty_array) {
00160 float adjust_factor;
00161
00162 if (adjust_debug)
00163 cprintf ("Number: %s %4.2f ",
00164 class_string (best_choice), class_probability (best_choice));
00165
00166 class_probability (best_choice) += RATING_PAD;
00167 if (pure_number (class_string (best_choice))) {
00168 class_probability (best_choice) *= good_number;
00169 adjust_factor = good_number;
00170 if (adjust_debug)
00171 cprintf (", %4.2f ", good_number);
00172 }
00173 else {
00174 class_probability (best_choice) *= ok_number;
00175 adjust_factor = ok_number;
00176 if (adjust_debug)
00177 cprintf (", N, %4.2f ", ok_number);
00178 }
00179
00180 class_probability (best_choice) -= RATING_PAD;
00181 LogNewWordChoice(best_choice, adjust_factor, certainty_array);
00182 if (adjust_debug)
00183 cprintf (" --> %4.2f\n", class_probability (best_choice));
00184 }
00185
00186
00192 void append_number_choices(int state,
00193 char *word,
00194 CHOICES_LIST choices,
00195 int char_index,
00196 A_CHOICE *this_choice,
00197 float *limit,
00198 float rating,
00199 float certainty,
00200 float *certainty_array,
00201 CHOICES *result) {
00202 int word_ending = FALSE;
00203 int x;
00204
00205 if (char_index == (array_count (choices) - 1))
00206 word_ending = TRUE;
00207
00208 word[char_index] = class_string (this_choice)[0];
00209 word[char_index + 1] = '\0';
00210 if (word[char_index] == '\0')
00211 word[char_index] = ' ';
00212 certainty_array[char_index] = class_certainty (this_choice);
00213
00214 rating += class_probability (this_choice);
00215 certainty = min (class_certainty (this_choice), certainty);
00216
00217 if (rating < *limit) {
00218
00219 state = number_state_change (state, word + char_index);
00220 if (number_debug)
00221 cprintf ("%-20s prob=%4.2f state=%d\n", word, rating, state);
00222
00223 if (state != -1) {
00224
00225 if ((state >> kStateShift) == 3 &&
00226 char_index + 3 < array_count (choices)) {
00227 return;
00228 }
00229
00230 if (word_ending) {
00231 for (x = 0; x <= char_index; x++) {
00232 if (isdigit (word[x])) {
00233 if (number_debug)
00234 cprintf ("new choice = %s\n", word);
00235 push_on (*result, new_choice (word, rating, certainty,
00236 -1, NUMBER_PERM));
00237 adjust_number ((A_CHOICE *) first (*result),
00238 certainty_array);
00239 if (best_probability (*result) > *limit) {
00240 free_choice (first (*result));
00241 pop_off(*result);
00242 }
00243 else {
00244 *limit = best_probability (*result);
00245 break;
00246 }
00247 }
00248 }
00249 }
00250 else {
00251 JOIN_ON (*result,
00252 number_permute (state, choices, char_index + 1, limit,
00253 word, rating, certainty,
00254 certainty_array));
00255 }
00256 }
00257 }
00258 else {
00259 if (number_debug)
00260 cprintf ("pruned word (%s, rating=%4.2f, limit=%4.2f)\n",
00261 word, rating, *limit);
00262 }
00263 }
00264
00265
00270 void init_permnum() {
00271 make_good_number();
00272 make_ok_number();
00273 make_number_debug();
00274 make_number_depth();
00275 }
00276
00277
00283 int number_character_type(
00284 char ch,
00285 int state) {
00286 char lower_char = tolower (ch);
00287
00288 if (isalpha (ch)) {
00289 if (state < 4 && strchr (allowed_char_strs[0], lower_char) != NULL)
00290 return 5;
00291 else if (state == 4
00292 && strchr (allowed_char_strs[1], lower_char) != NULL)
00293 return 6;
00294 else if (state == 5
00295 && strchr (allowed_char_strs[2], lower_char) != NULL)
00296 return 7;
00297 return 3;
00298 }
00299 else if (isdigit (ch))
00300 return (1);
00301 else if (isoperator (ch))
00302 return (2);
00303 else if (istrailing (ch))
00304 return (4);
00305 else if (isleading (ch))
00306 return (0);
00307 else
00308 return (-1);
00309 }
00310
00311
00321 int number_state_change(int state,
00322 const char *word) {
00323 int char_type;
00324 int new_state;
00325 int old_state = state >> kStateShift;
00326 int repeats = state & kRepeatMask;
00327 int index;
00328 char copy_word[4];
00329
00330 char_type = number_character_type (*word, old_state);
00331 if (char_type == -1)
00332 return -1;
00333 new_state = number_state_table[old_state][char_type];
00334 if (new_state == old_state) {
00335 ++repeats;
00336 if (repeats >= kMaxRepeats[old_state])
00337 return -1;
00338 } else {
00339 repeats = 0;
00340 }
00341 if (new_state >= 0)
00342 return (new_state << kStateShift) | repeats;
00343 if (new_state == -99)
00344 return -1;
00345
00346
00347
00348
00349 if (old_state != 6)
00350 return -1;
00351 copy_word[0] = tolower (word[-3]);
00352 copy_word[1] = tolower (word[-2]);
00353 copy_word[2] = tolower (word[-1]);
00354 copy_word[3] = '\0';
00355 for (index = 0; allowed_alpha_strs[index] != NULL; index++) {
00356 if (strcmp (copy_word, allowed_alpha_strs[index]) == 0)
00357 return (-new_state) << kStateShift;
00358 }
00359 return -1;
00360 }
00361
00362
00370 CHOICES number_permute(int state,
00371 CHOICES_LIST choices,
00372 int char_index,
00373 float *limit,
00374 char *word,
00375 float rating,
00376 float certainty,
00377 float *certainty_array) {
00378 CHOICES result = NIL;
00379 CHOICES c;
00380 int depth = 0;
00381
00382 if (number_debug) {
00383 cprintf ("number_permute (state=%d, char_index=%d, limit=%4.2f, ",
00384 state, char_index, *limit);
00385 cprintf ("word=%s, rating=%4.2f, certainty=%4.2f)\n",
00386 word, rating, certainty);
00387 }
00388 if (char_index < array_count (choices)) {
00389 iterate_list (c, (CHOICES) array_index (choices, char_index)) {
00390 if (depth++ < number_depth)
00391 append_number_choices (state, word, choices, char_index,
00392 (A_CHOICE *) first (c), limit, rating,
00393 certainty, certainty_array, &result);
00394 }
00395 }
00396 if (result && number_debug == 1)
00397 print_choices ("number_permute:", result);
00398 return (result);
00399 }
00400
00401
00406 A_CHOICE *number_permute_and_select(CHOICES_LIST char_choices,
00407 float rating_limit) {
00408 CHOICES result = NIL;
00409 char word[MAX_WERD_LENGTH + 1];
00410 float certainty_array[MAX_WERD_LENGTH + 1];
00411 float rating = rating_limit;
00412 A_CHOICE *best_choice;
00413
00414 best_choice = new_choice (NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM);
00415
00416 if (array_count (char_choices) <= MAX_WERD_LENGTH) {
00417 word[0] = '\0';
00418 result = number_permute (0, char_choices, 0, &rating,
00419 word, 0.0, 0.0, certainty_array);
00420
00421 if (display_ratings && result)
00422 print_choices ("number_permuter", result);
00423
00424 while (result != NIL) {
00425 if (best_probability (result) < class_probability (best_choice)) {
00426 clone_choice (best_choice, first (result));
00427 }
00428 free_choice (first (result));
00429 pop_off(result);
00430 }
00431 }
00432 return (best_choice);
00433 }
00434
00435
00440 int pure_number(const char *string) {
00441 int x;
00442
00443 for (x = strlen (string) - 1; x >= 0; x--) {
00444 if (isdigit (string[x])) {
00445 return (TRUE);
00446 }
00447 else if (isalpha (string[x]))
00448 return (FALSE);
00449 }
00450 return (FALSE);
00451 }
00452
00453
00459 int valid_number(const char *string) {
00460 int state = 0;
00461 int char_index;
00462 int num_chars = strlen (string);
00463 int num_digits = 0;
00464
00465 for (char_index = 0; char_index < num_chars; char_index++) {
00466
00467 state = number_state_change (state, string + char_index);
00468 if (state == -1)
00469 return (FALSE);
00470 if (isdigit (string[char_index]))
00471 num_digits++;
00472 }
00473 return num_digits > num_chars - num_digits;
00474 }