dict/context.cpp

Go to the documentation of this file.
00001 
00020 #include "context.h"
00021 #include "tordvars.h"
00022 #include "callcpp.h"
00023 
00024 #include <stdio.h>
00025 #include <ctype.h>
00026 #include <string.h>
00027 #include <math.h>
00028 
00029 /*----------------------------------------------------------------------
00030               V a r i a b l e s
00031 ----------------------------------------------------------------------*/
/** Stream that choice lines are written to; NULL whenever no file is open. */
static FILE *choice_file = NULL;

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/
/**
 * Close the choice file if one is open.
 *
 * The handle is reset to NULL afterwards so that every other user of
 * choice_file (which all test it against NULL first) sees the file as
 * closed, and so that calling this function twice is harmless.
 */
void close_choices() {
  if (choice_file) {
    fclose(choice_file);
    /* A FILE* is invalid after fclose; never leave it dangling. */
    choice_file = NULL;
  }
}
00045 
00046 
/**
 * Collapse pairs of adjacent single-quote characters into one double quote.
 *
 * Wherever two consecutive characters are each either ' or ` (in any
 * combination), the first is overwritten with " and the rest of the string
 * is shifted left by one character. The string is edited in place and can
 * only become shorter.
 *
 * @param str  NUL-terminated, writable string to rewrite in place.
 */
void fix_quotes(char *str) {
  size_t i;

  for (i = 0; str[i] != '\0'; i++) {
    if (((str[i] == '\'') || (str[i] == '`')) &&
        ((str[i + 1] == '\'') || (str[i + 1] == '`'))) {
      str[i] = '\"';
      /* Source and destination overlap, which strcpy does not permit;
         memmove does. The "+ 1" carries the terminating NUL along. */
      memmove(str + i + 1, str + i + 2, strlen(str + i + 2) + 1);
    }
  }
}
00061 
00062 
/**
 * Check that the punctuation in a word is plausibly placed.
 *
 * Characters are scanned left to right. "trailing" becomes true once the
 * first letter has been seen. Punctuation falls into five classes, each
 * with its own placement rule:
 *   0: '.'                      only after a letter has been seen, at most once
 *   1: '{' '[' '('              only before any letter, at most once
 *   2: '}' ']' ')'              only after a letter has been seen, at most once
 *   3: ':' ';' '!' '-' ',' '?'  only after a letter has been seen, at most
 *                               once; a '-' clears this class's count again
 *   4: '`' '"' '\''             anywhere, at most twice; a quote immediately
 *                               followed by '`' or '\'' is consumed as a
 *                               single two-character quote
 * Digits are accepted anywhere and ignored. A letter that follows a
 * non-letter is accepted only when the previous character is '-' or when it
 * is 's', 'd' or 'l' directly after an apostrophe. Any other character, or
 * a punctuation mark in the wrong position, makes the word invalid.
 *
 * @param word  NUL-terminated string to examine (not modified).
 * @return -1 if the word is rejected; otherwise the number of punctuation
 *         classes (0..5) that occur in the word.
 */
int punctuation_ok(const char *word) {
  int punctuation_types[5] = {0, 0, 0, 0, 0};
  int trailing = 0;
  int num_puncts = 0;
  size_t length = strlen(word);  /* word is not modified: compute once */
  size_t x;

  for (x = 0; x < length; x++) {
    char ch = word[x];

    /* <ctype.h> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    if (isalpha((unsigned char) ch)) {
      if (trailing) {
        /* trailing is only set after a letter, so x >= 1 here. */
        char prev = word[x - 1];
        int after_letter = isalpha((unsigned char) prev) != 0;
        int after_apostrophe =
          (prev == '\'') && (ch == 's' || ch == 'd' || ch == 'l');
        int after_hyphen = (prev == '-');

        if (!(after_letter || after_apostrophe || after_hyphen))
          return (-1);
      }
      trailing = 1;
    }
    else {
      if (ch == '.' && trailing) {
        if (punctuation_types[0])
          return (-1);
        (punctuation_types[0])++;
      }

      else if (((ch == '{') || (ch == '[') || (ch == '(')) && !trailing) {
        if (punctuation_types[1])
          return (-1);
        (punctuation_types[1])++;
      }

      else if (((ch == '}') || (ch == ']') || (ch == ')')) && trailing) {
        if (punctuation_types[2])
          return (-1);
        (punctuation_types[2])++;
      }

      else if (((ch == ':') || (ch == ';') || (ch == '!') ||
                (ch == '-') || (ch == ',') || (ch == '?')) && trailing) {
        if (punctuation_types[3])
          return (-1);
        (punctuation_types[3])++;
        /* Hyphens are not counted and do not block later class-3 marks. */
        if (ch == '-')
          punctuation_types[3] = 0;
      }

      else if ((ch == '`') || (ch == '\"') || (ch == '\'')) {
        /* Swallow the second half of a two-character quote such as ''. */
        if ((word[x + 1] == '`') || (word[x + 1] == '\'')) {
          x++;
        }
        (punctuation_types[4])++;
        if (punctuation_types[4] > 2)
          return (-1);
      }

      else if (!isdigit((unsigned char) ch))
        return (-1);
    }
  }

  for (x = 0; x < 5; x++) {
    if (punctuation_types[x])
      num_puncts++;
  }

  return (num_puncts);
}
00140 
00141 
00145 int case_ok(const char *word) { 
00150   static int case_state_table[6][4] = { {
00151                                  /*  0. Begining of word         */
00152     /*P  U  L  D            */
00153     /* -1 = Error with case */
00154       0, 1, 5, 4
00155     },
00156     {                            /*  1. After initial capital    */
00157       0, 3, 2, 4
00158     },
00159     {                            /*  2. After lower case         */
00160       0, -1, 2, -1
00161     },
00162     {                            /*  3. After upper case         */
00163       0, 3, -1, 4
00164     },
00165     {                            /*  4. After a digit            */
00166       0, -1, -1, 4
00167     },
00168     {                            /*  5. After initial lower case */
00169       5, -1, 2, -1
00170     },
00171   };
00172 
00173   register int last_state = 0;
00174   register int state = 0;
00175   register int x;
00176 
00177   for (x = 0; x < strlen (word); x++) {
00178 
00179     if (islower (word[x]))
00180       state = case_state_table[state][2];
00181     else if (isupper (word[x]))
00182       state = case_state_table[state][1];
00183     else if (isdigit (word[x]))
00184       state = case_state_table[state][3];
00185     else
00186       state = case_state_table[state][0];
00187 
00188     if (debug_3)
00189       cprintf ("Case state = %d, char = %c\n", state, word[x]);
00190 
00191     if (state == -1) {
00192                                  /* Handle ACCRONYMs */
00193       if (word[x] == 's' &&
00194         !isalpha (word[x + 1]) && !isdigit (word[x + 1]))
00195         state = last_state;
00196       else
00197         return (FALSE);
00198     }
00199 
00200     last_state = state;
00201   }
00202   return state != 5;             /*single lower is bad */
00203 }
00204 
00205 
00211 void write_choice_line() { 
00212   if (choice_file) {
00213     fprintf (choice_file, "\n");
00214     fflush(choice_file); 
00215   }
00216 }

Generated on Wed Feb 28 19:49:10 2007 for Tesseract by  doxygen 1.5.1