00001
00020
00021
00022
00023 #include <stdio.h>
00024 #ifdef __UNIX__
00025 #include <assert.h>
00026 #endif
00027
00028 #include "wordclass.h"
00029 #include "fxid.h"
00030 #include "tordvars.h"
00031 #include "associate.h"
00032 #include "render.h"
00033 #include "metrics.h"
00034 #include "matchtab.h"
00035
00036 #include "permute.h"
00037 #include "context.h"
00038 #include "badwords.h"
00039 #include "callcpp.h"
00040
00041 extern TBLOB *newblob();
00042
00043
00044
00045
00046 INT16 first_pass;
00047
00048
00049
00050
00051
00053 #define BOLD_ON "&dB(s3B"
00054 #define BOLD_OFF "&d@(s0B"
00055 #define UNDERLINE_ON "&dD"
00056 #define UNDERLINE_OFF "&d@"
/*
 * call_matcher
 *
 * Prototype for the matcher entry point used by classify_blob() in this
 * file; the definition is not in this part of the file.  classify_blob()
 * passes the previous, current and next blobs, NULL for the unused
 * void* argument and its text row for the last argument, then casts the
 * returned LIST to CHOICES.
 */
LIST call_matcher( //call a matcher
TBLOB *ptblob, //previous
TBLOB *tessblob, //blob to match
TBLOB *ntblob, //next
void *, //unused parameter
TEXTROW * //always null anyway
);
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/* ================== */
00085 CHOICES classify_blob(TBLOB *pblob,
TBLOB *blob,
TBLOB *nblob,
TEXTROW *row,
int fx,
const char *string,
C_COL color,
STATE *this_state,
STATE *best_state,
INT32 pass,
INT32 blob_index) {
CHOICES rating;
INT32 old_index;
chars_classified++; /* Global value */
if (blob_skip)
return (NIL);
#ifndef GRAPHICS_DISABLED
if (display_all_blobs)
display_blob(blob, color);
#endif
rating = get_match (blob);
if (rating == NIL) {
if (pass) {
old_index = blob_index;
//?cast to int*
blob_type = compare_states (best_state, this_state, (int *) &blob_index);
blob_answer = word_answer[blob_index];
if (blob_answer < '!')
fprintf (matcher_fp,
"Bad compare states: best state=0x%x%x, this=0x%x%x, bits="
00086 INT32FORMAT ", index=" INT32FORMAT ", outdex="
00087 INT32FORMAT ", word=%s\n", best_state->part1,
00088 best_state->part2, this_state->part1, this_state->part2,
00089 bits_in_states, old_index, blob_index, word_answer);
00090 }
00091 else
00092 blob_type = 0;
00093 rating = (CHOICES) call_matcher (pblob, blob,
00094 nblob, NULL,
00095 row);
00096 put_match(blob, rating);
00097 }
00098
00099 #ifndef GRAPHICS_DISABLED
00100 if (display_ratings && string)
00101 print_choices(string, rating);
00102
00103 if (blob_pause)
00104 window_wait(blob_window);
00105 #endif
00106
00107 return (rating);
00108 }
00109
00110
00111
00119 void write_text_files(TWERD *word,
00120 char *raw_choice,
00121 int same_row,
00122 int good_word,
00123 int firstpass) {
00124 int x;
00125
00126 if (write_raw_output) {
00127 if (same_row)
00128 fprintf (rawfile, "\n");
00129 if (raw_choice && strlen (raw_choice)) {
00130 fprintf (rawfile, "%s ", raw_choice);
00131 fflush(rawfile);
00132 }
00133 }
00134
00135 if (write_output) {
00136 if (same_row)
00137 fprintf (textfile, "\n");
00138 if (word->guess && strlen (word->guess)) {
00139 for (x = 0; x < word->blanks; x++)
00140 fprintf (textfile, " ");
00141 if (!firstpass)
00142 fprintf(textfile, BOLD_ON);
00143 if (!good_word)
00144 fprintf(textfile, UNDERLINE_ON);
00145 fprintf (textfile, "%s", word->guess);
00146 if (!good_word)
00147 fprintf(textfile, UNDERLINE_OFF);
00148 if (!firstpass)
00149 fprintf(textfile, BOLD_OFF);
00150 fflush(textfile);
00151 }
00152 }
00153
00154 character_count += (word->guess ? strlen (word->guess) : 0);
00155 word_count++;
00156 }
00157
00158
00159
00166 void save_answer(TWERD *word,
00167 TEXTROW *row,
00168 A_CHOICE *best_choice,
00169 A_CHOICE *raw_choice,
00170 int firstpass) {
00171 static TEXTROW *last_row;
00172 char raw_answer[CHARS_PER_LINE];
00173 int answer_already;
00174 int good_answer;
00175 char *string = NULL;
00176
00177 if (best_choice) {
00178 good_answer = AcceptableResult (best_choice, raw_choice);
00179 string = class_string (best_choice);
00180 }
00181 else {
00182 good_answer = FALSE;
00183 }
00184
00185 if (firstpass) {
00186
00187 if (string) {
00188
00189 add_document_word(best_choice);
00190
00191 word->guess = string;
00192 fix_quotes (word->guess);
00193 strcpy (raw_answer, word->guess);
00194
00195 record_certainty (class_certainty (best_choice), 1);
00196
00197 if (good_answer) {
00198 record_certainty (class_certainty (best_choice), 2);
00199 strcat (raw_answer, " ");
00200 strcat (raw_answer, class_string (raw_choice));
00201 word->guess = strsave (raw_answer);
00202 word->guess[strlen (string)] = 0;
00203 if (string) {
00204 strfree(string);
00205 class_string (best_choice) = NULL;
00206 }
00207 }
00208 else {
00209
00210 if (word->guess)
00211 strfree (word->guess);
00212 word->guess = NULL;
00213 }
00214 }
00215 else {
00216 word->guess = NULL;
00217 raw_answer[0] = '\0';
00218 }
00219 }
00220 else {
00221
00222 answer_already = (word->guess != NULL);
00223 if (answer_already) {
00224 write_text_files (word,
00225 &word->guess[strlen (word->guess) + 1],
00226 (row != last_row), TRUE, TRUE);
00227 }
00228 else {
00229
00230 if (string) {
00231 if (!good_answer && tessedit_save_stats) {
00232 SaveBadWord (string, class_certainty (best_choice));
00233 }
00234 record_certainty (class_certainty (best_choice), 2);
00235 word->guess = class_string (best_choice);
00236 fix_quotes (word->guess);
00237 write_text_files (word, class_string (raw_choice),
00238 (row != last_row), good_answer, FALSE);
00239 }
00240 }
00241 }
00242
00243 if (display_text) {
00244 if (row != last_row)
00245 cprintf ("\n");
00246 if (word->guess && strlen (word->guess))
00247 cprintf ("%s ", word->guess);
00248 else
00249 cprintf ("%s ", raw_answer);
00250 fflush(stdout);
00251 }
00252
00253 last_row = row;
00254 }
00255