wordrec/wordclass.cpp

Go to the documentation of this file.
00001 
00020 /*----------------------------------------------------------------------
00021           I N C L U D E S
00022 ----------------------------------------------------------------------*/
00023 #include <stdio.h>
00024 #ifdef __UNIX__
00025 #include <assert.h>
00026 #endif
00027 
00028 #include "wordclass.h"
00029 #include "fxid.h"
00030 #include "tordvars.h"
00031 #include "associate.h"
00032 #include "render.h"
00033 #include "metrics.h"
00034 #include "matchtab.h"
00035 //#include "tfacepp.h"
00036 #include "permute.h"
00037 #include "context.h"
00038 #include "badwords.h"
00039 #include "callcpp.h"
00040 
/* Declaration of newblob(), which is defined outside this file. Nothing in
   the visible part of this file calls it. */
00041 extern TBLOB *newblob(); 
00042 
00043 /*----------------------------------------------------------------------
00044             Variables
00045 ----------------------------------------------------------------------*/
/* File-scope global with external linkage. It is neither read nor written by
   the functions visible in this file (save_answer and write_text_files take
   their own `firstpass` parameter instead).
   NOTE(review): presumably a pass indicator used by other modules - confirm. */
00046 INT16 first_pass;
00047 
00048 /*----------------------------------------------------------------------
00049           C o n s t a n t s
00050 ----------------------------------------------------------------------*/
00051 
/* Markup strings that write_text_files() writes to `textfile` around a word:
   the BOLD pair brackets words written with firstpass == 0, the UNDERLINE
   pair brackets words written with good_word == 0.
   NOTE(review): these look like printer control sequences with the leading
   escape character omitted - confirm what consumes this output. */
00053 #define BOLD_ON              "&dB(s3B"
00054 #define BOLD_OFF             "&d@(s0B"
00055 #define UNDERLINE_ON         "&dD"
00056 #define UNDERLINE_OFF        "&d@"
/* Prototype of the matcher entry point; the definition lives outside this
   file. classify_blob() calls it with the previous, current and next blobs,
   NULL for the fourth argument and its own `row` for the fifth, and casts the
   returned LIST to CHOICES. */
LIST call_matcher(                  //call a matcher
                  TBLOB *ptblob,    //previous
                  TBLOB *tessblob,  //blob to match
                  TBLOB *ntblob,    //next
                  void *,           //unused parameter
                  TEXTROW *         //always null anyway
                 );

/*----------------------------------------------------------------------
          F u n c t i o n s
----------------------------------------------------------------------*/
/* ================== */
00085 CHOICES classify_blob(TBLOB *pblob,
                      TBLOB *blob,
                      TBLOB *nblob,
                      TEXTROW *row,
                      int fx,
                      const char *string,
                      C_COL color,
                      STATE *this_state,
                      STATE *best_state,
                      INT32 pass,
                      INT32 blob_index) {
  CHOICES rating;
  INT32 old_index;

  chars_classified++;            /* Global value */
  if (blob_skip)
    return (NIL);

#ifndef GRAPHICS_DISABLED
  if (display_all_blobs)
    display_blob(blob, color); 
#endif
  rating = get_match (blob);
  if (rating == NIL) {
    if (pass) {
      old_index = blob_index;
                                 //?cast to int*
      blob_type = compare_states (best_state, this_state, (int *) &blob_index);
      blob_answer = word_answer[blob_index];
      if (blob_answer < '!')
        fprintf (matcher_fp,
          "Bad compare states: best state=0x%x%x, this=0x%x%x, bits="
00086           INT32FORMAT ", index=" INT32FORMAT ", outdex="
00087           INT32FORMAT ", word=%s\n", best_state->part1,
00088           best_state->part2, this_state->part1, this_state->part2,
00089           bits_in_states, old_index, blob_index, word_answer);
00090     }
00091     else
00092       blob_type = 0;
00093     rating = /*(*blob_matchers [fx]) */ (CHOICES) call_matcher (pblob, blob,
00094       nblob, NULL,
00095       row);
00096     put_match(blob, rating); 
00097   }
00098 
00099 #ifndef GRAPHICS_DISABLED
00100   if (display_ratings && string)
00101     print_choices(string, rating); 
00102 
00103   if (blob_pause)
00104     window_wait(blob_window); 
00105 #endif
00106 
00107   return (rating);
00108 }
00109 
00110 
00111 /* ================== */
00119 void write_text_files(TWERD *word,
00120                       char *raw_choice,
00121                       int same_row,
00122                       int good_word,
00123                       int firstpass) {
00124   int x;
00125   /* Raw output */
00126   if (write_raw_output) {
00127     if (same_row)
00128       fprintf (rawfile, "\n");
00129     if (raw_choice && strlen (raw_choice)) {
00130       fprintf (rawfile, "%s ", raw_choice);
00131       fflush(rawfile); 
00132     }
00133   }
00134   /* Text file output */
00135   if (write_output) {
00136     if (same_row)
00137       fprintf (textfile, "\n");
00138     if (word->guess && strlen (word->guess)) {
00139       for (x = 0; x < word->blanks; x++)
00140         fprintf (textfile, " ");
00141       if (!firstpass)
00142         fprintf(textfile, BOLD_ON); 
00143       if (!good_word)
00144         fprintf(textfile, UNDERLINE_ON); 
00145       fprintf (textfile, "%s", word->guess);
00146       if (!good_word)
00147         fprintf(textfile, UNDERLINE_OFF); 
00148       if (!firstpass)
00149         fprintf(textfile, BOLD_OFF); 
00150       fflush(textfile); 
00151     }
00152   }
00153   /* Global counters */
00154   character_count += (word->guess ? strlen (word->guess) : 0);
00155   word_count++;
00156 }
00157 
00158 
00159 /* ================== */
00166 void save_answer(TWERD *word,
00167                  TEXTROW *row,
00168                  A_CHOICE *best_choice,
00169                  A_CHOICE *raw_choice,
00170                  int firstpass) {
00171   static TEXTROW *last_row;
00172   char raw_answer[CHARS_PER_LINE];
00173   int answer_already;
00174   int good_answer;
00175   char *string = NULL;
00176 
00177   if (best_choice) {
00178     good_answer = AcceptableResult (best_choice, raw_choice);
00179     string = class_string (best_choice);
00180   }
00181   else {
00182     good_answer = FALSE;
00183   }
00184 
00185   if (firstpass) {
00186                                  /* First pass */
00187     if (string) {
00188                                  /* Got answer */
00189       add_document_word(best_choice); 
00190 
00191       word->guess = string;
00192       fix_quotes (word->guess);
00193       strcpy (raw_answer, word->guess);
00194 
00195       record_certainty (class_certainty (best_choice), 1);
00196 
00197       if (good_answer) {
00198         record_certainty (class_certainty (best_choice), 2);
00199         strcat (raw_answer, " ");
00200         strcat (raw_answer, class_string (raw_choice));
00201         word->guess = strsave (raw_answer);
00202         word->guess[strlen (string)] = 0;
00203         if (string) {
00204           strfree(string); 
00205           class_string (best_choice) = NULL;
00206         }
00207       }
00208       else {
00209                                  /* Not good enough */
00210         if (word->guess)
00211           strfree (word->guess);
00212         word->guess = NULL;
00213       }
00214     }
00215     else {
00216       word->guess = NULL;
00217       raw_answer[0] = '\0';
00218     }
00219   }
00220   else {
00221                                  /* Second pass */
00222     answer_already = (word->guess != NULL);
00223     if (answer_already) {
00224       write_text_files (word,
00225         &word->guess[strlen (word->guess) + 1],
00226         (row != last_row), TRUE, TRUE);
00227     }
00228     else {
00229                                  /* Required second pass */
00230       if (string) {
00231         if (!good_answer && tessedit_save_stats) {
00232           SaveBadWord (string, class_certainty (best_choice));
00233         }
00234         record_certainty (class_certainty (best_choice), 2);
00235         word->guess = class_string (best_choice);
00236         fix_quotes (word->guess);
00237         write_text_files (word, class_string (raw_choice),
00238           (row != last_row), good_answer, FALSE);
00239       }
00240     }
00241   }
00242   /* Word Display */
00243   if (display_text) {
00244     if (row != last_row)
00245       cprintf ("\n");
00246     if (word->guess && strlen (word->guess))
00247       cprintf ("%s ", word->guess);
00248     else
00249       cprintf ("%s ", raw_answer);
00250     fflush(stdout); 
00251   }
00252 
00253   last_row = row;
00254 }
00255 

Generated on Wed Feb 28 19:49:13 2007 for Tesseract by  doxygen 1.5.1