#include <stdio.h>
#include "wordclass.h"
#include "fxid.h"
#include "tordvars.h"
#include "associate.h"
#include "render.h"
#include "metrics.h"
#include "matchtab.h"
#include "permute.h"
#include "context.h"
#include "badwords.h"
#include "callcpp.h"
Go to the source code of this file.
#define BOLD_OFF "&d@(s0B" |
Definition at line 54 of file wordclass.cpp.
#define BOLD_ON "&dB(s3B" |
Doesn't look too portable
Definition at line 53 of file wordclass.cpp.
#define UNDERLINE_OFF "&d@" |
Definition at line 56 of file wordclass.cpp.
#define UNDERLINE_ON "&dD" |
Definition at line 55 of file wordclass.cpp.
Call a matcher.
ptblob | previous | |
tessblob | blob to match | |
ntblob | next |
Definition at line 350 of file tfacepp.cpp.
References append_choice(), BLOB_CHOICE::certainty(), BLOB_CHOICE::char_class(), BLOB_CHOICE::config(), make_ed_blob(), NULL, BLOB_CHOICE::rating(), ratings, tess_denorm, and tess_word.
00356 { 00357 PBLOB *pblob; //converted blob 00358 PBLOB *blob; //converted blob 00359 PBLOB *nblob; //converted blob 00360 LIST result; //tess output 00361 BLOB_CHOICE *choice; //current choice 00362 char string[2]; //char converted 00363 BLOB_CHOICE_LIST ratings; //matcher result 00364 BLOB_CHOICE_IT it; //iterator 00365 00366 blob = make_ed_blob (tessblob);//convert blob 00367 if (blob == NULL) 00368 return NULL; //can't do it 00369 pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL; 00370 nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL; 00371 (*tess_matcher) (pblob, blob, nblob, tess_word, tess_denorm, ratings); 00372 //match it 00373 delete blob; //don't need that now 00374 if (pblob != NULL) 00375 delete pblob; 00376 if (nblob != NULL) 00377 delete nblob; 00378 it.set_to_list (&ratings); //get list 00379 result = NULL; 00380 string[1] = '\0'; 00381 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { 00382 choice = it.data (); 00383 string[0] = choice->char_class (); 00384 result = append_choice (result, string, 00385 choice->rating (), choice->certainty (), 00386 choice->config ()); 00387 } 00388 return result; //converted list 00389 }
CHOICES classify_blob | ( | TBLOB * | pblob, | |
TBLOB * | blob, | |||
TBLOB * | nblob, | |||
TEXTROW * | row, | |||
int | fx, | |||
const char * | string, | |||
C_COL | color, | |||
STATE * | this_state, | |||
STATE * | best_state, | |||
INT32 | pass, | |||
INT32 | blob_index | |||
) |
Classify blob if not already recorded in the match table.
pblob | Pointer to previous TBLOB | |
blob | Pointer to TBLOB in question | |
nblob | Pointer to next TBLOB | |
row | Row of this blob | |
fx | IGNORED, number of feature extractor to use, from cc_recog() | |
string | 'rebuild', 'improve 1:', 'improve 2:', 'chop_word:', or 'pieces:' | |
color | 'Orange', 'Red', 'Yellow', 'Green', or 'White' respectively | |
this_state | FIX: still not sure | |
best_state | FIX: still not sure | |
pass | 1 or 2, how hard to try | |
blob_index | FIX: set by compare_states() and word_answer[blob_index] |
The resulting list of choices is stored in the match table for the blob and is also returned to the caller.
Definition at line 97 of file wordclass.cpp.
Referenced by chop_word_main(), classify_piece(), improve_one_blob(), and rebuild_current_state().
00107 { 00108 CHOICES rating; 00109 INT32 old_index; 00110 00111 chars_classified++; /* Global value */ 00112 if (blob_skip) 00113 return (NIL); 00114 00115 #ifndef GRAPHICS_DISABLED 00116 if (display_all_blobs) 00117 display_blob(blob, color); 00118 #endif 00119 rating = get_match (blob); 00120 if (rating == NIL) { 00121 if (pass) { 00122 old_index = blob_index; 00123 //?cast to int* 00124 blob_type = compare_states (best_state, this_state, (int *) &blob_index); 00125 blob_answer = word_answer[blob_index]; 00126 if (blob_answer < '!') 00127 fprintf (matcher_fp, 00128 "Bad compare states: best state=0x%x%x, this=0x%x%x, bits=" 00129 INT32FORMAT ", index=" INT32FORMAT ", outdex=" 00130 INT32FORMAT ", word=%s\n", best_state->part1, 00131 best_state->part2, this_state->part1, this_state->part2, 00132 bits_in_states, old_index, blob_index, word_answer); 00133 } 00134 else 00135 blob_type = 0; 00136 rating = /*(*blob_matchers [fx]) */ (CHOICES) call_matcher (pblob, blob, 00137 nblob, NULL, 00138 row); 00139 put_match(blob, rating); 00140 } 00141 00142 #ifndef GRAPHICS_DISABLED 00143 if (display_ratings && string) 00144 print_choices(string, rating); 00145 00146 if (blob_pause) 00147 window_wait(blob_window); 00148 #endif 00149 00150 return (rating); 00151 }
TBLOB* newblob | ( | ) |
* (c) Copyright 1990, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License.
void save_answer | ( | TWERD * | word, | |
TEXTROW * | row, | |||
A_CHOICE * | best_choice, | |||
A_CHOICE * | raw_choice, | |||
int | firstpass | |||
) |
Write an answer to the output file that is the raw guess (without context) directly from the classifier.
Not actually called from anywhere in tesseract 1.02...
Definition at line 209 of file wordclass.cpp.
00213 { 00214 static TEXTROW *last_row; 00215 char raw_answer[CHARS_PER_LINE]; 00216 int answer_already; 00217 int good_answer; 00218 char *string = NULL; 00219 00220 if (best_choice) { 00221 good_answer = AcceptableResult (best_choice, raw_choice); 00222 string = class_string (best_choice); 00223 } 00224 else { 00225 good_answer = FALSE; 00226 } 00227 00228 if (firstpass) { 00229 /* First pass */ 00230 if (string) { 00231 /* Got answer */ 00232 add_document_word(best_choice); 00233 00234 word->guess = string; 00235 fix_quotes (word->guess); 00236 strcpy (raw_answer, word->guess); 00237 00238 record_certainty (class_certainty (best_choice), 1); 00239 00240 if (good_answer) { 00241 record_certainty (class_certainty (best_choice), 2); 00242 strcat (raw_answer, " "); 00243 strcat (raw_answer, class_string (raw_choice)); 00244 word->guess = strsave (raw_answer); 00245 word->guess[strlen (string)] = 0; 00246 if (string) { 00247 strfree(string); 00248 class_string (best_choice) = NULL; 00249 } 00250 } 00251 else { 00252 /* Not good enough */ 00253 if (word->guess) 00254 strfree (word->guess); 00255 word->guess = NULL; 00256 } 00257 } 00258 else { 00259 word->guess = NULL; 00260 raw_answer[0] = '\0'; 00261 } 00262 } 00263 else { 00264 /* Second pass */ 00265 answer_already = (word->guess != NULL); 00266 if (answer_already) { 00267 write_text_files (word, 00268 &word->guess[strlen (word->guess) + 1], 00269 (row != last_row), TRUE, TRUE); 00270 } 00271 else { 00272 /* Required second pass */ 00273 if (string) { 00274 if (!good_answer && tessedit_save_stats) { 00275 SaveBadWord (string, class_certainty (best_choice)); 00276 } 00277 record_certainty (class_certainty (best_choice), 2); 00278 word->guess = class_string (best_choice); 00279 fix_quotes (word->guess); 00280 write_text_files (word, class_string (raw_choice), 00281 (row != last_row), good_answer, FALSE); 00282 } 00283 } 00284 } 00285 /* Word Display */ 00286 if (display_text) { 00287 if (row != last_row) 00288 
cprintf ("\n"); 00289 if (word->guess && strlen (word->guess)) 00290 cprintf ("%s ", word->guess); 00291 else 00292 cprintf ("%s ", raw_answer); 00293 fflush(stdout); 00294 } 00295 00296 last_row = row; 00297 }
void write_text_files | ( | TWERD * | word, | |
char * | raw_choice, | |||
int | same_row, | |||
int | good_word, | |||
int | firstpass | |||
) |
Write the raw guess (without context) directly from the classifier to the raw output file, write the word's guess to the text output file, and update the global character and word counts.
Not actually called from anywhere in tesseract 1.02 because save_answer() never called in the first place...
Definition at line 162 of file wordclass.cpp.
00166 { 00167 int x; 00168 /* Raw output */ 00169 if (write_raw_output) { 00170 if (same_row) 00171 fprintf (rawfile, "\n"); 00172 if (raw_choice && strlen (raw_choice)) { 00173 fprintf (rawfile, "%s ", raw_choice); 00174 fflush(rawfile); 00175 } 00176 } 00177 /* Text file output */ 00178 if (write_output) { 00179 if (same_row) 00180 fprintf (textfile, "\n"); 00181 if (word->guess && strlen (word->guess)) { 00182 for (x = 0; x < word->blanks; x++) 00183 fprintf (textfile, " "); 00184 if (!firstpass) 00185 fprintf(textfile, BOLD_ON); 00186 if (!good_word) 00187 fprintf(textfile, UNDERLINE_ON); 00188 fprintf (textfile, "%s", word->guess); 00189 if (!good_word) 00190 fprintf(textfile, UNDERLINE_OFF); 00191 if (!firstpass) 00192 fprintf(textfile, BOLD_OFF); 00193 fflush(textfile); 00194 } 00195 } 00196 /* Global counters */ 00197 character_count += (word->guess ? strlen (word->guess) : 0); 00198 word_count++; 00199 }
Flag: classifying with first pass heuristics
Definition at line 46 of file wordclass.cpp.
Referenced by attempt_blob_chop(), chop_word_main(), improve_by_chopping(), record_search_status(), set_pass1(), and set_pass2().