#include <charsample.h>
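Inheritance diagram for CHAR_SAMPLES: (diagram image not present in this text; the [inherited] members listed below show that CHAR_SAMPLES derives, directly or indirectly, from ELIST_LINK)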
See CHAR_SAMPLES::build_prototype()
Definition at line 122 of file charsample.h.
CHAR_SAMPLES::CHAR_SAMPLES()
Definition at line 174 of file charsample.cpp.
References best_sample, ch, NULL, samples, and UNKNOWN.
00174 {
00175   type = UNKNOWN;
00176   samples.clear ();
00177   ch = '\0';
00178   best_sample = NULL;
00179   proto = NULL;
00180 }
CHAR_SAMPLES::CHAR_SAMPLES(CHAR_SAMPLE *sample)
Definition at line 185 of file charsample.cpp.
References ASSERT_HOST, best_sample, CHAR_SAMPLE::blob(), BLOB_CLUSTER, ch, CHAR_SAMPLE::character(), CHAR_SAMPLE::image(), IMAGE_CLUSTER, NULL, and samples.
00185 {
00186   CHAR_SAMPLE_IT sample_it = &samples;
00187
00188   ASSERT_HOST (sample->image () != NULL || sample->blob () != NULL);
00189
00190   if (sample->image () != NULL)
00191     type = IMAGE_CLUSTER;
00192   else if (sample->blob () != NULL)
00193     type = BLOB_CLUSTER;
00194
00195   samples.clear ();
00196   sample_it.add_to_end (sample);
00197   if (tessedit_mm_only_match_same_char)
00198     ch = sample->character ();
00199   else
00200     ch = '\0';
00201   best_sample = NULL;
00202   proto = NULL;
00203 }
CHAR_SAMPLES::~CHAR_SAMPLES() [inline]
void CHAR_SAMPLES::add_sample(CHAR_SAMPLE *sample)
Definition at line 208 of file charsample.cpp.
References IMAGE_CLUSTER, CHAR_SAMPLE::match_sample(), samples, and TRUE.
Referenced by check_wait_list(), and cluster_sample().
00208 {
00209   CHAR_SAMPLE_IT sample_it = &samples;
00210
00211   if (tessedit_use_best_sample || tessedit_cluster_debug)
00212     for (sample_it.mark_cycle_pt ();
00213          !sample_it.cycled_list (); sample_it.forward ()) {
00214       sample_it.data ()->match_sample (sample, TRUE);
00215       sample->match_sample (sample_it.data (), TRUE);
00216     }
00217
00218   sample_it.add_to_end (sample);
00219
00220   if (tessedit_mm_use_prototypes && type == IMAGE_CLUSTER)
00221     if (samples.length () == tessedit_mm_prototype_min_size)
00222       this->build_prototype ();
00223     else if (samples.length () > tessedit_mm_prototype_min_size)
00224       this->add_sample_to_prototype (sample);
00225 }
void CHAR_SAMPLES::add_sample_to_prototype(CHAR_SAMPLE *sample)
Definition at line 230 of file charsample.cpp.
References FALSE, IMAGE::get_xsize(), IMAGE::get_ysize(), CHAR_SAMPLE::image(), and TRUE.
00230 {
00231   BOOL8 rebuild = FALSE;
00232   INT32 new_xsize = proto->x_size ();
00233   INT32 new_ysize = proto->y_size ();
00234   INT32 sample_xsize = sample->image ()->get_xsize ();
00235   INT32 sample_ysize = sample->image ()->get_ysize ();
00236
00237   if (sample_xsize > new_xsize) {
00238     new_xsize = sample_xsize;
00239     rebuild = TRUE;
00240   }
00241   if (sample_ysize > new_ysize) {
00242     new_ysize = sample_ysize;
00243     rebuild = TRUE;
00244   }
00245
00246   if (rebuild)
00247     proto->enlarge_prototype (new_xsize, new_ysize);
00248
00249   proto->add_sample (sample);
00250 }
void CHAR_SAMPLES::assign_to_char()
Definition at line 348 of file charsample.cpp.
References STATS::add(), ch, FIRST_CHAR, LAST_CHAR, STATS::pile_count(), and samples.
00348 {
00349   STATS char_frequency(FIRST_CHAR, LAST_CHAR);
00350   CHAR_SAMPLE_IT sample_it = &samples;
00351   INT32 i;
00352   INT32 max_index = 0;
00353   INT32 max_freq = 0;
00354
00355   if (samples.length () == 0 || tessedit_mm_only_match_same_char)
00356     return;
00357
00358   for (sample_it.mark_cycle_pt ();
00359        !sample_it.cycled_list (); sample_it.forward ())
00360     char_frequency.add ((INT32) sample_it.data ()->character (), 1);
00361
00362   for (i = FIRST_CHAR; i <= LAST_CHAR; i++)
00363     if (char_frequency.pile_count (i) > max_freq) {
00364       max_index = i;
00365       max_freq = char_frequency.pile_count (i);
00366     }
00367
00368   if (samples.length () >= tessedit_cluster_min_size
00369       && max_freq > samples.length () * tessedit_cluster_accept_fraction)
00370     ch = (char) max_index;
00371 }
void CHAR_SAMPLES::build_prototype()
FIX: Builds prototype for character
Definition at line 256 of file charsample.cpp.
References IMAGE::get_xsize(), IMAGE::get_ysize(), CHAR_SAMPLE::image(), IMAGE_CLUSTER, and samples.
00256 {
00257   CHAR_SAMPLE_IT sample_it = &samples;
00258   CHAR_SAMPLE *sample;
00259   INT32 proto_xsize = 0;
00260   INT32 proto_ysize = 0;
00261
00262   if (type != IMAGE_CLUSTER
00263       || samples.length () < tessedit_mm_prototype_min_size)
00264     return;
00265
00266   /* Finds x_max and y_max of sample */
00267   for (sample_it.mark_cycle_pt ();
00268        !sample_it.cycled_list (); sample_it.forward ()) {
00269     sample = sample_it.data ();
00270     if (sample->image ()->get_xsize () > proto_xsize)
00271       proto_xsize = sample->image ()->get_xsize ();
00272     if (sample->image ()->get_ysize () > proto_ysize)
00273       proto_ysize = sample->image ()->get_ysize ();
00274   }
00275
00276   proto = new CHAR_PROTO (proto_xsize, proto_ysize, 0, 0, '\0');
00277
00278   /* Adds sample data to new prototype */
00279   for (sample_it.mark_cycle_pt ();
00280        !sample_it.cycled_list (); sample_it.forward ())
00281     this->add_sample_to_prototype (sample_it.data ());
00282 }
char CHAR_SAMPLES::character() [inline]
Definition at line 154 of file charsample.h.
Referenced by adapt_to_good_samples().
00154 {
00155   return ch;
00156 }
void ELIST_LINK::de_serialise_asc(FILE *f) [inherited]
Reimplemented in ICOORDELT.
Definition at line 39 of file elst.cpp.
References ABORT, ERRCODE::error(), and SERIALISE_LINKS.
00040 {
00041   SERIALISE_LINKS.error ("ELIST_LINK::de_serialise_asc", ABORT,
00042                          "Don't call this, override!");
00043 }
void CHAR_SAMPLES::find_best_sample()
Definition at line 287 of file charsample.cpp.
References best_sample, ch, debug_fp, MAX_INT32, CHAR_SAMPLE::print(), samples, and tprintf().
00287 {
00288   CHAR_SAMPLE_IT sample_it = &samples;
00289   double score;
00290   double best_score = MAX_INT32;
00291
00292   if (ch == '\0' || samples.length () < tessedit_mm_prototype_min_size)
00293     return;
00294
00295   for (sample_it.mark_cycle_pt ();
00296        !sample_it.cycled_list (); sample_it.forward ()) {
00297     score = sample_it.data ()->mean_score ();
00298     if (score < best_score) {
00299       best_score = score;
00300       best_sample = sample_it.data ();
00301     }
00302   }
00303 #ifndef SECURE_NAMES
00304   if (tessedit_cluster_debug) {
00305     tprintf ("Best sample for this %c cluster:\n", ch);
00306     best_sample->print (debug_fp);
00307   }
00308 #endif
00309 }
float CHAR_SAMPLES::match_score(CHAR_SAMPLE *sample)
Definition at line 314 of file charsample.cpp.
References BAD_SCORE, best_sample, ch, CHAR_SAMPLE::character(), FALSE, CHAR_SAMPLE::match_sample(), nn_match_score(), and NULL.
Referenced by adapt_to_good_ems(), adapt_to_good_samples(), and check_wait_list().
00314 {
00315   if (tessedit_mm_only_match_same_char && sample->character () != ch)
00316     return BAD_SCORE;
00317
00318   if (tessedit_use_best_sample && best_sample != NULL)
00319     return best_sample->match_sample (sample, FALSE);
00320   else if ((tessedit_mm_use_prototypes
00321             || tessedit_mm_adapt_using_prototypes) && proto != NULL)
00322     return proto->match_sample (sample);
00323   else
00324     return this->nn_match_score (sample);
00325 }
INT32 CHAR_SAMPLES::n_samples() [inline]
float CHAR_SAMPLES::nn_match_score(CHAR_SAMPLE *sample)
Definition at line 330 of file charsample.cpp.
References FALSE, MAX_INT32, and samples.
Referenced by match_score().
00330 {
00331   CHAR_SAMPLE_IT sample_it = &samples;
00332   float score;
00333   float min_score = MAX_INT32;
00334
00335   for (sample_it.mark_cycle_pt ();
00336        !sample_it.cycled_list (); sample_it.forward ()) {
00337     score = sample_it.data ()->match_sample (sample, FALSE);
00338     if (score < min_score)
00339       min_score = score;
00340   }
00341
00342   return min_score;
00343 }
void CHAR_SAMPLES::print(FILE *f)
Definition at line 376 of file charsample.cpp.
References ch, INT32FORMAT, and samples.
00376 {
00377   CHAR_SAMPLE_IT sample_it = &samples;
00378
00379   fprintf (f, "Collected " INT32FORMAT " samples\n", samples.length ());
00380
00381 #ifndef SECURE_NAMES
00382   if (tessedit_cluster_debug)
00383     for (sample_it.mark_cycle_pt ();
00384          !sample_it.cycled_list (); sample_it.forward ())
00385       sample_it.data ()->print (f);
00386
00387   if (ch == '\0')
00388     fprintf (f, "\nCluster not used for adaption\n");
00389   else
00390     fprintf (f, "\nCluster used to adapt to '%c's\n", ch);
00391 #endif
00392 }
CHAR_PROTO* CHAR_SAMPLES::prototype() [inline]
void ELIST_LINK::serialise_asc(FILE *f) [inherited]
Generates an error, as it should never be called.
Definition at line 32 of file elst.cpp.
References ABORT, ERRCODE::error(), and SERIALISE_LINKS.
00033 {
00034   SERIALISE_LINKS.error ("ELIST_LINK::serialise_asc", ABORT,
00035                          "Don't call this, override!");
00036 }
CHAR_SAMPLE* CHAR_SAMPLES::best_sample
Definition at line 166 of file charsample.h.
Referenced by CHAR_SAMPLES(), find_best_sample(), and match_score().
NEWDELETE2 (CHAR_SAMPLES) private char CHAR_SAMPLES::ch
Definition at line 162 of file charsample.h.
Referenced by assign_to_char(), CHAR_SAMPLES(), find_best_sample(), match_score(), and print().
Definition at line 165 of file charsample.h.
CHAR_SAMPLE_LIST CHAR_SAMPLES::samples
Definition at line 167 of file charsample.h.
Referenced by add_sample(), assign_to_char(), build_prototype(), CHAR_SAMPLES(), find_best_sample(), nn_match_score(), and print().