CHAR_SAMPLES Class Reference

#include <charsample.h>

Inheritance diagram for CHAR_SAMPLES:

Inherits ELIST_LINK.

List of all members.

Detailed Description

Manages a list of character samples and their match scores; used during adaption.

See CHAR_SAMPLES::build_prototype()

Definition at line 122 of file charsample.h.

Public Member Functions

Public Attributes


Constructor & Destructor Documentation

CHAR_SAMPLES::CHAR_SAMPLES (  ) 

Definition at line 174 of file charsample.cpp.

References best_sample, ch, NULL, samples, and UNKNOWN.

00174                            { 
00175   type = UNKNOWN;
00176   samples.clear ();
00177   ch = '\0';
00178   best_sample = NULL;
00179   proto = NULL;
00180 }

CHAR_SAMPLES::CHAR_SAMPLES ( CHAR_SAMPLE *  sample  ) 

Definition at line 185 of file charsample.cpp.

References ASSERT_HOST, best_sample, CHAR_SAMPLE::blob(), BLOB_CLUSTER, ch, CHAR_SAMPLE::character(), CHAR_SAMPLE::image(), IMAGE_CLUSTER, NULL, and samples.

00185                                               { 
00186   CHAR_SAMPLE_IT sample_it = &samples;
00187 
00188   ASSERT_HOST (sample->image () != NULL || sample->blob () != NULL);
00189 
00190   if (sample->image () != NULL)
00191     type = IMAGE_CLUSTER;
00192   else if (sample->blob () != NULL)
00193     type = BLOB_CLUSTER;
00194 
00195   samples.clear ();
00196   sample_it.add_to_end (sample);
00197   if (tessedit_mm_only_match_same_char)
00198     ch = sample->character ();
00199   else
00200     ch = '\0';
00201   best_sample = NULL;
00202   proto = NULL;
00203 }

CHAR_SAMPLES::~CHAR_SAMPLES (  )  [inline]

Definition at line 129 of file charsample.h.

00129                      {           //destructor
00130     }


Member Function Documentation

void CHAR_SAMPLES::add_sample ( CHAR_SAMPLE *  sample  ) 

Definition at line 208 of file charsample.cpp.

References IMAGE_CLUSTER, CHAR_SAMPLE::match_sample(), samples, and TRUE.

Referenced by check_wait_list(), and cluster_sample().

00208                                                  { 
00209   CHAR_SAMPLE_IT sample_it = &samples;
00210 
00211   if (tessedit_use_best_sample || tessedit_cluster_debug)
00212     for (sample_it.mark_cycle_pt ();
00213   !sample_it.cycled_list (); sample_it.forward ()) {
00214     sample_it.data ()->match_sample (sample, TRUE);
00215     sample->match_sample (sample_it.data (), TRUE);
00216   }
00217 
00218   sample_it.add_to_end (sample);
00219 
00220   if (tessedit_mm_use_prototypes && type == IMAGE_CLUSTER)
00221     if (samples.length () == tessedit_mm_prototype_min_size)
00222       this->build_prototype ();
00223   else if (samples.length () > tessedit_mm_prototype_min_size)
00224     this->add_sample_to_prototype (sample);
00225 }

void CHAR_SAMPLES::add_sample_to_prototype ( CHAR_SAMPLE *  sample  ) 

Definition at line 230 of file charsample.cpp.

References FALSE, IMAGE::get_xsize(), IMAGE::get_ysize(), CHAR_SAMPLE::image(), and TRUE.

00230                                                               { 
00231   BOOL8 rebuild = FALSE;
00232   INT32 new_xsize = proto->x_size ();
00233   INT32 new_ysize = proto->y_size ();
00234   INT32 sample_xsize = sample->image ()->get_xsize ();
00235   INT32 sample_ysize = sample->image ()->get_ysize ();
00236 
00237   if (sample_xsize > new_xsize) {
00238     new_xsize = sample_xsize;
00239     rebuild = TRUE;
00240   }
00241   if (sample_ysize > new_ysize) {
00242     new_ysize = sample_ysize;
00243     rebuild = TRUE;
00244   }
00245 
00246   if (rebuild)
00247     proto->enlarge_prototype (new_xsize, new_ysize);
00248 
00249   proto->add_sample (sample);
00250 }

void CHAR_SAMPLES::assign_to_char (  ) 

Definition at line 348 of file charsample.cpp.

References STATS::add(), ch, FIRST_CHAR, LAST_CHAR, STATS::pile_count(), and samples.

00348                                   { 
00349   STATS char_frequency(FIRST_CHAR, LAST_CHAR); 
00350   CHAR_SAMPLE_IT sample_it = &samples;
00351   INT32 i;
00352   INT32 max_index = 0;
00353   INT32 max_freq = 0;
00354 
00355   if (samples.length () == 0 || tessedit_mm_only_match_same_char)
00356     return;
00357 
00358   for (sample_it.mark_cycle_pt ();
00359     !sample_it.cycled_list (); sample_it.forward ())
00360   char_frequency.add ((INT32) sample_it.data ()->character (), 1);
00361 
00362   for (i = FIRST_CHAR; i <= LAST_CHAR; i++)
00363   if (char_frequency.pile_count (i) > max_freq) {
00364     max_index = i;
00365     max_freq = char_frequency.pile_count (i);
00366   }
00367 
00368   if (samples.length () >= tessedit_cluster_min_size
00369     && max_freq > samples.length () * tessedit_cluster_accept_fraction)
00370     ch = (char) max_index;
00371 }

void CHAR_SAMPLES::build_prototype (  ) 

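FIX: Builds the prototype for this cluster. The prototype is sized to the largest x and y image dimensions found among the samples, and every sample is then added to it. Does nothing unless the cluster type is IMAGE_CLUSTER and it holds at least tessedit_mm_prototype_min_size samples.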

Definition at line 256 of file charsample.cpp.

References IMAGE::get_xsize(), IMAGE::get_ysize(), CHAR_SAMPLE::image(), IMAGE_CLUSTER, and samples.

00256                                    { 
00257   CHAR_SAMPLE_IT sample_it = &samples;
00258   CHAR_SAMPLE *sample;
00259   INT32 proto_xsize = 0;
00260   INT32 proto_ysize = 0;
00261 
00262   if (type != IMAGE_CLUSTER
00263     || samples.length () < tessedit_mm_prototype_min_size)
00264     return;
00265 
00266   /* Finds x_max and y_max of sample */
00267   for (sample_it.mark_cycle_pt ();
00268   !sample_it.cycled_list (); sample_it.forward ()) {
00269     sample = sample_it.data ();
00270     if (sample->image ()->get_xsize () > proto_xsize)
00271       proto_xsize = sample->image ()->get_xsize ();
00272     if (sample->image ()->get_ysize () > proto_ysize)
00273       proto_ysize = sample->image ()->get_ysize ();
00274   }
00275 
00276   proto = new CHAR_PROTO (proto_xsize, proto_ysize, 0, 0, '\0');
00277 
00278   /* Adds sample data to new prototype */
00279   for (sample_it.mark_cycle_pt ();
00280     !sample_it.cycled_list (); sample_it.forward ())
00281   this->add_sample_to_prototype (sample_it.data ());
00282 }

char CHAR_SAMPLES::character (  )  [inline]

Definition at line 154 of file charsample.h.

Referenced by adapt_to_good_samples().

00154                      {
00155       return ch;
00156     }

void ELIST_LINK::de_serialise_asc ( FILE *  f  )  [inherited]

Reimplemented in ICOORDELT.

Definition at line 39 of file elst.cpp.

References ABORT, ERRCODE::error(), and SERIALISE_LINKS.

00040                                            {
00041   SERIALISE_LINKS.error ("ELIST_LINK::de_serialise_asc", ABORT,
00042     "Don't call this, override!");
00043 }

void CHAR_SAMPLES::find_best_sample (  ) 

Definition at line 287 of file charsample.cpp.

References best_sample, ch, debug_fp, MAX_INT32, CHAR_SAMPLE::print(), samples, and tprintf().

00287                                     { 
00288   CHAR_SAMPLE_IT sample_it = &samples;
00289   double score;
00290   double best_score = MAX_INT32;
00291 
00292   if (ch == '\0' || samples.length () < tessedit_mm_prototype_min_size)
00293     return;
00294 
00295   for (sample_it.mark_cycle_pt ();
00296   !sample_it.cycled_list (); sample_it.forward ()) {
00297     score = sample_it.data ()->mean_score ();
00298     if (score < best_score) {
00299       best_score = score;
00300       best_sample = sample_it.data ();
00301     }
00302   }
00303   #ifndef SECURE_NAMES
00304   if (tessedit_cluster_debug) {
00305     tprintf ("Best sample for this %c cluster:\n", ch);
00306     best_sample->print (debug_fp);
00307   }
00308   #endif
00309 }

float CHAR_SAMPLES::match_score ( CHAR_SAMPLE *  sample  ) 

Definition at line 314 of file charsample.cpp.

References BAD_SCORE, best_sample, ch, CHAR_SAMPLE::character(), FALSE, CHAR_SAMPLE::match_sample(), nn_match_score(), and NULL.

Referenced by adapt_to_good_ems(), adapt_to_good_samples(), and check_wait_list().

00314                                                    { 
00315   if (tessedit_mm_only_match_same_char && sample->character () != ch)
00316     return BAD_SCORE;
00317 
00318   if (tessedit_use_best_sample && best_sample != NULL)
00319     return best_sample->match_sample (sample, FALSE);
00320   else if ((tessedit_mm_use_prototypes
00321     || tessedit_mm_adapt_using_prototypes) && proto != NULL)
00322     return proto->match_sample (sample);
00323   else
00324     return this->nn_match_score (sample);
00325 }

INT32 CHAR_SAMPLES::n_samples (  )  [inline]

Definition at line 132 of file charsample.h.

00132                       {
00133       return samples.length ();
00134     }

float CHAR_SAMPLES::nn_match_score ( CHAR_SAMPLE *  sample  ) 

Definition at line 330 of file charsample.cpp.

References FALSE, MAX_INT32, and samples.

Referenced by match_score().

00330                                                       { 
00331   CHAR_SAMPLE_IT sample_it = &samples;
00332   float score;
00333   float min_score = MAX_INT32;
00334 
00335   for (sample_it.mark_cycle_pt ();
00336   !sample_it.cycled_list (); sample_it.forward ()) {
00337     score = sample_it.data ()->match_sample (sample, FALSE);
00338     if (score < min_score)
00339       min_score = score;
00340   }
00341 
00342   return min_score;
00343 }

void CHAR_SAMPLES::print ( FILE *  f  ) 

Definition at line 376 of file charsample.cpp.

References ch, INT32FORMAT, and samples.

00376                                 { 
00377   CHAR_SAMPLE_IT sample_it = &samples;
00378 
00379   fprintf (f, "Collected " INT32FORMAT " samples\n", samples.length ());
00380 
00381   #ifndef SECURE_NAMES
00382   if (tessedit_cluster_debug)
00383     for (sample_it.mark_cycle_pt ();
00384     !sample_it.cycled_list (); sample_it.forward ())
00385   sample_it.data ()->print (f);
00386 
00387   if (ch == '\0')
00388     fprintf (f, "\nCluster not used for adaption\n");
00389   else
00390     fprintf (f, "\nCluster used to adapt to '%c's\n", ch);
00391   #endif
00392 }

CHAR_PROTO* CHAR_SAMPLES::prototype (  )  [inline]

Definition at line 144 of file charsample.h.

00144                             {
00145       return proto;
00146     }

void CHAR_SAMPLES::rebuild_prototype ( INT32  new_xsize,
INT32  new_ysize 
)

void ELIST_LINK::serialise_asc ( FILE *  f  )  [inherited]

Generates an error, as it should never be called.

Definition at line 32 of file elst.cpp.

References ABORT, ERRCODE::error(), and SERIALISE_LINKS.

00033                                         {
00034   SERIALISE_LINKS.error ("ELIST_LINK::serialise_asc", ABORT,
00035     "Don't call this, override!");
00036 }


Member Data Documentation

CHAR_SAMPLE* CHAR_SAMPLES::best_sample

Definition at line 166 of file charsample.h.

Referenced by CHAR_SAMPLES(), find_best_sample(), and match_score().

char CHAR_SAMPLES::ch  [private]

Definition at line 162 of file charsample.h.

Referenced by assign_to_char(), CHAR_SAMPLES(), find_best_sample(), match_score(), and print().

CHAR_PROTO* CHAR_SAMPLES::proto

Definition at line 165 of file charsample.h.

CHAR_SAMPLE_LIST CHAR_SAMPLES::samples

Definition at line 167 of file charsample.h.

Referenced by add_sample(), assign_to_char(), build_prototype(), CHAR_SAMPLES(), find_best_sample(), nn_match_score(), and print().


The documentation for this class was generated from the following files:

charsample.h
charsample.cpp
Generated on Wed Feb 28 19:49:30 2007 for Tesseract by  doxygen 1.5.1