textord/makerow.cpp

Go to the documentation of this file.
00001 
00020 #include          "mfcpch.h"
00021 #ifdef __UNIX__
00022 #include          <assert.h>
00023 #endif
00024 #include          "stderr.h"
00025 #include          "blobbox.h"
00026 #include          "lmedsq.h"
00027 #include          "statistc.h"
00028 #include          "drawtord.h"
00029 #include          "blkocc.h"
00030 #ifdef TEXT_VERBOSE
00031 #include          "../cutil/callcpp.h"
00032 #endif
00033 #include          "sortflts.h"
00034 #include          "oldbasel.h"
00035 #include          "tordmain.h"
00036 #include          "underlin.h"
00037 #include          "makerow.h"
00038 #include          "tprintf.h"
00039 
00040 #define EXTERN
00041 
00044 EXTERN BOOL_VAR (textord_heavy_nr, FALSE, "Vigorously remove noise");
00045 EXTERN BOOL_VAR (textord_show_initial_rows, FALSE,
00046 "Display row accumulation");
00047 EXTERN BOOL_VAR (textord_show_parallel_rows, FALSE,
00048 "Display page correlated rows");
00049 EXTERN BOOL_VAR (textord_show_expanded_rows, FALSE,
00050 "Display rows after expanding");
00051 EXTERN BOOL_VAR (textord_show_final_rows, FALSE,
00052 "Display rows after final fittin");
00053 EXTERN BOOL_VAR (textord_show_final_blobs, FALSE,
00054 "Display blob bounds after pre-ass");
00055 EXTERN BOOL_VAR (textord_test_landscape, FALSE, "Tests refer to land/port");
00056 EXTERN BOOL_VAR (textord_parallel_baselines, TRUE,
00057 "Force parallel baselines");
00058 EXTERN BOOL_VAR (textord_straight_baselines, FALSE,
00059 "Force straight baselines");
00060 EXTERN BOOL_VAR (textord_quadratic_baselines, FALSE, "Use quadratic splines");
00061 EXTERN BOOL_VAR (textord_old_baselines, TRUE, "Use old baseline algorithm");
00062 EXTERN BOOL_VAR (textord_old_xheight, TRUE, "Use old xheight algorithm");
00063 EXTERN BOOL_VAR (textord_fix_xheight_bug, TRUE, "Use spline baseline");
00064 EXTERN BOOL_VAR (textord_fix_makerow_bug, TRUE, "Prevent multiple baselines");
00065 EXTERN BOOL_VAR (textord_row_xheights, FALSE, "Use row height policy");
00066 EXTERN BOOL_VAR (textord_block_xheights, TRUE, "Use block height policy");
00067 EXTERN BOOL_VAR (textord_xheight_tweak, FALSE, "New min condition on height");
00068 EXTERN BOOL_VAR (textord_cblob_blockocc, TRUE,
00069 "Use new projection for underlines");
00070 EXTERN BOOL_VAR (textord_debug_xheights, FALSE, "Test xheight algorithms");
00071 EXTERN BOOL_VAR (textord_biased_skewcalc, TRUE,
00072 "Bias skew estimates with line length");
00073 EXTERN BOOL_VAR (textord_interpolating_skew, TRUE, "Interpolate across gaps");
00074 EXTERN INT_VAR (textord_skewsmooth_offset, 2, "For smooth factor");
00075 EXTERN INT_VAR (textord_test_x, 0, "coord of test pt");
00076 EXTERN INT_VAR (textord_test_y, 0, "coord of test pt");
00077 EXTERN INT_VAR (textord_min_blobs_in_row, 4,
00078 "Min blobs before gradient counted");
00079 EXTERN INT_VAR (textord_spline_minblobs, 8,
00080 "Min blobs in each spline segment");
00081 EXTERN INT_VAR (textord_spline_medianwin, 6,
00082 "Size of window for spline segmentation");
00083 EXTERN INT_VAR (textord_min_xheight, 10, "Min credible pixel xheight");
00084 EXTERN double_VAR (textord_spline_shift_fraction, 0.02,
00085 "Fraction of line spacing for quad");
00086 EXTERN double_VAR (textord_spline_outlier_fraction, 0.1,
00087 "Fraction of line spacing for outlier");
00088 EXTERN double_VAR (textord_skew_ile, 0.5, "Ile of gradients for page skew");
00089 EXTERN double_VAR (textord_skew_lag, 0.01,
00090 "Lag for skew on row accumulation");
00091 EXTERN double_VAR (textord_linespace_iqrlimit, 0.2,
00092 "Max iqr/median for linespace");
00093 EXTERN double_VAR (textord_width_limit, 8, "Max width of blobs to make rows");
00094 EXTERN double_VAR (textord_chop_width, 1.5, "Max width before chopping");
00095 EXTERN double_VAR (textord_merge_desc, 0.25,
00096 "Fraction of linespace for desc drop");
00097 EXTERN double_VAR (textord_merge_x, 0.5,
00098 "Fraction of linespace for x height");
00099 EXTERN double_VAR (textord_merge_asc, 0.25,
00100 "Fraction of linespace for asc height");
00101 EXTERN double_VAR (textord_minxh, 0.25,
00102 "fraction of linesize for min xheight");
00103 EXTERN double_VAR (textord_min_linesize, 1.25,
00104 "* blob height for initial linesize");
00105 EXTERN double_VAR (textord_excess_blobsize, 1.3,
00106 "New row made if blob makes row this big");
00107 EXTERN double_VAR (textord_occupancy_threshold, 0.4,
00108 "Fraction of neighbourhood");
00109 EXTERN double_VAR (textord_underline_width, 2.0,
00110 "Multiple of line_size for underline");
00111 EXTERN double_VAR (textord_xheight_mode_fraction, 0.4,
00112 "Min pile height to make xheight");
00113 EXTERN double_VAR (textord_ascheight_mode_fraction, 0.15,
00114 "Min pile height to make ascheight");
00115 EXTERN double_VAR (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
00116 EXTERN double_VAR (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
00117 EXTERN double_VAR (textord_descx_ratio_min, 0.15, "Min desc/xheight");
00118 EXTERN double_VAR (textord_descx_ratio_max, 0.6, "Max desc/xheight");
00119 EXTERN double_VAR (textord_xheight_error_margin, 0.1, "Accepted variation");
00123 #define MAX_HEIGHT_MODES  12
00124 
00130 float make_rows(                             //make rows
00131                 ICOORD page_tr,              //top right
00132                 BLOCK_LIST *blocks,          //block list
00133                 TO_BLOCK_LIST *land_blocks,  //rotated for landscape
00134                 TO_BLOCK_LIST *port_blocks   //output list
00135                ) {
00136   float port_m;                  //global skew
00137   float port_err;                //global noise
00138   //float land_m;                  //global skew
00139   //float land_err;                //global noise
00140   TO_BLOCK_IT block_it;          //iterator
00141 
00142   //don't do landscape for now
00143   //  block_it.set_to_list(land_blocks);
00144   //  for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
00145   //     make_initial_textrows(page_tr,block_it.data(),FCOORD(0,-1),
00146   //        (BOOL8)textord_test_landscape);
00147 
00148 #ifdef TEXT_VERBOSE
00149   // gets a 'r', see ccmain/tesseractmain.dox
00150   cprintf("r\n");
00151 #endif
00152   block_it.set_to_list (port_blocks);
00153   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00154     block_it.forward ())
00155   make_initial_textrows (page_tr, block_it.data (), FCOORD (1.0f, 0.0f),
00156       !(BOOL8) textord_test_landscape);
00157                                  //compute globally
00158   compute_page_skew(port_blocks, port_m, port_err); 
00159   //  compute_page_skew(land_blocks,land_m,land_err);  //compute globally
00160   //  tprintf("Portrait skew gradient=%g, error=%g.\n",
00161   //          port_m,port_err);
00162   //  tprintf("Landscape skew gradient=%g, error=%g.\n",
00163   //          land_m,land_err);
00164   block_it.set_to_list (port_blocks);
00165   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00166   block_it.forward ()) {
00167     cleanup_rows (page_tr, block_it.data (), port_m, FCOORD (1.0f, 0.0f),
00168       block_it.data ()->block->bounding_box ().left (),
00169       !(BOOL8) textord_test_landscape);
00170   }
00171   block_it.set_to_list (land_blocks);
00172   //  for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
00173   //  {
00174   //     cleanup_rows(page_tr,block_it.data(),land_m,FCOORD(0,-1),
00175   //                  -block_it.data()->block->bounding_box().top(),
00176   //                  (BOOL8)textord_test_landscape);
00177   //  }
00178   return port_m;                 //global skew
00179 }
00180 
00181 
00187 void make_initial_textrows(                  //find lines
00188                            ICOORD page_tr,
00189                            TO_BLOCK *block,  //block to do
00190                            FCOORD rotation,  //for drawing
00191                            BOOL8 testing_on  //correct orientation
00192                           ) {
00193   TO_ROW_IT row_it = block->get_rows ();
00194 
00195 #ifndef GRAPHICS_DISABLED
00196   COLOUR colour;                 //of row
00197 
00198   if (textord_show_initial_rows && testing_on) {
00199     if (to_win == NO_WINDOW)
00200       create_to_win(page_tr);
00201   }
00202 #endif
00203                                  //guess skew
00204   assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
00205   row_it.move_to_first ();
00206   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00207     fit_lms_line (row_it.data ());
00208 #ifndef GRAPHICS_DISABLED
00209   if (textord_show_initial_rows && testing_on) {
00210     colour = RED;
00211     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00212       plot_to_row (row_it.data (), colour, rotation);
00213       colour = (COLOUR) (colour + 1);
00214       if (colour > MAGENTA)
00215         colour = RED;
00216     }
00217   }
00218 #endif
00219 }
00220 
00221 
00227 void fit_lms_line(             //sort function
00228                   TO_ROW *row  //row to fit
00229                  ) {
00230   float m, c;                    //fitted line
00231   BOX box;                       //blob box
00232   LMS lms (row->blob_list ()->length ());
00233                                  //blobs
00234   BLOBNBOX_IT blob_it = row->blob_list ();
00235 
00236   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00237     box = blob_it.data ()->bounding_box ();
00238     lms.add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));
00239   }
00240   lms.fit (m, c);
00241   row->set_line (m, c, lms.error ());
00242 }
00243 
00244 
00251 void compute_page_skew(
00252                        TO_BLOCK_LIST *blocks,  //list of blocks
00253                        float &page_m,          //average gradient
00254                        float &page_err         //average error
00255                       ) {
00256   INT32 row_count;               //total rows
00257   INT32 blob_count;              //total_blobs
00258   INT32 row_err;                 //integer error
00259   float *gradients;              //of rows
00260   float *errors;                 //of rows
00261   INT32 row_index;               //of total
00262   TO_ROW *row;                   //current row
00263   TO_BLOCK_IT block_it = blocks; //iterator
00264   TO_ROW_IT row_it;
00265 
00266   row_count = 0;
00267   blob_count = 0;
00268 
00269 #ifdef TEXT_VERBOSE
00270   // gets a 'q', see ccmain/tesseractmain.dox
00271   cprintf("q");
00272 #endif
00273   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00274   block_it.forward ()) {
00275     row_count += block_it.data ()->get_rows ()->length ();
00276     //count up rows
00277     row_it.set_to_list (block_it.data ()->get_rows ());
00278     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00279       blob_count += row_it.data ()->blob_list ()->length ();
00280   }
00281   if (row_count == 0) {
00282     page_m = 0.0f;
00283     page_err = 0.0f;
00284     return;
00285   }
00286   gradients = (float *) alloc_mem (blob_count * sizeof (float));
00287   //get mem
00288   errors = (float *) alloc_mem (blob_count * sizeof (float));
00289   if (gradients == NULL || errors == NULL)
00290     MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
00291 
00292   row_index = 0;
00293   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00294   block_it.forward ()) {
00295     row_it.set_to_list (block_it.data ()->get_rows ());
00296     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00297       row = row_it.data ();
00298       blob_count = row->blob_list ()->length ();
00299       row_err = (INT32) ceil (row->line_error ());
00300       if (row_err <= 0)
00301         row_err = 1;
00302       if (textord_biased_skewcalc) {
00303         blob_count /= row_err;
00304         for (blob_count /= row_err; blob_count > 0; blob_count--) {
00305           gradients[row_index] = row->line_m ();
00306           errors[row_index] = row->line_error ();
00307           row_index++;
00308         }
00309       }
00310       else if (blob_count >= textord_min_blobs_in_row) {
00311                                  //get gradient
00312         gradients[row_index] = row->line_m ();
00313         errors[row_index] = row->line_error ();
00314         row_index++;
00315       }
00316     }
00317   }
00318   if (row_index == 0) {
00319                                  //desperate
00320     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00321     block_it.forward ()) {
00322       row_it.set_to_list (block_it.data ()->get_rows ());
00323       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00324       row_it.forward ()) {
00325         row = row_it.data ();
00326         gradients[row_index] = row->line_m ();
00327         errors[row_index] = row->line_error ();
00328         row_index++;
00329       }
00330     }
00331   }
00332   row_count = row_index;
00333   row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile),
00334     gradients, row_count);
00335   page_m = gradients[row_index];
00336   row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile),
00337     errors, row_count);
00338   page_err = errors[row_index];
00339   free_mem(gradients);
00340   free_mem(errors);
00341 }
00342 
// Fraction of a row's estimated xheight below which vigorous_noise_removal
// treats a blob as a noise candidate.
static const double kNoiseSize = 0.5;
// Smallest blob height that vigorous_noise_removal lets into the height
// histogram used for the xheight estimate.
static const int kMinSize = 8;
00347 
00360 static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) {
00361   const BOX& ibox = i->bounding_box();
00362   const BOX& dotbox = dot->bounding_box();
00363 
00364   // Must overlap horizontally by enough and be high enough.
00365   int overlap = MIN(dotbox.right(), ibox.right()) -
00366                 MAX(dotbox.left(), ibox.left());
00367   if (ibox.height() <= 2 * dotbox.height() ||
00368       (overlap * 2 < ibox.width() && overlap < dotbox.width()))
00369     return false;
00370 
00371   // The i or ! must be tall and thin to be good.
00372   if (ibox.height() > ibox.width() * 2)
00373     return true;
00374 
00375   // It might still be tall and thin, but it might be joined to something.
00376   // So search the outline for a piece of large height close to the edges
00377   // of the dot.
00378   const double kHeightFraction = 0.6;
00379   double target_height = MIN(dotbox.bottom(), ibox.top());
00380   target_height -= row->line_m()*dotbox.left() + row->line_c();
00381   target_height *= kHeightFraction;
00382   int left_min = dotbox.left() - dotbox.width();
00383   int middle = (dotbox.left() + dotbox.right())/2;
00384   int right_max = dotbox.right() + dotbox.width();
00385   int left_miny = 0;
00386   int left_maxy = 0;
00387   int right_miny = 0;
00388   int right_maxy = 0;
00389   bool found_left = false;
00390   bool found_right = false;
00391   bool in_left = false;
00392   bool in_right = false;
00393   C_BLOB* blob = i->cblob();
00394   C_OUTLINE_IT o_it = blob->out_list();
00395   for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {
00396     C_OUTLINE* outline = o_it.data();
00397     int length = outline->pathlength();
00398     ICOORD pos = outline->start_pos();
00399     for (int step = 0; step < length; pos += outline->step(step++)) {
00400       int x = pos.x();
00401       int y = pos.y();
00402       if (x >= left_min && x < middle && !found_left) {
00403         // We are in the left part so find min and max y.
00404         if (in_left) {
00405           if (y > left_maxy) left_maxy = y;
00406           if (y < left_miny) left_miny = y;
00407         } else {
00408           left_maxy = left_miny = y;
00409           in_left = true;
00410         }
00411       } else if (in_left) {
00412         // We just left the left so look for size.
00413         if (left_maxy - left_miny > target_height) {
00414           if (found_right)
00415             return true;
00416           found_left = true;
00417         }
00418         in_left = false;
00419       }
00420       if (x <= right_max && x > middle && !found_right) {
00421         // We are in the right part so find min and max y.
00422         if (in_right) {
00423           if (y > right_maxy) right_maxy = y;
00424           if (y < right_miny) right_miny = y;
00425         } else {
00426           right_maxy = right_miny = y;
00427           in_right = true;
00428         }
00429       } else if (in_right) {
00430         // We just left the right so look for size.
00431         if (right_maxy - right_miny > target_height) {
00432           if (found_left)
00433             return true;
00434           found_right = true;
00435         }
00436         in_right = false;
00437       }
00438     }
00439   }
00440   return false;
00441 }
00442 
00453 static void vigorous_noise_removal(TO_BLOCK* block) {
00454   TO_ROW_IT row_it = block->get_rows ();
00455   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00456     TO_ROW* row = row_it.data();
00457     BLOBNBOX_IT b_it = row->blob_list();
00458     // Estimate the xheight on the row.
00459     int max_height = 0;
00460     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00461       BLOBNBOX* blob = b_it.data();
00462       if (blob->bounding_box().height() > max_height)
00463         max_height = blob->bounding_box().height();
00464     }
00465     STATS hstats(0, max_height + 1);
00466     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00467       BLOBNBOX* blob = b_it.data();
00468       int height = blob->bounding_box().height();
00469       if (height >= kMinSize)
00470         hstats.add(blob->bounding_box().height(), 1);
00471     }
00472     float xheight = hstats.median();
00473     // Delete small objects.
00474     BLOBNBOX* prev = NULL;
00475     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00476       BLOBNBOX* blob = b_it.data();
00477       const BOX& box = blob->bounding_box();
00478       if (box.height() < kNoiseSize * xheight) {
00479         // Small so delete unless it looks like an i dot.
00480         if (prev != NULL) {
00481           if (dot_of_i(blob, prev, row))
00482             continue;  // Looks OK.
00483         }
00484         if (!b_it.at_last()) {
00485           BLOBNBOX* next = b_it.data_relative(1);
00486           if (dot_of_i(blob, next, row))
00487             continue;  // Looks OK.
00488         }
00489         // It might be noise so get rid of it.
00490         if (blob->blob() != NULL)
00491           delete blob->blob();
00492         if (blob->cblob() != NULL)
00493           delete blob->cblob();
00494         delete b_it.extract();
00495       } else {
00496         prev = blob;
00497       }
00498     }
00499   }
00500 }
00501 
00507 void cleanup_rows(
00508                   ICOORD page_tr,    //top right
00509                   TO_BLOCK *block,   //block to do
00510                   float gradient,    //gradient to fit
00511                   FCOORD rotation,   //for drawing
00512                   INT32 block_edge,  //edge of block
00513                   BOOL8 testing_on   //correct orientation
00514                  ) {
00515                                  //iterators
00516   BLOBNBOX_IT blob_it = &block->blobs;
00517   TO_ROW_IT row_it = block->get_rows ();
00518 
00519 #ifndef GRAPHICS_DISABLED
00520   if (textord_show_parallel_rows && testing_on) {
00521     if (to_win == NO_WINDOW)
00522       create_to_win(page_tr);
00523   }
00524 #endif
00525                                  //get row coords
00526   fit_parallel_rows(block,
00527                     gradient,
00528                     rotation,
00529                     block_edge,
00530                     textord_show_parallel_rows &&testing_on);
00531   delete_non_dropout_rows(block,
00532                           gradient,
00533                           rotation,
00534                           block_edge,
00535                           textord_show_parallel_rows &&testing_on);
00536   expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
00537   blob_it.set_to_list (&block->blobs);
00538   row_it.set_to_list (block->get_rows ());
00539   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00540     blob_it.add_list_after (row_it.data ()->blob_list ());
00541   //give blobs back
00542   assign_blobs_to_rows (block, &gradient, 1, TRUE, TRUE, FALSE);
00543   //now new rows must be genuine
00544   blob_it.set_to_list (&block->blobs);
00545   blob_it.add_list_after (&block->large_blobs);
00546   assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
00547   //safe to use big ones now
00548   blob_it.set_to_list (&block->blobs);
00549                                  //throw all blobs in
00550   blob_it.add_list_after (&block->noise_blobs);
00551   blob_it.add_list_after (&block->small_blobs);
00552   assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
00553   //no rows for noise
00554   row_it.set_to_list (block->get_rows ());
00555   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00556     row_it.data ()->blob_list ()->sort (blob_x_order);
00557   fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
00558   // if () new in v1.03
00559   if (textord_heavy_nr) {
00560     vigorous_noise_removal(block);
00561   }
00562   separate_underlines(block, gradient, rotation, testing_on);
00563   pre_associate_blobs(page_tr, block, rotation, testing_on);
00564 
00565 #ifndef GRAPHICS_DISABLED
00566   if (textord_show_final_rows && testing_on) {
00567     if (to_win == NO_WINDOW)
00568       create_to_win(page_tr);
00569   }
00570 #endif
00571 
00572   fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
00573   //              textord_show_final_rows && testing_on);
00574   make_spline_rows(block,
00575                    gradient,
00576                    rotation,
00577                    block_edge,
00578                    textord_show_final_rows &&testing_on);
00579   if (!textord_old_xheight || !textord_old_baselines)
00580     compute_block_xheight(block, gradient);
00581   if (textord_restore_underlines)
00582                                  //fix underlines
00583     restore_underlined_blobs(block);
00584 #ifndef GRAPHICS_DISABLED
00585   if (textord_show_final_rows && testing_on) {
00586     plot_blob_list (to_win, &block->blobs, MAGENTA, WHITE);
00587     //show discarded blobs
00588     plot_blob_list (to_win, &block->underlines, YELLOW, CORAL);
00589   }
00590   if (textord_show_final_rows && testing_on && block->blobs.length () > 0)
00591     tprintf ("%d blobs discarded as noise\n", block->blobs.length ());
00592   if (textord_show_final_rows && testing_on) {
00593     draw_meanlines(block, gradient, block_edge, WHITE, rotation);
00594   }
00595 #endif
00596 }
00597 
00598 
00604 void delete_non_dropout_rows(                   //find lines
00605                              TO_BLOCK *block,   //block to do
00606                              float gradient,    //global skew
00607                              FCOORD rotation,   //deskew vector
00608                              INT32 block_edge,  //left edge
00609                              BOOL8 testing_on   //correct orientation
00610                             ) {
00611   BOX block_box;                 //deskewed block
00612   INT32 *deltas;                 //change in occupation
00613   INT32 *occupation;             //of pixel coords
00614   INT32 max_y;                   //in block
00615   INT32 min_y;
00616   INT32 line_index;              //of scan line
00617   INT32 line_count;              //no of scan lines
00618   INT32 distance;                //to drop-out
00619   INT32 xleft;                   //of block
00620   INT32 ybottom;                 //of block
00621   TO_ROW *row;                   //current row
00622   TO_ROW_IT row_it = block->get_rows ();
00623   BLOBNBOX_IT blob_it = &block->blobs;
00624 
00625 #ifdef TEXT_VERBOSE
00626   // gets a 'n', see ccmain/tesseractmain.dox
00627   cprintf("n");
00628 #endif
00629   if (row_it.length () == 0)
00630     return;                      //empty block
00631   block_box = deskew_block_coords (block, gradient);
00632   xleft = block->block->bounding_box ().left ();
00633   ybottom = block->block->bounding_box ().bottom ();
00634   min_y = block_box.bottom () - 1;
00635   max_y = block_box.top () + 1;
00636   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00637     line_index = (INT32) floor (row_it.data ()->intercept ());
00638     if (line_index <= min_y)
00639       min_y = line_index - 1;
00640     if (line_index >= max_y)
00641       max_y = line_index + 1;
00642   }
00643   line_count = max_y - min_y + 1;
00644   if (line_count <= 0)
00645     return;                      //empty block
00646   deltas = (INT32 *) alloc_mem (line_count * sizeof (INT32));
00647   occupation = (INT32 *) alloc_mem (line_count * sizeof (INT32));
00648   if (deltas == NULL || occupation == NULL)
00649     MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
00650 
00651   compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
00652   compute_occupation_threshold ((INT32)
00653     ceil (block->line_spacing *
00654     (textord_merge_desc +
00655     textord_merge_asc)),
00656     (INT32) ceil (block->line_spacing *
00657     (textord_merge_x +
00658     textord_merge_asc)),
00659     max_y - min_y + 1, occupation, deltas);
00660 #ifndef GRAPHICS_DISABLED
00661   if (testing_on) {
00662     draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
00663   }
00664 #endif
00665   compute_dropout_distances(occupation, deltas, line_count);
00666   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00667     row = row_it.data ();
00668     line_index = (INT32) floor (row->intercept ());
00669     distance = deltas[line_index - min_y];
00670     if (find_best_dropout_row (row, distance, block->line_spacing / 2,
00671     line_index, &row_it, testing_on)) {
00672 #ifndef GRAPHICS_DISABLED
00673       if (testing_on)
00674         plot_parallel_row(row, gradient, block_edge, WHITE, rotation);
00675 #endif
00676       blob_it.add_list_after (row_it.data ()->blob_list ());
00677       delete row_it.extract ();  //too far away
00678     }
00679   }
00680   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00681     blob_it.add_list_after (row_it.data ()->blob_list ());
00682   }
00683 
00684   free_mem(deltas);
00685   free_mem(occupation);
00686 }
00687 
00688 
00697 BOOL8 find_best_dropout_row(                    //find neighbours
00698                             TO_ROW *row,        //row to test
00699                             INT32 distance,     //dropout dist
00700                             float dist_limit,   //threshold distance
00701                             INT32 line_index,   //index of row
00702                             TO_ROW_IT *row_it,  //current position
00703                             BOOL8 testing_on    //correct orientation
00704                            ) {
00705   INT32 next_index;              //of neigbouring row
00706   INT32 row_offset;              //from current row
00707   INT32 abs_dist;                //absolute distance
00708   INT8 row_inc;                  //increment to row_index
00709   TO_ROW *next_row;              //nextious row
00710 
00711   if (testing_on)
00712     tprintf ("Row at %g(%g), dropout dist=%d,",
00713       row->intercept (), row->parallel_c (), distance);
00714   if (distance < 0) {
00715     row_inc = 1;
00716     abs_dist = -distance;
00717   }
00718   else {
00719     row_inc = -1;
00720     abs_dist = distance;
00721   }
00722   if (abs_dist > dist_limit) {
00723     if (testing_on) {
00724       tprintf (" too far - deleting\n");
00725     }
00726     return TRUE;
00727   }
00728   if (distance < 0 && !row_it->at_last ()
00729   || distance >= 0 && !row_it->at_first ()) {
00730     row_offset = row_inc;
00731     do {
00732       next_row = row_it->data_relative (row_offset);
00733       next_index = (INT32) floor (next_row->intercept ());
00734       if (distance < 0
00735         && next_index < line_index
00736         && next_index > line_index + distance + distance
00737         || distance >= 0
00738         && next_index > line_index
00739       && next_index < line_index + distance + distance) {
00740         if (testing_on) {
00741           tprintf (" nearer neighbour (%d) at %g\n",
00742             line_index + distance - next_index,
00743             next_row->intercept ());
00744         }
00745         return TRUE;             //other is nearer
00746       }
00747       else if (next_index == line_index
00748       || next_index == line_index + distance + distance) {
00749         if (row->believability () <= next_row->believability ()) {
00750           if (testing_on) {
00751             tprintf (" equal but more believable at %g (%g/%g)\n",
00752               next_row->intercept (),
00753               row->believability (),
00754               next_row->believability ());
00755           }
00756           return TRUE;           //other is more believable
00757         }
00758       }
00759       row_offset += row_inc;
00760     }
00761     while ((next_index == line_index
00762       || next_index == line_index + distance + distance)
00763       && row_offset < row_it->length ());
00764     if (testing_on)
00765       tprintf (" keeping\n");
00766   }
00767   return FALSE;
00768 }
00769 
00770 
00777 BOX deskew_block_coords(                  //block box
00778                         TO_BLOCK *block,  //block to do
00779                         float gradient    //global skew
00780                        ) {
00781   BOX result;                    //block bounds
00782   BOX blob_box;                  //of block
00783   FCOORD rotation;               //deskew vector
00784   float length;                  //of gradient vector
00785   TO_ROW_IT row_it = block->get_rows ();
00786   TO_ROW *row;                   //current row
00787   BLOBNBOX *blob;                //current blob
00788   BLOBNBOX_IT blob_it;           //iterator
00789 
00790   length = sqrt (gradient * gradient + 1);
00791   rotation = FCOORD (1 / length, -gradient / length);
00792   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00793     row = row_it.data ();
00794     blob_it.set_to_list (row->blob_list ());
00795     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
00796     blob_it.forward ()) {
00797       blob = blob_it.data ();
00798       blob_box = blob->bounding_box ();
00799       blob_box.rotate (rotation);//de-skew it
00800       result += blob_box;
00801     }
00802   }
00803   return result;
00804 }
00805 
00806 
00813 void compute_line_occupation(
00814                              TO_BLOCK *block,    //block to do
00815                              float gradient,     //global skew
00816                              INT32 min_y,        //min coord in block
00817                              INT32 max_y,        //in block
00818                              INT32 *occupation,  //output projection
00819                              INT32 *deltas       //derivative
00820                             ) {
00821   INT32 line_count;              //maxy-miny+1
00822   INT32 line_index;              //of scan line
00823   float top, bottom;             //coords of blob
00824   INT32 width;                   //of blob
00825   INT32 idx;                     //for -O3 bug on some platforms
00826   TO_ROW *row;                   //current row
00827   TO_ROW_IT row_it = block->get_rows ();
00828   BLOBNBOX *blob;                //current blob
00829   BLOBNBOX_IT blob_it;           //iterator
00830   float length;                  //of skew vector
00831   BOX blob_box;                  //bounding box
00832   FCOORD rotation;               //inverse of skew
00833 
00834   line_count = max_y - min_y + 1;
00835   length = sqrt (gradient * gradient + 1);
00836   rotation = FCOORD (1 / length, -gradient / length);
00837   for (line_index = 0; line_index < line_count; line_index++)
00838     deltas[line_index] = 0;
00839   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00840     row = row_it.data ();
00841     blob_it.set_to_list (row->blob_list ());
00842     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
00843     blob_it.forward ()) {
00844       blob = blob_it.data ();
00845       blob_box = blob->bounding_box ();
00846       blob_box.rotate (rotation);//de-skew it
00847       top = blob_box.top ();
00848       bottom = blob_box.bottom ();
00849       width =
00850         (INT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
00851       if ((INT32) floor (bottom) < min_y
00852         || (INT32) floor (bottom) - min_y >= line_count)
00853         fprintf (stderr,
00854           "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
00855           INT32FORMAT ")\n", (INT32) floor (bottom), min_y, max_y);
00856                                  //count transitions
00857       //for -O3 bug on some platforms
00858       //deltas[(INT32) floor (bottom) - min_y] += width;
00859       idx = (INT32) floor (bottom) - min_y;
00860       deltas[idx] += width;
00861       if ((INT32) floor (top) < min_y
00862         || (INT32) floor (top) - min_y >= line_count)
00863         fprintf (stderr,
00864           "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
00865           INT32FORMAT ")\n", (INT32) floor (top), min_y, max_y);
00866       //deltas[(INT32) floor (top) - min_y] -= width;
00867       idx = (INT32) floor (top) - min_y;
00868       deltas[idx] -= width;
00869     }
00870   }
00871   occupation[0] = deltas[0];
00872   for (line_index = 1; line_index < line_count; line_index++)
00873     occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
00874 }
00875 
00876 
00882 void compute_occupation_threshold(
00883                                   INT32 low_window,   //below result point
00884                                   INT32 high_window,  //above result point
00885                                   INT32 line_count,   //array sizes
00886                                   INT32 *occupation,  //input projection
00887                                   INT32 *thresholds   //output thresholds
00888                                  ) {
00889   INT32 line_index;              //of thresholds line
00890   INT32 low_index;               //in occupation
00891   INT32 high_index;              //in occupation
00892   INT32 sum;                     //current average
00893   INT32 divisor;                 //to get thresholds
00894   INT32 min_index;               //of min occ
00895   INT32 min_occ;                 //min in locality
00896   INT32 test_index;              //for finding min
00897 
00898   divisor =
00899     (INT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
00900   if (low_window + high_window < line_count) {
00901     for (sum = 0, high_index = 0; high_index < low_window; high_index++)
00902       sum += occupation[high_index];
00903     for (low_index = 0; low_index < high_window; low_index++, high_index++)
00904       sum += occupation[high_index];
00905     min_occ = occupation[0];
00906     min_index = 0;
00907     for (test_index = 1; test_index < high_index; test_index++) {
00908       if (occupation[test_index] <= min_occ) {
00909         min_occ = occupation[test_index];
00910         min_index = test_index;  //find min in region
00911       }
00912     }
00913     for (line_index = 0; line_index < low_window; line_index++)
00914       thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
00915     //same out to end
00916     for (low_index = 0; high_index < line_count; low_index++, high_index++) {
00917       sum -= occupation[low_index];
00918       sum += occupation[high_index];
00919       if (occupation[high_index] <= min_occ) {
00920                                  //find min in region
00921         min_occ = occupation[high_index];
00922         min_index = high_index;
00923       }
00924                                  //lost min from region
00925       if (min_index <= low_index) {
00926         min_occ = occupation[low_index + 1];
00927         min_index = low_index + 1;
00928         for (test_index = low_index + 2; test_index <= high_index;
00929         test_index++) {
00930           if (occupation[test_index] <= min_occ) {
00931             min_occ = occupation[test_index];
00932                                  //find min in region
00933             min_index = test_index;
00934           }
00935         }
00936       }
00937       thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
00938     }
00939   }
00940   else {
00941     min_occ = occupation[0];
00942     min_index = 0;
00943     for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
00944       if (occupation[low_index] < min_occ) {
00945         min_occ = occupation[low_index];
00946         min_index = low_index;
00947       }
00948       sum += occupation[low_index];
00949     }
00950     line_index = 0;
00951   }
00952   for (; line_index < line_count; line_index++)
00953     thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
00954   //same out to end
00955 }
00956 
00957 
00963 void compute_dropout_distances(
00964                                INT32 *occupation,  //input projection
00965                                INT32 *thresholds,  //output thresholds
00966                                INT32 line_count    //array sizes
00967                               ) {
00968   INT32 line_index;              //of thresholds line
00969   INT32 distance;                //from prev dropout
00970   INT32 next_dist;               //to next dropout
00971   INT32 back_index;              //for back filling
00972   INT32 prev_threshold;          //before overwrite
00973 
00974   distance = -line_count;
00975   line_index = 0;
00976   do {
00977     do {
00978       distance--;
00979       prev_threshold = thresholds[line_index];
00980                                  //distance from prev
00981       thresholds[line_index] = distance;
00982       line_index++;
00983     }
00984     while (line_index < line_count
00985       && (occupation[line_index] < thresholds[line_index]
00986       || occupation[line_index - 1] >= prev_threshold));
00987     if (line_index < line_count) {
00988       back_index = line_index - 1;
00989       next_dist = 1;
00990       while (next_dist < -distance && back_index >= 0) {
00991         thresholds[back_index] = next_dist;
00992         back_index--;
00993         next_dist++;
00994         distance++;
00995       }
00996       distance = 1;
00997     }
00998   }
00999   while (line_index < line_count);
01000 }
01001 
01002 
01015 void expand_rows(                   //find lines
01016                  ICOORD page_tr,    //top right
01017                  TO_BLOCK *block,   //block to do
01018                  float gradient,    //gradient to fit
01019                  FCOORD rotation,   //for drawing
01020                  INT32 block_edge,  //edge of block
01021                  BOOL8 testing_on   //correct orientation
01022                 ) {
01023   BOOL8 swallowed_row;           //eaten a neighbour
01024   float y_max, y_min;            //new row limits
01025   float y_bottom, y_top;         //allowed limits
01026   TO_ROW *test_row;              //next row
01027   TO_ROW *row;                   //current row
01028                                  //iterators
01029   BLOBNBOX_IT blob_it = &block->blobs;
01030   TO_ROW_IT row_it = block->get_rows ();
01031 
01032 #ifdef TEXT_VERBOSE
01033   // gets a 'x', see ccmain/tesseractmain.dox
01034   cprintf("x");
01035 #endif
01036 #ifndef GRAPHICS_DISABLED
01037   if (textord_show_expanded_rows && testing_on) {
01038     if (to_win == NO_WINDOW)
01039       create_to_win(page_tr);
01040   }
01041 #endif
01042 
01043   adjust_row_limits(block);  //shift min,max.
01044   if (textord_new_initial_xheight) {
01045     if (block->get_rows ()->length () == 0)
01046       return;
01047     compute_row_stats(block, textord_show_expanded_rows &&testing_on);
01048   }
01049   assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
01050   //get real membership
01051   if (block->get_rows ()->length () == 0)
01052     return;
01053   fit_parallel_rows(block,
01054                     gradient,
01055                     rotation,
01056                     block_edge,
01057                     textord_show_expanded_rows &&testing_on);
01058   if (!textord_new_initial_xheight)
01059     compute_row_stats(block, textord_show_expanded_rows &&testing_on);
01060   row_it.move_to_last ();
01061   do {
01062     row = row_it.data ();
01063     y_max = row->max_y ();       //get current limits
01064     y_min = row->min_y ();
01065     y_bottom = row->intercept () - block->line_size * textord_merge_desc;
01066     y_top = row->intercept () + block->line_size
01067       * (textord_merge_x + textord_merge_asc);
01068     if (y_min > y_bottom) {      //expansion allowed
01069                                  //expandable
01070       swallowed_row = TRUE;
01071       while (swallowed_row && !row_it.at_last ()) {
01072         swallowed_row = FALSE;
01073                                  //get next one
01074         test_row = row_it.data_relative (1);
01075                                  //overlaps space
01076         if (test_row->max_y () > y_bottom) {
01077           if (test_row->min_y () > y_bottom) {
01078             row_it.forward ();
01079 #ifndef GRAPHICS_DISABLED
01080             if (textord_show_expanded_rows && testing_on)
01081               plot_parallel_row(test_row,
01082                                 gradient,
01083                                 block_edge,
01084                                 WHITE,
01085                                 rotation);
01086 #endif
01087             blob_it.set_to_list (row->blob_list ());
01088             blob_it.add_list_after (test_row->blob_list ());
01089                                  //swallow complete row
01090             delete row_it.extract ();
01091             row_it.backward ();
01092             swallowed_row = TRUE;
01093           }
01094           else if (test_row->max_y () < y_min)
01095                                  //shorter limit
01096             y_bottom = test_row->max_y ();
01097           else
01098             y_bottom = y_min;    //can't expand it
01099         }
01100       }
01101       y_min = y_bottom;          //expand it
01102     }
01103     if (y_max < y_top) {         //expansion allowed
01104       swallowed_row = TRUE;
01105       while (swallowed_row && !row_it.at_first ()) {
01106         swallowed_row = FALSE;
01107                                  //get one above
01108         test_row = row_it.data_relative (-1);
01109         if (test_row->min_y () < y_top) {
01110           if (test_row->max_y () < y_top) {
01111             row_it.backward ();
01112             blob_it.set_to_list (row->blob_list ());
01113 #ifndef GRAPHICS_DISABLED
01114             if (textord_show_expanded_rows && testing_on)
01115               plot_parallel_row(test_row,
01116                                 gradient,
01117                                 block_edge,
01118                                 WHITE,
01119                                 rotation);
01120 #endif
01121             blob_it.add_list_after (test_row->blob_list ());
01122                                  //swallow complete row
01123             delete row_it.extract ();
01124             row_it.forward ();
01125             swallowed_row = TRUE;
01126           }
01127           else if (test_row->min_y () < y_max)
01128                                  //shorter limit
01129             y_top = test_row->min_y ();
01130           else
01131             y_top = y_max;       //can't expand it
01132 
01133         }
01134       }
01135       y_max = y_top;
01136     }
01137                                  //new limits
01138     row->set_limits (y_min, y_max);
01139     row_it.backward ();
01140   }
01141   while (!row_it.at_last ());
01142 }
01143 
01144 
01150 void adjust_row_limits(                 //tidy limits
01151                        TO_BLOCK *block  //block to do
01152                       ) {
01153   TO_ROW *row;                   //current row
01154   float size;                    //size of row
01155   float ymax;                    //top of row
01156   float ymin;                    //bottom of row
01157   TO_ROW_IT row_it = block->get_rows ();
01158 
01159   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01160     row = row_it.data ();
01161     size = row->max_y () - row->min_y ();
01162     size /= textord_merge_x + textord_merge_asc + textord_merge_desc;
01163     ymax = size * (textord_merge_x + textord_merge_asc);
01164     ymin = -size * textord_merge_desc;
01165     row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
01166     row->merged = FALSE;
01167   }
01168 }
01169 
01170 
01176 void compute_row_stats(                  //find lines
01177                        TO_BLOCK *block,  //block to do
01178                        BOOL8 testing_on  //correct orientation
01179                       ) {
01180   INT32 row_index;               //of median
01181   TO_ROW *row;                   //current row
01182   TO_ROW *prev_row;              //previous row
01183   float iqr;                     //inter quartile range
01184   TO_ROW_IT row_it = block->get_rows ();
01185                                  //number of rows
01186   INT16 rowcount = row_it.length ();
01187   TO_ROW **rows;                 //for choose nth
01188 
01189   rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
01190   if (rows == NULL)
01191     MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
01192   rowcount = 0;
01193   prev_row = NULL;
01194   row_it.move_to_last ();        //start at bottom
01195   do {
01196     row = row_it.data ();
01197     if (prev_row != NULL) {
01198       rows[rowcount++] = prev_row;
01199       prev_row->spacing = row->intercept () - prev_row->intercept ();
01200       if (testing_on)
01201         tprintf ("Row at %g yields spacing of %g\n",
01202           row->intercept (), prev_row->spacing);
01203     }
01204     prev_row = row;
01205     row_it.backward ();
01206   }
01207   while (!row_it.at_last ());
01208   block->key_row = prev_row;
01209   block->baseline_offset =
01210     fmod (prev_row->parallel_c (), block->line_spacing);
01211   if (testing_on)
01212     tprintf ("Blob based spacing=(%g,%g), offset=%g",
01213       block->line_size, block->line_spacing, block->baseline_offset);
01214   if (rowcount > 0) {
01215     row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
01216       sizeof (TO_ROW *), row_spacing_order);
01217     iqr = rows[row_index]->spacing;
01218     row_index = choose_nth_item (rowcount / 4, rows, rowcount,
01219       sizeof (TO_ROW *), row_spacing_order);
01220     iqr -= rows[row_index]->spacing;
01221     row_index = choose_nth_item (rowcount / 2, rows, rowcount,
01222       sizeof (TO_ROW *), row_spacing_order);
01223     block->key_row = rows[row_index];
01224     if (testing_on)
01225       tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
01226     if (rowcount > 2
01227     && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
01228       if (!textord_new_initial_xheight) {
01229         if (rows[row_index]->spacing < block->line_spacing
01230           && rows[row_index]->spacing > block->line_size)
01231           //within range
01232           block->line_size = rows[row_index]->spacing;
01233         //spacing=size
01234         else if (rows[row_index]->spacing > block->line_spacing)
01235           block->line_size = block->line_spacing;
01236         //too big so use max
01237       }
01238       else {
01239         if (rows[row_index]->spacing < block->line_spacing)
01240           block->line_size = rows[row_index]->spacing;
01241         else
01242           block->line_size = block->line_spacing;
01243         //too big so use max
01244       }
01245       if (block->line_size < textord_min_xheight)
01246         block->line_size = (float) textord_min_xheight;
01247       block->line_spacing = rows[row_index]->spacing;
01248       block->max_blob_size =
01249         block->line_spacing * textord_excess_blobsize;
01250     }
01251     block->baseline_offset = fmod (rows[row_index]->intercept (),
01252       block->line_spacing);
01253   }
01254   if (testing_on)
01255     tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
01256       block->line_size, block->line_spacing, block->baseline_offset);
01257   free_mem(rows);
01258 }
01259 
01260 
01267 void compute_block_xheight(                  //find lines
01268                            TO_BLOCK *block,  //block to do
01269                            float gradient    //global skew
01270                           ) {
01271   TO_ROW *row;                   //current row
01272   int xh_count, desc_count;      //no of samples
01273   float block_median;            //median blob size
01274   int asc_count, cap_count;
01275   INT32 min_size, max_size;      //limits on xheight
01276   INT32 evidence;                //no of samples on row
01277   float xh_sum, desc_sum;        //for averages
01278   float asc_sum, cap_sum;
01279   TO_ROW_IT row_it = block->get_rows ();
01280   STATS row_heights;             //block evidence
01281 
01282   if (row_it.empty ())
01283     return;                      //no rows
01284   block_median = median_block_xheight (block, gradient);
01285   block_median *= 2;
01286   if (block_median < block->line_size)
01287     block_median = block->line_size;
01288   //      tprintf("Block median=%g, linesize=%g\n",
01289   //              block_median,block->line_size);
01290   max_size = (INT32) ceil (block_median);
01291   min_size = (INT32) floor (block_median * textord_minxh);
01292   row_heights.set_range (min_size, max_size + 1);
01293   xh_count = desc_count = asc_count = cap_count = 0;
01294   xh_sum = desc_sum = asc_sum = cap_sum = 0.0f;
01295   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01296     row = row_it.data ();
01297     evidence = compute_row_xheight (row, min_size, max_size, gradient);
01298     if (row->xheight > 0 && row->ascrise > 0) {
01299       row_heights.add ((INT32) row->xheight, evidence);
01300       xh_count += evidence;
01301       asc_sum += row->ascrise;
01302       asc_count++;
01303     }
01304     else if (row->xheight > 0) {
01305       cap_sum += row->xheight;   //assume just caps
01306       cap_count++;
01307     }
01308     if (row->descdrop != 0) {
01309       desc_sum += row->descdrop;
01310       desc_count++;
01311     }
01312   }
01313   if (xh_count > 0) {
01314                                  //median
01315     xh_sum = row_heights.ile (0.5);
01316     asc_sum /= asc_count;
01317   }
01318   else if (cap_count > 0) {
01319     cap_sum /= cap_count;        //must assume caps
01320     xh_sum =
01321       cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc);
01322     asc_sum =
01323       cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc);
01324   }
01325   else {
01326                                  //default sizes
01327     xh_sum = block_median * textord_merge_x;
01328     asc_sum = block_median * textord_merge_asc;
01329   }
01330   if (desc_count > 0) {
01331     desc_sum /= desc_count;
01332   }
01333   else {
01334     desc_sum = xh_sum * textord_merge_desc / textord_merge_x;
01335   }
01336   // tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n",
01337   //         xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count,
01338   //         cap_sum,cap_count);
01339   if (xh_sum < textord_min_xheight)
01340     xh_sum = (float) textord_min_xheight;
01341   block->xheight = xh_sum;
01342   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01343     correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum);
01344   }
01345 }
01346 
01347 
01353 float median_block_xheight(                  //find lines
01354                            TO_BLOCK *block,  //block to do
01355                            float gradient    //global skew
01356                           ) {
01357   TO_ROW *row;                   //current row
01358   float result;                  //output size
01359   float xcentre;                 //centre of blob
01360   TO_ROW_IT row_it = block->get_rows ();
01361   BLOBNBOX_IT blob_it;
01362   BLOBNBOX *blob;                //current blob
01363   float *heights;                //for choose nth
01364   INT32 blob_count;              //blobs in block
01365   INT32 blob_index;              //current blob
01366 
01367   blob_count = 0;
01368   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
01369     blob_count += row_it.data ()->blob_list ()->length ();
01370   heights = (float *) alloc_mem (blob_count * sizeof (float));
01371   if (heights == NULL)
01372     MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
01373 
01374   blob_index = 0;
01375   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01376     row = row_it.data ();
01377     blob_it.set_to_list (row->blob_list ());
01378     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01379     blob_it.forward ()) {
01380       blob = blob_it.data ();
01381       if (!blob->joined_to_prev ()) {
01382         xcentre =
01383           (blob->bounding_box ().left () +
01384           blob->bounding_box ().right ()) / 2.0f;
01385         heights[blob_index] =
01386           blob->bounding_box ().top () - gradient * xcentre -
01387           row->parallel_c ();
01388         if (heights[blob_index] > 0)
01389           blob_index++;
01390       }
01391     }
01392   }
01393   ASSERT_HOST (blob_index > 0);  //dont expect 0
01394   blob_count = blob_index;
01395   blob_index = choose_nth_item (blob_count / 2, heights, blob_count);
01396   result = heights[blob_index];
01397   free_mem(heights);
01398   return result;
01399 }
01400 
01401 
01408 INT32 compute_row_xheight(                   //find lines
01409                           TO_ROW *row,       //row to do
01410                           INT32 min_height,  //min xheight
01411                           INT32 max_height,  //max xheight
01412                           float gradient     //global skew
01413                          ) {
01414   BOOL8 in_best_pile;            //control of mode size
01415   INT32 prev_size;               //previous size
01416   float xcentre;                 //centre of blob
01417   float height;                  //height of blob
01418   BLOBNBOX_IT blob_it = row->blob_list ();
01419   BLOBNBOX *blob;                //current blob
01420   INT32 blob_count;              //blobs in block
01421   INT32 x;                       //xheight index
01422   INT32 asc;                     //ascender index
01423   INT32 blob_index;              //current blob
01424   INT32 mode_count;              //no of modes
01425   INT32 best_count;              //count of best x so far
01426   float ratio;                   //size ratio
01427   INT32 modes[MAX_HEIGHT_MODES]; //biggest piles
01428   STATS heights (min_height, max_height + 1);
01429 
01430   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01431     blob = blob_it.data ();
01432     if (!blob->joined_to_prev ()) {
01433       xcentre =
01434         (blob->bounding_box ().left () +
01435         blob->bounding_box ().right ()) / 2.0f;
01436       height = blob->bounding_box ().top ();
01437       if (textord_fix_xheight_bug)
01438         height -= row->baseline.y (xcentre);
01439       else
01440         height -= gradient * xcentre + row->parallel_c ();
01441       if (height >= min_height && height <= max_height
01442         && (!textord_xheight_tweak || height > textord_min_xheight))
01443         heights.add ((INT32) floor (height + 0.5), 1);
01444     }
01445   }
01446   blob_index = heights.mode ();  //find mode
01447                                  //get count of mode
01448   blob_count = heights.pile_count (blob_index);
01449   if (textord_debug_xheights)
01450     tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n",
01451       min_height, max_height, blob_index, blob_count,
01452       heights.get_total (), row->blob_list ()->length ());
01453   row->ascrise = 0.0f;
01454   row->xheight = 0.0f;
01455   row->descdrop = 0.0f;          //undefined;
01456   in_best_pile = FALSE;
01457   prev_size = -MAX_INT32;
01458   best_count = 0;
01459   if (blob_count > 0) {
01460                                  //get biggest ones
01461     mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES);
01462     for (x = 0; x < mode_count - 1; x++) {
01463       if (modes[x] != prev_size + 1)
01464         in_best_pile = FALSE;    //had empty height
01465       if (heights.pile_count (modes[x])
01466         >= blob_count * textord_xheight_mode_fraction
01467       && (in_best_pile || heights.pile_count (modes[x]) > best_count)) {
01468         for (asc = x + 1; asc < mode_count; asc++) {
01469           ratio = (float) modes[asc] / modes[x];
01470           if (textord_ascx_ratio_min < ratio
01471             && ratio < textord_ascx_ratio_max
01472             && heights.pile_count (modes[asc])
01473           >= blob_count * textord_ascheight_mode_fraction) {
01474             if (heights.pile_count (modes[x]) > best_count) {
01475               in_best_pile = TRUE;
01476               best_count = heights.pile_count (modes[x]);
01477             }
01478             // tprintf("X=%d, asc=%d, count=%d,  ratio=%g\n",
01479             //         modes[x],modes[asc]-modes[x],
01480             //         heights.pile_count(modes[x]),
01481             //         ratio);
01482             prev_size = modes[x];
01483             row->xheight = (float) modes[x];
01484             row->ascrise = (float) (modes[asc] - modes[x]);
01485           }
01486         }
01487       }
01488     }
01489     if (row->xheight == 0) {
01490                                  //single mode
01491       row->xheight = (float) blob_index;
01492       row->ascrise = 0.0f;
01493       if (textord_debug_xheights)
01494         tprintf ("Single mode xheight set to %g\n", row->xheight);
01495     }
01496     else if (textord_debug_xheights)
01497       tprintf ("Multi-mode xheight set to %g, asc=%g\n",
01498           row->xheight, row->ascrise);
01499     row->descdrop = (float) compute_row_descdrop (row, gradient);
01500     //find descenders
01501   }
01502   return best_count;
01503 }
01504 
01505 
01511 INT32 compute_row_descdrop(                //find lines
01512                            TO_ROW *row,    //row to do
01513                            float gradient  //global skew
01514                           ) {
01515   INT32 min_height = (INT32) floor (row->xheight * textord_descx_ratio_min);
01516   INT32 max_height = (INT32) floor (row->xheight * textord_descx_ratio_max);
01517   float xcentre;                 //centre of blob
01518   float height;                  //height of blob
01519   BLOBNBOX_IT blob_it = row->blob_list ();
01520   BLOBNBOX *blob;                //current blob
01521   INT32 blob_count;              //blobs in block
01522   INT32 blob_index;              //current blob
01523   STATS heights (min_height, max_height + 1);
01524 
01525   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01526     blob = blob_it.data ();
01527     if (!blob->joined_to_prev ()) {
01528       xcentre =
01529         (blob->bounding_box ().left () +
01530         blob->bounding_box ().right ()) / 2.0f;
01531       height =
01532         gradient * xcentre + row->parallel_c () -
01533         blob->bounding_box ().bottom ();
01534       if (height >= min_height && height <= max_height)
01535         heights.add ((INT32) floor (height + 0.5), 1);
01536     }
01537   }
01538   blob_index = heights.mode ();  //find mode
01539                                  //get count of mode
01540   blob_count = heights.pile_count (blob_index);
01541   return blob_count > 0 ? -blob_index : 0;
01542 }
01543 
01544 
01557 INT32 compute_height_modes(                   //find lines
01558                            STATS *heights,    //stats to search
01559                            INT32 min_height,  //bottom of range
01560                            INT32 max_height,  //top of range
01561                            INT32 *modes,      //output array
01562                            INT32 maxmodes     //size of modes
01563                           ) {
01564   INT32 pile_count;              //no in source pile
01565   INT32 src_count;               //no of source entries
01566   INT32 src_index;               //current entry
01567   INT32 least_count;             //height of smalllest
01568   INT32 least_index;             //index of least
01569   INT32 dest_count;              //index in modes
01570 
01571   src_count = max_height + 1 - min_height;
01572   dest_count = 0;
01573   least_count = MAX_INT32;
01574   least_index = -1;
01575   for (src_index = 0; src_index < src_count; src_index++) {
01576     pile_count = heights->pile_count (min_height + src_index);
01577     if (pile_count > 0) {
01578       if (dest_count < maxmodes) {
01579         if (pile_count < least_count) {
01580                                  //find smallest in array
01581           least_count = pile_count;
01582           least_index = dest_count;
01583         }
01584         modes[dest_count++] = min_height + src_index;
01585       }
01586       else if (pile_count >= least_count) {
01587         while (least_index < maxmodes - 1) {
01588           modes[least_index] = modes[least_index + 1];
01589           //shuffle up
01590           least_index++;
01591         }
01592                                  //new one on end
01593         modes[maxmodes - 1] = min_height + src_index;
01594         if (pile_count == least_count) {
01595                                  //new smallest
01596           least_index = maxmodes - 1;
01597         }
01598         else {
01599           least_count = heights->pile_count (modes[0]);
01600           least_index = 0;
01601           for (dest_count = 1; dest_count < maxmodes; dest_count++) {
01602             pile_count = heights->pile_count (modes[dest_count]);
01603             if (pile_count < least_count) {
01604                                  //find smallest
01605               least_count = pile_count;
01606               least_index = dest_count;
01607             }
01608           }
01609         }
01610       }
01611     }
01612   }
01613   return dest_count;
01614 }
01615 
01616 
01623 void correct_row_xheight(                //fix bad values
01624                          TO_ROW *row,    //row to fix
01625                          float xheight,  //average values
01626                          float ascrise,
01627                          float descdrop) {
01628   if (textord_row_xheights) {
01629     if (row->xheight <= 0)
01630       row->xheight = xheight;
01631     if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) {
01632       if (row->xheight >= xheight * (1 - textord_xheight_error_margin)
01633       && row->xheight <= xheight * (1 + textord_xheight_error_margin)) {
01634         row->all_caps = FALSE;
01635         row->ascrise = ascrise;
01636       }
01637       else if (row->xheight >=
01638         (xheight + ascrise) * (1 - textord_xheight_error_margin)
01639         && row->xheight <=
01640       (xheight + ascrise) * (1 + textord_xheight_error_margin)) {
01641         row->all_caps = TRUE;
01642                                  //it was caps
01643         row->ascrise = row->xheight - xheight;
01644         row->xheight = xheight;
01645       }
01646       else {
01647         row->all_caps = TRUE;
01648         row->ascrise = row->xheight * ascrise / (xheight + ascrise);
01649         row->xheight -= row->ascrise;
01650       }
01651     }
01652     else
01653       row->all_caps = FALSE;
01654     row->ascrise = ascrise;
01655     if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1))
01656       row->descdrop = descdrop;
01657   }
01658   else {
01659     if (row->xheight < xheight * (1 - textord_xheight_error_margin)
01660       || row->xheight > xheight * (1 + textord_xheight_error_margin))
01661       row->xheight = xheight;    //set to average
01662     row->all_caps = row->ascrise <= 0;
01663     if (row->ascrise < ascrise * (1 - textord_xheight_error_margin)
01664       || row->ascrise > ascrise * (1 + textord_xheight_error_margin))
01665       row->ascrise = ascrise;    //set to average
01666     if (row->descdrop < descdrop * (1 - textord_xheight_error_margin)
01667       || row->descdrop > descdrop * (1 + textord_xheight_error_margin))
01668       row->descdrop = descdrop;  //set to average
01669   }
01670 }
01671 
01672 
01679 void separate_underlines(                  //make rough chars
01680                          TO_BLOCK *block,  //block to do
01681                          float gradient,   //skew angle
01682                          FCOORD rotation,  //inverse landscape
01683                          BOOL8 testing_on  //correct orientation
01684                         ) {
01685   BLOBNBOX *blob;                //current blob
01686   PBLOB *poly_blob;              //rotated blob
01687   C_BLOB *rotated_blob;          //rotated blob
01688   TO_ROW *row;                   //current row
01689   float length;                  //of g_vec
01690   BOX blob_box;
01691   FCOORD blob_rotation;          //inverse of rotation
01692   FCOORD g_vec;                  //skew rotation
01693   BLOBNBOX_IT blob_it;           //iterator
01694                                  //iterator
01695   BLOBNBOX_IT under_it = &block->underlines;
01696   TO_ROW_IT row_it = block->get_rows ();
01697 
01698 #ifdef TEXT_VERBOSE
01699   // gets a 'u', see ccmain/tesseractmain.dox
01700   cprintf("u");
01701 #endif
01702                                  //length of vector
01703   length = sqrt (1 + gradient * gradient);
01704   g_vec = FCOORD (1 / length, -gradient / length);
01705   blob_rotation = FCOORD (rotation.x (), -rotation.y ());
01706   blob_rotation.rotate (g_vec);  //unoding everything
01707   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01708     row = row_it.data ();
01709                                  //get blobs
01710     blob_it.set_to_list (row->blob_list ());
01711     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01712     blob_it.forward ()) {
01713       blob = blob_it.data ();
01714       blob_box = blob->bounding_box ();
01715       if (blob_box.width () > block->line_size * textord_underline_width) {
01716         if (textord_cblob_blockocc && blob->cblob () != NULL) {
01717           rotated_blob = crotate_cblob (blob->cblob (),
01718             blob_rotation);
01719           if (test_underline (testing_on && textord_show_final_rows,
01720             rotated_blob, (INT16) row->intercept (),
01721             (INT16) (block->line_size *
01722             (textord_merge_x +
01723           textord_merge_asc / 2.0f)))) {
01724             under_it.add_after_then_move (blob_it.extract ());
01725             if (testing_on && textord_show_final_rows) {
01726               tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
01727                 rotated_blob->bounding_box ().left (),
01728                 rotated_blob->bounding_box ().bottom (),
01729                 rotated_blob->bounding_box ().right (),
01730                 rotated_blob->bounding_box ().top ());
01731               tprintf ("(Was (%d,%d)->(%d,%d))\n",
01732                 blob_box.left (), blob_box.bottom (),
01733                 blob_box.right (), blob_box.top ());
01734             }
01735           }
01736           delete rotated_blob;
01737         }
01738         else {
01739           if (blob->blob () != NULL) {
01740             //  if (testing_on && textord_show_final_rows)
01741             //      tprintf("Rotating by (%g,%g)\n",
01742             //              blob_rotation.x(),blob_rotation.y());
01743             poly_blob = rotate_blob (blob->blob (), blob_rotation);
01744           }
01745           else
01746             poly_blob = rotate_cblob (blob->cblob (),
01747               block->line_size,
01748               blob_rotation);
01749           if (test_underline
01750             (testing_on
01751             && textord_show_final_rows, poly_blob,
01752             row->intercept (),
01753             block->line_size * (textord_merge_x +
01754           textord_merge_asc / 2))) {
01755             if (testing_on && textord_show_final_rows) {
01756               tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
01757                 poly_blob->bounding_box ().left (),
01758                 poly_blob->bounding_box ().bottom (),
01759                 poly_blob->bounding_box ().right (),
01760                 poly_blob->bounding_box ().top ());
01761               tprintf ("(Was (%d,%d)->(%d,%d))\n",
01762                 blob_box.left (), blob_box.bottom (),
01763                 blob_box.right (), blob_box.top ());
01764             }
01765             under_it.add_after_then_move (blob_it.extract ());
01766           }
01767           delete poly_blob;
01768         }
01769       }
01770     }
01771   }
01772 }
01773 
01774 
/**
 * pre_associate_blobs
 *
 * For every row of the block, merge each blob with the blobs that follow it
 * in the row's list for as long as the horizontal overlap is at least half
 * the width of either box, then call chop() on the merged blob.
 * When graphics are compiled in and both testing_on and
 * textord_show_final_blobs are set, the resulting boxes are drawn in to_win,
 * cycling the colour from RED to MAGENTA row by row.
 *
 * page_tr    - passed to create_to_win() if the debug window does not exist
 * block      - block whose rows are processed
 * rotation   - applied to the boxes before drawing; its conjugate (x, -y)
 *              is what chop() receives
 * testing_on - enables the debug drawing
 */
01780 void pre_associate_blobs(                  //make rough chars
01781                          ICOORD page_tr,   //top right
01782                          TO_BLOCK *block,  //block to do
01783                          FCOORD rotation,  //inverse landscape
01784                          BOOL8 testing_on  //correct orientation
01785                         ) {
01786 #ifndef GRAPHICS_DISABLED
01787   COLOUR colour;                 //of boxes
01788 #endif
01789   INT16 overlap;                 //of adjacent boxes
01790   BLOBNBOX *blob;                //current blob
01791   BLOBNBOX *nextblob;            //next in list
01792   BOX blob_box;
01793   BOX next_box;                  //next blob
01794   FCOORD blob_rotation;          //inverse of rotation
01795   BLOBNBOX_IT blob_it;           //iterator
01796   BLOBNBOX_IT start_it;          //iterator
01797   TO_ROW_IT row_it = block->get_rows ();
01798 
01799 #ifndef GRAPHICS_DISABLED
01800   colour = RED;
01801 #endif
01802 
01803   blob_rotation = FCOORD (rotation.x (), -rotation.y ());
01804   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01805                                  //get blobs
01806     blob_it.set_to_list (row_it.data ()->blob_list ());
01807     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01808     blob_it.forward ()) {
01809       blob = blob_it.data ();
01810       blob_box = blob->bounding_box ();
01811       start_it = blob_it;        //save start point
01812       //   if (testing_on && textord_show_final_blobs)
01813       //   {
01814       //      tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
01815       //              blob_box.left(),blob_box.bottom(),
01816       //              blob_box.right(),blob_box.top(),
01817       //              (void*)blob,blob_it.length());
01818       //   }
      //Keep merging the following blob into this one until the overlap
      //test fails or the end of the row's list is reached.
01819       do {
01820         if (!blob_it.at_last ()) {
01821           nextblob = blob_it.data_relative (1);
01822           next_box = nextblob->bounding_box ();
          //overlap = width of next_box minus the parts of it that
          //stick out to the left and to the right of blob_box
01823           overlap = next_box.width ();
01824           if (blob_box.left () > next_box.left ())
01825             overlap -= blob_box.left () - next_box.left ();
01826           if (blob_box.right () < next_box.right ())
01827             overlap -= next_box.right () - blob_box.right ();
01828           if (overlap >= next_box.width () / 2
01829           || overlap >= blob_box.width () / 2) {
01830                                  //merge new blob
01831             blob->merge (nextblob);
01832                                  //get bigger box
01833             blob_box = blob->bounding_box ();
            //step onto the blob just merged so that the next pass
            //looks at the one after it
01834             blob_it.forward ();
01835           }
01836           else
01837             overlap = -1;        //no overlap
01838         }
01839         else
01840           overlap = -1;          //no overlap
01841       }
01842       while (overlap >= 0);
      //start_it is at the first blob of the merged run, blob_it at the last
01843       blob->chop (&start_it, &blob_it,
01844         blob_rotation,
01845         block->line_size * textord_merge_x *
01846         textord_chop_width);
01847       //attempt chop
01848     }
01849 #ifndef GRAPHICS_DISABLED
01850     if (testing_on && textord_show_final_blobs) {
01851       if (to_win == NO_WINDOW)
01852         create_to_win(page_tr);
01853       perimeter_color_index(to_win, colour);
01854       interior_style(to_win, INT_HOLLOW, TRUE);
01855       for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01856       blob_it.forward ()) {
01857         blob = blob_it.data ();
01858         blob_box = blob->bounding_box ();
01859         blob_box.rotate (rotation);
        //only blobs not flagged as joined to the previous one are drawn
01860         if (!blob->joined_to_prev ()) {
01861           rectangle (to_win, blob_box.left (), blob_box.bottom (),
01862             blob_box.right (), blob_box.top ());
01863         }
01864       }
      //next row gets the next colour, wrapping after MAGENTA
01865       colour = (COLOUR) (colour + 1);
01866       if (colour > MAGENTA)
01867         colour = RED;
01868     }
01869 #endif
01870   }
01871 }
01872 
01873 
01879 void fit_parallel_rows(                   //find lines
01880                        TO_BLOCK *block,   //block to do
01881                        float gradient,    //gradient to fit
01882                        FCOORD rotation,   //for drawing
01883                        INT32 block_edge,  //edge of block
01884                        BOOL8 testing_on   //correct orientation
01885                       ) {
01886 #ifndef GRAPHICS_DISABLED
01887   COLOUR colour;                 //of row
01888 #endif
01889   TO_ROW_IT row_it = block->get_rows ();
01890 
01891   row_it.move_to_first ();
01892   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01893     if (row_it.data ()->blob_list ()->empty ())
01894       delete row_it.extract ();  //nothing in it
01895     else
01896       fit_parallel_lms (gradient, row_it.data ());
01897   }
01898 #ifndef GRAPHICS_DISABLED
01899   if (testing_on) {
01900     colour = RED;
01901     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01902       plot_parallel_row (row_it.data (), gradient,
01903         block_edge, colour, rotation);
01904       colour = (COLOUR) (colour + 1);
01905       if (colour > MAGENTA)
01906         colour = RED;
01907     }
01908   }
01909 #endif
01910   row_it.sort (row_y_order);     //may have gone out of order
01911 }
01912 
01913 
01920 void fit_parallel_lms(                 //sort function
01921                       float gradient,  //forced gradient
01922                       TO_ROW *row      //row to fit
01923                      ) {
01924   float c;                       //fitted line
01925   int blobcount;                 //no of blobs
01926   BOX box;                       //blob box
01927   LMS lms (row->blob_list ()->length ());
01928                                  //blobs
01929   BLOBNBOX_IT blob_it = row->blob_list ();
01930 
01931 #ifdef TEXT_VERBOSE
01932   // gets a 'm', see ccmain/tesseractmain.dox
01933   cprintf("m");
01934 #endif
01935   blobcount = 0;
01936   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01937     if (!blob_it.data ()->joined_to_prev ()) {
01938       box = blob_it.data ()->bounding_box ();
01939       lms.
01940         add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));
01941       blobcount++;
01942     }
01943   }
01944   lms.constrained_fit (gradient, c);
01945   row->set_parallel_line (gradient, c, lms.error ());
01946   if (textord_straight_baselines && blobcount > lms_line_trials) {
01947     lms.fit (gradient, c);
01948   }
01949                                  //set the other too
01950   row->set_line (gradient, c, lms.error ());
01951 }
01952 
01953 
01959 void make_spline_rows(                   //find lines
01960                       TO_BLOCK *block,   //block to do
01961                       float gradient,    //gradient to fit
01962                       FCOORD rotation,   //for drawing
01963                       INT32 block_edge,  //edge of block
01964                       BOOL8 testing_on   //correct orientation
01965                      ) {
01966   COLOUR colour;                 //of row
01967   TO_ROW_IT row_it = block->get_rows ();
01968 
01969   row_it.move_to_first ();
01970   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01971     if (row_it.data ()->blob_list ()->empty ())
01972       delete row_it.extract ();  //nothing in it
01973     else
01974       make_baseline_spline (row_it.data (), block);
01975   }
01976   if (textord_old_baselines) {
01977 #ifndef GRAPHICS_DISABLED
01978     if (testing_on) {
01979       colour = RED;
01980       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
01981       row_it.forward ()) {
01982         row_it.data ()->baseline.plot (to_win, colour);
01983         colour = (COLOUR) (colour + 1);
01984         if (colour > MAGENTA)
01985           colour = RED;
01986       }
01987     }
01988 #endif
01989     make_old_baselines(block, testing_on);
01990   }
01991 #ifndef GRAPHICS_DISABLED
01992   if (testing_on) {
01993     colour = RED;
01994     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01995       row_it.data ()->baseline.plot (to_win, colour);
01996       colour = (COLOUR) (colour + 1);
01997       if (colour > MAGENTA)
01998         colour = RED;
01999     }
02000   }
02001 #endif
02002 }
02003 
02004 
02011 void make_baseline_spline(                 //sort function
02012                           TO_ROW *row,     //row to fit
02013                           TO_BLOCK *block  //block it came from
02014                          ) {
02015   float b, c;                    //fitted curve
02016   float middle;                  //x middle of blob
02017   BOX box;                       //blob box
02018   LMS lms (row->blob_list ()->length ());
02019                                  //blobs
02020   BLOBNBOX_IT blob_it = row->blob_list ();
02021   INT32 *xstarts;                //spline boundaries
02022   double *coeffs;                //quadratic coeffs
02023   INT32 segments;                //no of segments
02024   INT32 segment;                 //current segment
02025 
02026   xstarts =
02027     (INT32 *) alloc_mem ((row->blob_list ()->length () + 1) * sizeof (INT32));
02028   if (segment_baseline (row, block, segments, xstarts)
02029   && !textord_straight_baselines && !textord_parallel_baselines) {
02030     if (textord_quadratic_baselines) {
02031       coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
02032       for (segment = 0; segment < segments; segment++) {
02033         lms.clear ();
02034         for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
02035         blob_it.forward ()) {
02036           if (!blob_it.data ()->joined_to_prev ()) {
02037             box = blob_it.data ()->bounding_box ();
02038             middle = (box.left () + box.right ()) / 2.0;
02039             if (middle >= xstarts[segment]
02040             && middle < xstarts[segment + 1]) {
02041               lms.add (FCOORD (middle, box.bottom ()));
02042             }
02043           }
02044         }
02045         if (textord_quadratic_baselines)
02046           lms.fit_quadratic (block->line_size *
02047             textord_spline_outlier_fraction,
02048             coeffs[segment * 3], b, c);
02049         else {
02050           lms.fit (b, c);
02051           coeffs[segment * 3] = 0;
02052         }
02053         coeffs[segment * 3 + 1] = b;
02054         coeffs[segment * 3 + 2] = c;
02055       }
02056     }
02057     else
02058       coeffs = linear_spline_baseline (row, block, segments, xstarts);
02059   }
02060   else {
02061     xstarts[1] = xstarts[segments];
02062     segments = 1;
02063     coeffs = (double *) alloc_mem (3 * sizeof (double));
02064     coeffs[0] = 0;
02065     coeffs[1] = row->line_m ();
02066     coeffs[2] = row->line_c ();
02067   }
02068   row->baseline = QSPLINE (segments, xstarts, coeffs);
02069   free_mem(coeffs);
02070   free_mem(xstarts);
02071 }
02072 
02073 
/**
 * segment_baseline
 *
 * Divide the row into baseline segments. A window of
 * textord_spline_medianwin blobs is slid along the row; for each position
 * the middle entry of the window's yshifts (blob bottom minus the row's
 * fitted line, presumably kept in sorted order by SORTED_FLOATS, i.e. the
 * median) is classified as above (+1), on (0) or below (-1) the line using
 * textord_spline_shift_fraction * block->line_size as the threshold.
 * A new segment is started when the state changes and more than
 * textord_spline_minblobs blobs have passed since the last boundary.
 *
 * On return segments holds the number of segments and xstarts[0..segments]
 * their x boundaries. Returns TRUE if any window position had a non-zero
 * state. Returns FALSE, with a single segment, when the row has too few
 * blobs for the window.
 */
02082 BOOL8
02083 segment_baseline (               //split baseline
02084 TO_ROW * row,                    //row to fit
02085 TO_BLOCK * block,                //block it came from
02086 INT32 & segments,                //no of segments
02087 INT32 xstarts[]                  //coords of segments
02088 ) {
02089   BOOL8 needs_curve;             //needs curved line
02090   int blobcount;                 //no of blobs
02091   int blobindex;                 //current blob
02092   int last_state;                //above, on , below
02093   int state;                     //of current blob
02094   float yshift;                  //from baseline
02095   BOX box;                       //blob box
02096   BOX new_box;                   //new_it box
02097   float middle;                  //xcentre of blob
02098                                  //blobs
02099   BLOBNBOX_IT blob_it = row->blob_list ();
02100   BLOBNBOX_IT new_it = blob_it;  //front end
02101   SORTED_FLOATS yshifts;         //shifts from baseline
02102 
02103   needs_curve = FALSE;
02104   box = box_next_pre_chopped (&blob_it);
02105   xstarts[0] = box.left ();
02106   segments = 1;
02107   blobcount = row->blob_list ()->length ();
02108   if (textord_oldbl_debug)
02109     tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
02110       blobcount, box.left (), box.bottom ());
  //too few blobs to fill the window: one segment up to the last blob
02111   if (blobcount <= textord_spline_medianwin
02112   || blobcount < textord_spline_minblobs) {
02113     blob_it.move_to_last ();
02114     box = blob_it.data ()->bounding_box ();
02115     xstarts[1] = box.right ();
02116     return FALSE;
02117   }
02118   last_state = 0;
02119   new_it.mark_cycle_pt ();
  //prime the window with the first textord_spline_medianwin shifts
02120   for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
02121     new_box = box_next_pre_chopped (&new_it);
02122     middle = (new_box.left () + new_box.right ()) / 2.0;
02123     yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
02124                                  //record shift
02125     yshifts.add (yshift, blobindex);
    //the list ran out while priming: give up with one segment
02126     if (new_it.cycled_list ()) {
02127       xstarts[1] = new_box.right ();
02128       return FALSE;
02129     }
02130   }
  //move blob_it on by half a window (it lags new_it); blobcount is
  //reused from here on as the count of blobs since the last boundary
02131   for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
02132     box = box_next_pre_chopped (&blob_it);
02133   do {
02134     new_box = box_next_pre_chopped (&new_it);
02135                                  //get middle one
02136     yshift = yshifts[textord_spline_medianwin / 2];
02137     if (yshift > textord_spline_shift_fraction * block->line_size)
02138       state = 1;
02139     else if (-yshift > textord_spline_shift_fraction * block->line_size)
02140       state = -1;
02141     else
02142       state = 0;
02143     if (state != 0)
02144       needs_curve = TRUE;
02145     //              tprintf("State=%d, prev=%d, shift=%g\n",
02146     //                      state,last_state,yshift);
    //state change after enough blobs: start a new segment at box
02147     if (state != last_state && blobcount > textord_spline_minblobs) {
02148       xstarts[segments++] = box.left ();
02149       blobcount = 0;
02150     }
02151     last_state = state;
    //slide the window: drop the oldest shift, add the one for new_box
02152     yshifts.remove (blobindex - textord_spline_medianwin);
02153     box = box_next_pre_chopped (&blob_it);
02154     middle = (new_box.left () + new_box.right ()) / 2.0;
02155     yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
02156     yshifts.add (yshift, blobindex);
02157     blobindex++;
02158     blobcount++;
02159   }
02160   while (!new_it.cycled_list ());
  //close the last segment; if it is too short (and not the only one)
  //its start boundary is overwritten, merging it into the previous one
02161   if (blobcount > textord_spline_minblobs || segments == 1) {
02162     xstarts[segments] = new_box.right ();
02163   }
02164   else {
02165     xstarts[--segments] = new_box.right ();
02166   }
02167   if (textord_oldbl_debug)
02168     tprintf ("Made %d segments on row at (%d,%d)\n",
02169       segments, box.right (), box.bottom ());
02170   return needs_curve;
02171 }
02172 
02173 
/**
 * linear_spline_baseline
 *
 * Build a piecewise-linear baseline for the row.
 * The blobs are counted by stepping blob_it with box_next_pre_chopped()
 * until it is back at the first element. The row is split into
 * blobcount / textord_spline_medianwin segments (at least one) and a
 * straight line is fitted with LMS::fit to the bottom-centre points of the
 * blobs of each segment. Two iterators are used alternately for successive
 * segments; new_it is first advanced by half a segment.
 *
 * segments and xstarts are overwritten with the segment count and the
 * segment boundaries (xstarts[0] is the left edge of the first box).
 * Returns an array of 3 doubles per segment (0, b, c from the fit),
 * allocated with alloc_mem and not released here, so the caller must
 * release it.
 *
 * NOTE(review): box_next_pre_chopped() is assumed to return the box at the
 * iterator and advance it - confirm against its definition.
 */
02182 double *
02183 linear_spline_baseline (         //split baseline
02184 TO_ROW * row,                    //row to fit
02185 TO_BLOCK * block,                //block it came from
02186 INT32 & segments,                //no of segments
02187 INT32 xstarts[]                  //coords of segments
02188 ) {
02189   int blobcount;                 //no of blobs
02190   int blobindex;                 //current blob
02191   int index1, index2;            //blob numbers
02192   int blobs_per_segment;         //blobs in each
02193   BOX box;                       //blob box
02194   BOX new_box;                   //new_it box
02195   float middle;                  //xcentre of blob
02196                                  //blobs
02197   BLOBNBOX_IT blob_it = row->blob_list ();
02198   BLOBNBOX_IT new_it = blob_it;  //front end
02199   float b, c;                    //fitted curve
02200   LMS lms (row->blob_list ()->length ());
02201   double *coeffs;                //quadratic coeffs
02202   INT32 segment;                 //current segment
02203 
02204   box = box_next_pre_chopped (&blob_it);
02205   xstarts[0] = box.left ();
  //count the boxes by going once round the list
02206   blobcount = 1;
02207   while (!blob_it.at_first ()) {
02208     blobcount++;
02209     box = box_next_pre_chopped (&blob_it);
02210   }
02211   segments = blobcount / textord_spline_medianwin;
02212   if (segments < 1)
02213     segments = 1;
02214   blobs_per_segment = blobcount / segments;
02215   coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
02216   if (textord_oldbl_debug)
02217     tprintf
02218       ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
02219       blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
02220   segment = 1;
  //start new_it half a segment ahead of blob_it
02221   for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
02222     box_next_pre_chopped(&new_it);
02223   index1 = 0;
02224   blobindex = index2;
  //Each pass fits one segment from blob_it (index1) and then one from
  //new_it (index2); segment is 1-based, hence the "segment * 3 - k" indices.
02225   do {
02226     blobindex += blobs_per_segment;
02227     lms.clear ();
    //the last segment also takes whatever blobs remain
02228     while (index1 < blobindex || segment == segments && index1 < blobcount) {
02229       box = box_next_pre_chopped (&blob_it);
02230       middle = (box.left () + box.right ()) / 2.0;
02231       lms.add (FCOORD (middle, box.bottom ()));
02232       index1++;
      //record the boundary half a segment before the end of this run,
      //or at the penultimate blob count
02233       if (index1 == blobindex - blobs_per_segment / 2
02234       || index1 == blobcount - 1) {
02235         xstarts[segment] = box.left ();
02236       }
02237     }
02238     lms.fit (b, c);
02239     coeffs[segment * 3 - 3] = 0;
02240     coeffs[segment * 3 - 2] = b;
02241     coeffs[segment * 3 - 1] = c;
02242     segment++;
02243     if (segment > segments)
02244       break;
02245 
    //same again for the next segment, using the leading iterator
02246     blobindex += blobs_per_segment;
02247     lms.clear ();
02248     while (index2 < blobindex || segment == segments && index2 < blobcount) {
02249       new_box = box_next_pre_chopped (&new_it);
02250       middle = (new_box.left () + new_box.right ()) / 2.0;
02251       lms.add (FCOORD (middle, new_box.bottom ()));
02252       index2++;
02253       if (index2 == blobindex - blobs_per_segment / 2
02254       || index2 == blobcount - 1) {
02255         xstarts[segment] = new_box.left ();
02256       }
02257     }
02258     lms.fit (b, c);
02259     coeffs[segment * 3 - 3] = 0;
02260     coeffs[segment * 3 - 2] = b;
02261     coeffs[segment * 3 - 1] = c;
02262     segment++;
02263   }
02264   while (segment <= segments);
02265   return coeffs;
02266 }
02267 
02268 
/**
 * assign_blobs_to_rows
 *
 * Walk the blobs of block->blobs in blob_x_order and, for each one, either
 * move it into an existing row (ASSIGN), move it into a newly created row
 * (NEW_ROW) or leave it in block->blobs (REJECT).
 * Blob top and bottom are corrected by block_skew before being compared
 * with the rows. block_skew is computed from *gradient when gradient is
 * non-NULL; otherwise it is a running estimate smoothed from the blobs
 * already placed. The row list is kept ordered by descending min_y, and
 * rows left without blobs are deleted at the end.
 *
 * block         - block to process
 * gradient      - block skew, or NULL to estimate the skew on the fly
 * pass          - only used in the debug message for the test blob
 * reject_misses - when FALSE, a NEW_ROW verdict from most_overlapping_row()
 *                 is turned into ASSIGN
 * make_new_rows - allow new rows for unmatched blobs smaller than
 *                 block->max_blob_size
 * drawing_skew  - draw the skew estimate in to_win (graphics builds only)
 */
02275 void assign_blobs_to_rows(                      //find lines
02276                           TO_BLOCK *block,      //block to do
02277                           float *gradient,      //block skew
02278                           int pass,             //identification
02279                           BOOL8 reject_misses,  //chuck big ones out
02280                           BOOL8 make_new_rows,  //add rows for unmatched
02281                           BOOL8 drawing_skew    //draw smoothed skew
02282                          ) {
02283   OVERLAP_STATE overlap_result;  //what to do with it
02284   float ycoord;                  //current y
02285   float top, bottom;             //of blob
02286   float g_length = 1.0f;         //from gradient
02287   INT16 row_count;               //no of rows
02288   INT16 left_x;                  //left edge
02289   INT16 last_x;                  //previous edge
02290   float block_skew;              //y delta
02291   float smooth_factor;           //for new coords
02292   float near_dist;               //dist to nearest row
02293   ICOORD testpt;                 //testing only
02294   BLOBNBOX *blob;                //current blob
02295   TO_ROW *row;                   //current row
02296   TO_ROW *dest_row;              //row to put blob in
02297                                  //iterators
02298   BLOBNBOX_IT blob_it = &block->blobs;
02299   TO_ROW_IT row_it = block->get_rows ();
02300 
02301 #ifdef TEXT_VERBOSE
02302   // gets a 'l', see ccmain/tesseractmain.dox
02303   cprintf("l");
02304 #endif
  //vertical middle of the block: only used as the origin for drawing
02305   ycoord =
02306     (block->block->bounding_box ().bottom () +
02307     block->block->bounding_box ().top ()) / 2.0f;
02308   if (gradient != NULL)
02309     g_length = sqrt (1 + *gradient * *gradient);
02310 #ifndef GRAPHICS_DISABLED
02311   if (drawing_skew)
02312     move2d (to_win, block->block->bounding_box ().left (), ycoord);
02313 #endif
02314   testpt = ICOORD (textord_test_x, textord_test_y);
02315   blob_it.sort (blob_x_order);
02316   smooth_factor = 1.0;
02317   block_skew = 0.0f;
02318   row_count = row_it.length ();  //might have rows
  //left_x: left edge of the first blob, or of the block if there are none
02319   if (!blob_it.empty ()) {
02320     left_x = blob_it.data ()->bounding_box ().left ();
02321   }
02322   else {
02323     left_x = block->block->bounding_box ().left ();
02324   }
02325   last_x = left_x;
02326   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
02327     blob = blob_it.data ();
02328     if (gradient != NULL) {
      //skew derived directly from the supplied gradient
02329       block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
02330         + *gradient / g_length * blob->bounding_box ().left ();
02331     }
    //after a horizontal gap, scale the running skew in proportion to the
    //distance travelled from the left edge
02332     else if (blob->bounding_box ().left () - last_x > block->line_size / 2
02333       && last_x - left_x > block->line_size * 2
02334     && textord_interpolating_skew) {
02335       //                      tprintf("Interpolating skew from %g",block_skew);
02336       block_skew *= (float) (blob->bounding_box ().left () - left_x)
02337         / (last_x - left_x);
02338       //                      tprintf("to %g\n",block_skew);
02339     }
02340     last_x = blob->bounding_box ().left ();
    //skew-corrected vertical extent of the blob
02341     top = blob->bounding_box ().top () - block_skew;
02342     bottom = blob->bounding_box ().bottom () - block_skew;
02343 #ifndef GRAPHICS_DISABLED
02344     if (drawing_skew)
02345       draw2d (to_win, blob->bounding_box ().left (), ycoord + block_skew);
02346 #endif
02347     if (!row_it.empty ()) {
      //find the first row whose min_y is not above the blob's top
      //(or stop at the last row)
02348       for (row_it.move_to_first ();
02349         !row_it.at_last () && row_it.data ()->min_y () > top;
02350         row_it.forward ());
02351       row = row_it.data ();
02352       if (row->min_y () <= top && row->max_y () >= bottom) {
02353       //any overlap
02354         dest_row = row;
02355         overlap_result = most_overlapping_row (&row_it, dest_row,
02356           top, bottom,
02357           block->line_size,
02358           blob->bounding_box ().
02359           contains (testpt));
02360         if (overlap_result == NEW_ROW && !reject_misses)
02361           overlap_result = ASSIGN;
02362       }
02363       else {
        //no overlapping row: NEW_ROW unless, when new rows are not
        //allowed, a nearby row is close enough to take the blob
02364         overlap_result = NEW_ROW;
02365         if (!make_new_rows) {
02366           near_dist = row_it.data_relative (-1)->min_y () - top;
02367                                  //below bottom
02368           if (bottom < row->min_y ()) {
02369             if (row->min_y () - bottom <=
02370               (block->line_spacing -
02371             block->line_size) * textord_merge_desc) {
02372                                  //done it
02373               overlap_result = ASSIGN;
02374               dest_row = row;
02375             }
02376           }
          //the previous row in the list is nearer than this one
02377           else if (near_dist > 0
02378           && near_dist < bottom - row->max_y ()) {
02379             row_it.backward ();
02380             dest_row = row_it.data ();
02381             if (dest_row->min_y () - bottom <=
02382               (block->line_spacing -
02383             block->line_size) * textord_merge_desc) {
02384                                  //done it
02385               overlap_result = ASSIGN;
02386             }
02387           }
02388           else {
02389             if (top - row->max_y () <=
02390               (block->line_spacing -
02391               block->line_size) * (textord_merge_x +
02392             textord_merge_asc)) {
02393                                  //done it
02394               overlap_result = ASSIGN;
02395               dest_row = row;
02396             }
02397           }
02398         }
02399       }
02400       if (overlap_result == ASSIGN)
02401         dest_row->add_blob (blob_it.extract (), top, bottom,
02402           block->line_size);
02403       if (overlap_result == NEW_ROW) {
        //only blobs smaller than max_blob_size may start a row
02404         if (make_new_rows && top - bottom < block->max_blob_size) {
02405           dest_row =
02406             new TO_ROW (blob_it.extract (), top, bottom,
02407             block->line_size);
02408           row_count++;
02409           if (bottom > row_it.data ()->min_y ())
02410             row_it.add_before_then_move (dest_row);
02411           //insert in right place
02412           else
02413             row_it.add_after_then_move (dest_row);
          //more rows => smaller smoothing factor for the skew estimate
02414           smooth_factor =
02415             1.0 / (row_count * textord_skew_lag +
02416             textord_skewsmooth_offset);
02417         }
02418         else
02419           overlap_result = REJECT;
02420       }
02421     }
    //no rows exist yet: the blob starts the first row if allowed
02422     else if (make_new_rows && top - bottom < block->max_blob_size) {
02423       overlap_result = NEW_ROW;
02424       dest_row =
02425         new TO_ROW (blob_it.extract (), top, bottom, block->line_size);
02426       row_count++;
02427       row_it.add_after_then_move (dest_row);
02428       smooth_factor = 1.0 / (row_count * textord_skew_lag + 1);
02429     }
02430     else
02431       overlap_result = REJECT;
    //debug output for the blob containing the test point
02432     if (blob->bounding_box ().contains (testpt)) {
02433       if (overlap_result != REJECT) {
02434         tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n",
02435           dest_row->min_y (), dest_row->max_y (), pass);
02436       }
02437       else {
02438         tprintf ("Test blob assigned to no row on pass %d\n", pass);
02439       }
02440     }
02441     if (overlap_result != REJECT) {
      //the row at row_it may have changed size: move it towards the
      //front or the back until the list is in descending min_y order again
02442       while (!row_it.at_first ()
02443         && row_it.data ()->min_y () >
02444       row_it.data_relative (-1)->min_y ()) {
02445         row = row_it.extract ();
02446         row_it.backward ();
02447         row_it.add_before_then_move (row);
02448       }
02449       while (!row_it.at_last ()
02450         && row_it.data ()->min_y () <
02451       row_it.data_relative (1)->min_y ()) {
02452         row = row_it.extract ();
02453         row_it.forward ();
02454                                  //keep rows in order
02455         row_it.add_after_then_move (row);
02456       }
      //blend the offset between this blob's bottom and the row's
      //initial_min_y into the running skew estimate
02457       block_skew = (1 - smooth_factor) * block_skew
02458         + smooth_factor * (blob->bounding_box ().bottom () -
02459         dest_row->initial_min_y ());
02460     }
02461   }
02462   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
02463     if (row_it.data ()->blob_list ()->empty ())
02464       delete row_it.extract ();  //discard empty rows
02465   }
02466 }
02468 
02482 OVERLAP_STATE most_overlapping_row(                    //find best row
02483                                    TO_ROW_IT *row_it,  //iterator
02484                                    TO_ROW *&best_row,  //output row
02485                                    float top,          //top of blob
02486                                    float bottom,       //bottom of blob
02487                                    float rowsize,      //max row size
02488                                    BOOL8 testing_blob  //test stuff
02489                                   ) {
02490   OVERLAP_STATE result;          //result of tests
02491   float overlap;                 //of blob & row
02492   float bestover;                //nearest row
02493   float merge_top, merge_bottom; //size of merged row
02494   ICOORD testpt;                 //testing only
02495   TO_ROW *row;                   //current row
02496   TO_ROW *test_row;              //for multiple overlaps
02497   BLOBNBOX_IT blob_it;           //for merging rows
02498 
02499   result = ASSIGN;
02500   row = row_it->data ();
02501   bestover = top - bottom;
02502   if (top > row->max_y ())
02503     bestover -= top - row->max_y ();
02504   if (bottom < row->min_y ())
02505                                  //compute overlap
02506     bestover -= row->min_y () - bottom;
02507   if (testing_blob) {
02508     tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
02509       bottom, top, row->min_y (), row->max_y (), bestover);
02510   }
02511   test_row = row;
02512   do {
02513     if (!row_it->at_last ()) {
02514       row_it->forward ();
02515       test_row = row_it->data ();
02516       if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
02517         merge_top =
02518           test_row->max_y () >
02519           row->max_y ()? test_row->max_y () : row->max_y ();
02520         merge_bottom =
02521           test_row->min_y () <
02522           row->min_y ()? test_row->min_y () : row->min_y ();
02523         if (merge_top - merge_bottom <= rowsize) {
02524           if (testing_blob) {
02525             tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
02526               row->min_y (), row->max_y (),
02527               test_row->min_y (), test_row->max_y ());
02528           }
02529           test_row->set_limits (merge_bottom, merge_top);
02530           blob_it.set_to_list (test_row->blob_list ());
02531           blob_it.add_list_after (row->blob_list ());
02532           blob_it.sort (blob_x_order);
02533           row_it->backward ();
02534           delete row_it->extract ();
02535           row_it->forward ();
02536           bestover = -1.0f;      //force replacement
02537         }
02538         overlap = top - bottom;
02539         if (top > test_row->max_y ())
02540           overlap -= top - test_row->max_y ();
02541         if (bottom < test_row->min_y ())
02542           overlap -= test_row->min_y () - bottom;
02543         if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
02544           result = REJECT;
02545         }
02546         if (overlap > bestover) {
02547           bestover = overlap;    //find biggest overlap
02548           row = test_row;
02549         }
02550         if (testing_blob) {
02551           tprintf
02552             ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
02553             bottom, top, test_row->min_y (), test_row->max_y (),
02554             overlap, bestover);
02555         }
02556       }
02557     }
02558   }
02559   while (!row_it->at_last ()
02560     && test_row->min_y () <= top && test_row->max_y () >= bottom);
02561   while (row_it->data () != row)
02562     row_it->backward ();         //make it point to row
02563 
02564   //doesn't overlap much
02565   if (top - bottom - bestover > rowsize * textord_merge_x
02566      && (!textord_fix_makerow_bug || bestover < rowsize * textord_merge_x)
02567      && result == ASSIGN)
02568     result = NEW_ROW;            //doesn't overlap enough
02569   best_row = row;
02570   return result;
02571 }
02572 
02573 
02579 int blob_x_order(                    //sort function
02580                  const void *item1,  //items to compare
02581                  const void *item2) {
02582                                  //converted ptr
02583   BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
02584                                  //converted ptr
02585   BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
02586 
02587   if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
02588     return -1;
02589   else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
02590     return 1;
02591   else
02592     return 0;
02593 }
02594 
02595 
02601 int row_y_order(                    //sort function
02602                 const void *item1,  //items to compare
02603                 const void *item2) {
02604                                  //converted ptr
02605   TO_ROW *row1 = *(TO_ROW **) item1;
02606                                  //converted ptr
02607   TO_ROW *row2 = *(TO_ROW **) item2;
02608 
02609   if (row1->parallel_c () > row2->parallel_c ())
02610     return -1;
02611   else if (row1->parallel_c () < row2->parallel_c ())
02612     return 1;
02613   else
02614     return 0;
02615 }
02616 
02617 
02623 int row_spacing_order(                    //sort function
02624                       const void *item1,  //items to compare
02625                       const void *item2) {
02626                                  //converted ptr
02627   TO_ROW *row1 = *(TO_ROW **) item1;
02628                                  //converted ptr
02629   TO_ROW *row2 = *(TO_ROW **) item2;
02630 
02631   if (row1->spacing < row2->spacing)
02632     return -1;
02633   else if (row1->spacing > row2->spacing)
02634     return 1;
02635   else
02636     return 0;
02637 }

Generated on Wed Feb 28 19:49:11 2007 for Tesseract by  doxygen 1.5.1