textord/tospace.h File Reference

#include "blobbox.h"
#include "gap_map.h"
#include "statistc.h"
#include "notdll.h"

Go to the source code of this file.

Functions


Function Documentation

void block_spacing_stats ( TO_BLOCK *block,
GAPMAP *gapmap,
BOOL8 &old_text_ord_proportional,
INT16 &block_space_gap_width,
INT16 &block_non_space_gap_width 
)

DEBUG USE ONLY.

block_spacing_stats()

Definition at line 206 of file tospace.cpp.

References STATS::add(), TO_ROW::blob_list(), box_next(), box_next_pre_chopped(), STATS::get_total(), ignore_big_gap(), STATS::ile(), BOX::left(), MAX, MAX_INT16, MAXSPACING, STATS::median(), narrow_blob(), PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_PROP, reduced_box_next(), BOX::right(), TRUE, wide_blob(), BOX::width(), and TO_ROW::xheight.

Referenced by to_spacing().

// Body listing of block_spacing_stats().
// A row is used only if its blob list is non-empty and either
// tosp_only_use_prop_rows is off or the row's pitch_decision is
// PITCH_DEF_PROP / PITCH_CORR_PROP.
// Pass 1: for every used row, add the gap between each pair of consecutive
//         blob boxes to all_gap_stats (unless ignore_big_gap() rejects it),
//         track the narrowest blob, and (debug only) collect centre-to-centre
//         distances.
// Pass 2: re-walk the same rows and add to space_gap_stats only the gaps wider
//         than a crude per-row threshold (optionally restricted to "certain"
//         spaces).
// Assigned on exit: block_non_space_gap_width (floor of the median gap, or
// minwidth when there is <= 1 sample), block_space_gap_width (-1 when there
// are too few samples) and old_text_ord_proportional (debug use only).
// Fix: pass 2 now computes row_length as end_of_row - left edge, as pass 1
// does. It was negated, so the row_length tests in ignore_big_gap() could
// never succeed during pass 2.
00212                           {
00213   TO_ROW_IT row_it;              //row iterator
00214   TO_ROW *row;                   //current row
00215   BLOBNBOX_IT blob_it;           //iterator
00216 
00217   STATS centre_to_centre_stats (0, MAXSPACING);
00218   //DEBUG USE ONLY
00219   STATS all_gap_stats (0, MAXSPACING);
00220   STATS space_gap_stats (0, MAXSPACING);
00221   INT16 minwidth = MAX_INT16;    //narrowest blob
00222   BOX blob_box;
00223   BOX prev_blob_box;
00224   INT16 centre_to_centre;
00225   INT16 gap_width;
00226   float real_space_threshold;
00227   float iqr_centre_to_centre;    //DEBUG USE ONLY
00228   float iqr_all_gap_stats;       //DEBUG USE ONLY
00229   INT32 end_of_row;
00230   INT32 row_length;
00231 
        // Pass 1: gather every inter-blob gap in the block.
00232   row_it.set_to_list (block->get_rows ());
00233   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00234     row = row_it.data ();
00235     if (!row->blob_list ()->empty () &&
00236       (!tosp_only_use_prop_rows ||
00237       (row->pitch_decision == PITCH_DEF_PROP) ||
00238     (row->pitch_decision == PITCH_CORR_PROP))) {
00239       blob_it.set_to_list (row->blob_list ());
00240       blob_it.mark_cycle_pt ();
            // Right edge of the blob one step before the iterator's start.
00241       end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
00242       if (tosp_use_pre_chopping)
00243         blob_box = box_next_pre_chopped (&blob_it);
00244       else if (tosp_stats_use_xht_gaps)
00245         blob_box = reduced_box_next (row, &blob_it);
00246       else
00247         blob_box = box_next (&blob_it);
00248       row_length = end_of_row - blob_box.left ();
00249       if (blob_box.width () < minwidth)
00250         minwidth = blob_box.width ();
00251       prev_blob_box = blob_box;
00252       while (!blob_it.cycled_list ()) {
00253         if (tosp_use_pre_chopping)
00254           blob_box = box_next_pre_chopped (&blob_it);
00255         else if (tosp_stats_use_xht_gaps)
00256           blob_box = reduced_box_next (row, &blob_it);
00257         else
00258           blob_box = box_next (&blob_it);
00259         if (blob_box.width () < minwidth)
00260           minwidth = blob_box.width ();
00261         gap_width = blob_box.left () - prev_blob_box.right ();
00262         if (!ignore_big_gap (row, row_length, gapmap,
00263         prev_blob_box.right (), blob_box.left ())) {
00264           all_gap_stats.add (gap_width, 1);
00265 
00266           centre_to_centre = (blob_box.left () + blob_box.right () -
00267             (prev_blob_box.left () +
00268             prev_blob_box.right ())) / 2;
00269           //DEBUG
00270           centre_to_centre_stats.add (centre_to_centre, 1);
00271           // DEBUG
00272         }
00273         prev_blob_box = blob_box;
00274       }
00275     }
00276   }
00277 
00278                                  //Inadequate samples
00279   if (all_gap_stats.get_total () <= 1) {
00280     block_non_space_gap_width = minwidth;
00281     block_space_gap_width = -1;  //No est. space width
00282                                  //DEBUG
00283     old_text_ord_proportional = TRUE;
00284   }
00285   else {
00286     /* For debug only ..... */
00287     iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
00288       centre_to_centre_stats.ile (0.25);
00289     iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
00290     old_text_ord_proportional =
00291       iqr_centre_to_centre * 2 > iqr_all_gap_stats;
00292     /* .......For debug only */
00293 
00294 /*
00295 The median of the gaps is used as an estimate of the NON-SPACE gap width.
00296 This RELIES on the assumption that there are more gaps WITHIN words than
00297 BETWEEN words in a block
00298 
00299 Now try to estimate the width of a real space for all real spaces in the
00300 block. Do this by using a crude threshold to ignore "narrow" gaps, then
00301 find the median of the "wide" gaps and use this.
00302 */
00303     block_non_space_gap_width = (INT16) floor (all_gap_stats.median ());
00304     // median gap
00305 
          // Pass 2: gather only the gaps that look like real spaces.
00306     row_it.set_to_list (block->get_rows ());
00307     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00308       row = row_it.data ();
00309       if (!row->blob_list ()->empty () &&
00310         (!tosp_only_use_prop_rows ||
00311         (row->pitch_decision == PITCH_DEF_PROP) ||
00312       (row->pitch_decision == PITCH_CORR_PROP))) {
00313         real_space_threshold =
00314           MAX (tosp_init_guess_kn_mult * block_non_space_gap_width,
00315           tosp_init_guess_xht_mult * row->xheight);
00316         blob_it.set_to_list (row->blob_list ());
00317         blob_it.mark_cycle_pt ();
00318         end_of_row =
00319           blob_it.data_relative (-1)->bounding_box ().right ();
00320         if (tosp_use_pre_chopping)
00321           blob_box = box_next_pre_chopped (&blob_it);
00322         else if (tosp_stats_use_xht_gaps)
00323           blob_box = reduced_box_next (row, &blob_it);
00324         else
00325           blob_box = box_next (&blob_it);
              // Same orientation as pass 1 (right end minus left end); the
              // negated form made every row_length test in ignore_big_gap fail.
00326         row_length = end_of_row - blob_box.left ();
00327         prev_blob_box = blob_box;
00328         while (!blob_it.cycled_list ()) {
00329           if (tosp_use_pre_chopping)
00330             blob_box = box_next_pre_chopped (&blob_it);
00331           else if (tosp_stats_use_xht_gaps)
00332             blob_box = reduced_box_next (row, &blob_it);
00333           else
00334             blob_box = box_next (&blob_it);
00335           gap_width = blob_box.left () - prev_blob_box.right ();
00336           if ((gap_width > real_space_threshold) &&
00337             !ignore_big_gap (row, row_length, gapmap,
00338             prev_blob_box.right (),
00339           blob_box.left ())) {
00340   /*
00341   If tosp_use_cert_spaces is enabled, the estimate of the space gap is
00342   restricted to obvious spaces - those wider than half the xht or those
00343   with wide blobs on both sides - i.e not things that are suspect 1's or
00344   punctuation that is sometimes widely spaced.
00345   */
00346             if (!tosp_block_use_cert_spaces ||
00347               (gap_width >
00348               tosp_fuzzy_space_factor2 * row->xheight)
00349               ||
00350               ((gap_width >
00351               tosp_fuzzy_space_factor1 * row->xheight)
00352               && (!tosp_narrow_blobs_not_cert
00353               || (!narrow_blob (row, prev_blob_box)
00354               && !narrow_blob (row, blob_box))))
00355               || (wide_blob (row, prev_blob_box)
00356               && wide_blob (row, blob_box)))
00357               space_gap_stats.add (gap_width, 1);
00358           }
00359           prev_blob_box = blob_box;
00360         }
00361       }
00362     }
00363                                  //Inadequate samples
00364     if (space_gap_stats.get_total () <= 2)
00365       block_space_gap_width = -1;//No est. space width
00366     else
          // Never report a space narrower than 3x the non-space gap estimate.
00367       block_space_gap_width =
00368         MAX ((INT16) floor (space_gap_stats.median ()),
00369         3 * block_non_space_gap_width);
00370   }
00371 }

float find_mean_blob_spacing ( WERD *word  ) 

Find mean of blob spacing.

Definition at line 1790 of file tospace.cpp.

References WERD::blob_list(), WERD::cblob_list(), WERD::flag(), BOX::left(), BOX::right(), and W_POLYGON.

Referenced by make_prop_words().

// Body listing of find_mean_blob_spacing().
// Returns the mean horizontal gap (left edge of a blob minus right edge of the
// blob before it) over the word's blobs. Iterates word->blob_list() when the
// W_POLYGON flag is set, otherwise word->cblob_list(). Returns 0.0f when no
// gap was counted.
01790                                          { 
01791   PBLOB_IT blob_it;
01792   C_BLOB_IT cblob_it;
01793   BOX blob_box;
01794   INT32 gap_sum = 0;
01795   INT16 gap_count = 0;
01796   INT16 prev_right;
01797 
        // Polygonal blobs.
01798   if (word->flag (W_POLYGON)) {
01799     blob_it.set_to_list (word->blob_list ());
01800     if (!blob_it.empty ()) {
01801       blob_it.mark_cycle_pt ();
01802       prev_right = blob_it.data ()->bounding_box ().right ();
01803       //first blob
01804       blob_it.forward ();
            // Each remaining blob contributes one gap against its predecessor.
01805       for (; !blob_it.cycled_list (); blob_it.forward ()) {
01806         blob_box = blob_it.data ()->bounding_box ();
01807         gap_sum += blob_box.left () - prev_right;
01808         gap_count++;
01809         prev_right = blob_box.right ();
01810       }
01811     }
01812   }
        // Otherwise the same walk over the cblob list.
01813   else {
01814     cblob_it.set_to_list (word->cblob_list ());
01815     if (!cblob_it.empty ()) {
01816       cblob_it.mark_cycle_pt ();
01817       prev_right = cblob_it.data ()->bounding_box ().right ();
01818       //first blob
01819       cblob_it.forward ();
01820       for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
01821         blob_box = cblob_it.data ()->bounding_box ();
01822         gap_sum += blob_box.left () - prev_right;
01823         gap_count++;
01824         prev_right = blob_box.right ();
01825       }
01826     }
01827   }
        // Guard against dividing by zero when no gap was counted.
01828   if (gap_count > 0)
01829     return (gap_sum / (float) gap_count);
01830   else
01831     return 0.0f;
01832 }

BOOL8 ignore_big_gap ( TO_ROW *row,
INT32  row_length,
GAPMAP *gapmap,
INT16  left,
INT16  right 
)

Decide if big gaps should be ignored

Definition at line 1838 of file tospace.cpp.

References FALSE, GAPMAP::table_gap(), TRUE, and TO_ROW::xheight.

Referenced by block_spacing_stats(), isolated_row_stats(), and row_spacing_stats().

// Body listing of ignore_big_gap().
// Returns TRUE when the gap spanning [left, right] should be left out of the
// spacing statistics. Behaviour is selected by tosp_ignore_big_gaps:
//   > 999 : never ignore;
//   > 0   : ignore iff gap > tosp_ignore_big_gaps * xheight;
//   == 0  : ignore very big gaps, or big gaps on long rows / in table gaps;
//   < 0   : ignore very big gaps, or gaps over gapmap_big_gaps * xheight that
//           gapmap->table_gap() reports as table gaps.
01842                                   {
01843   INT16 gap = right - left + 1;
01844 
01845   if (tosp_ignore_big_gaps > 999)
01846     return FALSE;                //Dont ignore
01847   if (tosp_ignore_big_gaps > 0)
01848     return (gap > tosp_ignore_big_gaps * row->xheight);
        // From here on tosp_ignore_big_gaps <= 0.
01849   if (gap > tosp_ignore_very_big_gaps * row->xheight)
01850     return TRUE;
01851   if (tosp_ignore_big_gaps == 0) {
01852     if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight))
01853       return TRUE;
01854     if ((gap > 1.75 * row->xheight) &&
01855       ((row_length > 35 * row->xheight) ||
01856       gapmap->table_gap (left, right)))
01857       return TRUE;
01858   }
01859   else {
01860   /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */
01861     if ((gap > gapmap_big_gaps * row->xheight) &&
01862       gapmap->table_gap (left, right))
01863       return TRUE;
01864   }
01865   return FALSE;
01866 }

void improve_row_threshold ( TO_ROW *row,
STATS *all_gap_stats 
)

Try to recognise a "normal line".

The following conditions are used:

       > 25 gaps
 &&    space > 3 * kn  && space > 10
          (I.e. reasonably large space and kn:sp ratio)
 &&    > 3/4 # gaps < kn + (sp - kn)/3
          (I.e. most gaps are well away from space estimate)
 &&    a gap of max( 3, (sp - kn)/3 ) empty histogram positions is found
       somewhere in the histogram between kn and sp

 THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies

NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! try moving the default threshold to within this band but leave the fuzzy limit calculation as at present.

Definition at line 905 of file tospace.cpp.

References STATS::get_total(), TO_ROW::kern_size, TO_ROW::space_size, TO_ROW::space_threshold, stats_count_under(), and tprintf().

Referenced by row_spacing_stats().

// Body listing of improve_row_threshold().
// Returns without change unless the row has more than 25 gaps, sp > 10,
// sp > 3 * kn, and stats_count_under(all_gap_stats, ceil(kn + (sp-kn)/3 + 0.5))
// is at least 75% of the total. Otherwise scans histogram positions
// [ceil(kn), floor(sp)) for the first run of empty piles at least
// max(3, round((sp-kn)/3)) wide. If such a run exists and row->space_threshold
// lies outside it, the threshold is moved to the nearer end of the run.
// Debug output goes through tprintf() under tosp_debug_level.
// Fix: the " reqd_z_width ..." debug format ended in "/n"; it now ends in "\n"
// so the debug line is terminated.
00905                                                               { 
00906   float sp = row->space_size;
00907   float kn = row->kern_size;
00908   INT16 reqd_zero_width = 0;
00909   INT16 zero_width = 0;
00910   INT16 zero_start = 0;
00911   INT16 index = 0;
00912 
00913   if (tosp_debug_level > 10)
00914     tprintf ("Improve row threshold 0");
00915   if ((all_gap_stats->get_total () <= 25) ||
00916     (sp <= 10) ||
00917     (sp <= 3 * kn) ||
00918     (stats_count_under (all_gap_stats,
00919     (INT16) ceil (kn + (sp - kn) / 3 + 0.5)) <
00920     (0.75 * all_gap_stats->get_total ())))
00921     return;
00922   if (tosp_debug_level > 10)
00923     tprintf (" 1");
00924   /*
00925   Look for the first region of all 0's in the histogram which is wider than
00926   max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current
00927   threshold is not within it, move the threshold so that it is just inside it.
00928   */
00929   reqd_zero_width = (INT16) floor ((sp - kn) / 3 + 0.5);
00930   if (reqd_zero_width < 3)
00931     reqd_zero_width = 3;
00932 
00933   for (index = INT16 (ceil (kn)); index < INT16 (floor (sp)); index++) {
00934     if (all_gap_stats->pile_count (index) == 0) {
00935       if (zero_width == 0)
00936         zero_start = index;
00937       zero_width++;
00938     }
00939     else {
            // A non-empty pile ends the run: stop if it was long enough,
            // otherwise start counting afresh.
00940       if (zero_width >= reqd_zero_width)
00941         break;
00942       else {
00943         zero_width = 0;
00944       }
00945     }
00946   }
        // Step back to the last position examined as part of the run.
00947   index--;
00948   if (tosp_debug_level > 10)
00949     tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d\n",
00950       reqd_zero_width, zero_width, zero_start, row->space_threshold);
        // Nothing to do if no adequate run was found or the threshold is
        // already inside [zero_start, index].
00951   if ((zero_width < reqd_zero_width) ||
00952     ((row->space_threshold >= zero_start) &&
00953     (row->space_threshold <= index)))
00954     return;
00955   if (tosp_debug_level > 10)
00956     tprintf (" 2");
00957   if (row->space_threshold < zero_start) {
00958     if (tosp_debug_level > 5)
00959       tprintf
00960         ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",
00961         kn, sp, zero_start, index, row->space_threshold, zero_start);
00962     row->space_threshold = zero_start;
00963   }
00964   if (row->space_threshold > index) {
00965     if (tosp_debug_level > 5)
00966       tprintf
00967         ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",
00968         kn, sp, zero_start, index, row->space_threshold, index);
00969     row->space_threshold = index;
00970   }
00971 }

BOOL8 isolated_row_stats ( TO_ROW *row,
GAPMAP *gapmap,
STATS *all_gap_stats,
BOOL8  suspected_table,
INT16  block_idx,
INT16  row_idx 
)

Set values for min_space, max_non_space based on isolated row's stats only

Definition at line 754 of file tospace.cpp.

References STATS::add(), TO_ROW::blob_list(), box_next(), box_next_pre_chopped(), FALSE, STATS::get_total(), ignore_big_gap(), TO_ROW::kern_size, BOX::left(), MAX, MAXSPACING, STATS::mean(), STATS::median(), narrow_blob(), reduced_box_next(), BOX::right(), TO_ROW::space_size, TO_ROW::space_threshold, stats_count_under(), tprintf(), TRUE, wide_blob(), and TO_ROW::xheight.

Referenced by row_spacing_stats().

// Body listing of isolated_row_stats().
// Derives row->kern_size, row->space_size and row->space_threshold from this
// row's gaps alone.
// Returns FALSE without touching the row when there are too few gaps
// (total <= tosp_redo_kern_limit), too small a fraction of small gaps
// (< tosp_enough_small_gaps), or no gap at or above the crude threshold.
// Returns FALSE and zeroes the three row fields when the result fails the
// sanity check kern_size < space_threshold < space_size, space_threshold > 0.
// Returns TRUE otherwise. block_idx and row_idx are used only in debug output.
00759                                         {
00760   float kern_estimate;
00761   float crude_threshold_estimate;
00762   INT16 small_gaps_count;
00763   INT16 total;
00764                                  //iterator
00765   BLOBNBOX_IT blob_it = row->blob_list ();
00766   STATS cert_space_gap_stats (0, MAXSPACING);
00767   STATS all_space_gap_stats (0, MAXSPACING);
00768   STATS small_gap_stats (0, MAXSPACING);
00769   BOX blob_box;
00770   BOX prev_blob_box;
00771   INT16 gap_width;
00772   INT32 end_of_row;
00773   INT32 row_length;
00774 
        // Crude kern/space split: the larger of a multiple of the median gap
        // and a multiple of the row's xheight.
00775   kern_estimate = all_gap_stats->median ();
00776   crude_threshold_estimate = MAX (tosp_init_guess_kn_mult * kern_estimate,
00777     tosp_init_guess_xht_mult * row->xheight);
00778   small_gaps_count = stats_count_under (all_gap_stats,
00779     (INT16)
00780     ceil (crude_threshold_estimate));
00781   total = all_gap_stats->get_total ();
00782 
00783   if ((total <= tosp_redo_kern_limit) ||
00784     ((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
00785   (total - small_gaps_count < 1)) {
00786     if (tosp_debug_level > 5)
00787       tprintf ("B:%d R:%d -- Cant do isolated row stats.\n",
00788         block_idx, row_idx);
00789     return FALSE;
00790   }
00791   blob_it.set_to_list (row->blob_list ());
00792   blob_it.mark_cycle_pt ();
00793   end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
00794   if (tosp_use_pre_chopping)
00795     blob_box = box_next_pre_chopped (&blob_it);
00796   else if (tosp_stats_use_xht_gaps)
00797     blob_box = reduced_box_next (row, &blob_it);
00798   else
00799     blob_box = box_next (&blob_it);
00800   row_length = end_of_row - blob_box.left ();
00801   prev_blob_box = blob_box;
        // Sort every gap into the space and/or small-gap histograms.
00802   while (!blob_it.cycled_list ()) {
00803     if (tosp_use_pre_chopping)
00804       blob_box = box_next_pre_chopped (&blob_it);
00805     else if (tosp_stats_use_xht_gaps)
00806       blob_box = reduced_box_next (row, &blob_it);
00807     else
00808       blob_box = box_next (&blob_it);
00809     gap_width = blob_box.left () - prev_blob_box.right ();
00810     if (!ignore_big_gap (row, row_length, gapmap,
00811       prev_blob_box.right (), blob_box.left ()) &&
00812     (gap_width > crude_threshold_estimate)) {
            // "Certain" spaces: very wide, or fairly wide between blobs that
            // are not narrow, or between two wide blobs.
00813       if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
00814         ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
00815         (!tosp_narrow_blobs_not_cert ||
00816         (!narrow_blob (row, prev_blob_box) &&
00817         !narrow_blob (row, blob_box)))) ||
00818         (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
00819         cert_space_gap_stats.add (gap_width, 1);
00820       all_space_gap_stats.add (gap_width, 1);
00821     }
          // A gap exactly equal to the threshold lands in neither histogram.
00822     if (gap_width < crude_threshold_estimate)
00823       small_gap_stats.add (gap_width, 1);
00824 
00825     prev_blob_box = blob_box;
00826   }
        // Space size: prefer the certain-space histogram, then fall back to
        // all spaces; median when there are enough samples, mean otherwise.
00827   if (cert_space_gap_stats.get_total () >=
00828     tosp_enough_space_samples_for_median)
00829                                  //median
00830     row->space_size = cert_space_gap_stats.median ();
00831   else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
00832                                  //to avoid spaced
00833     row->space_size = cert_space_gap_stats.mean ();
00834   //      1's in tables
00835   else if (all_space_gap_stats.get_total () >=
00836     tosp_enough_space_samples_for_median)
00837                                  //median
00838     row->space_size = all_space_gap_stats.median ();
00839   else
00840     row->space_size = all_space_gap_stats.mean ();
00841 
00842   if (tosp_only_small_gaps_for_kern)
00843     row->kern_size = small_gap_stats.median ();
00844   else
00845     row->kern_size = all_gap_stats->median ();
        // Threshold is the floored midpoint of the kern and space sizes.
00846   row->space_threshold =
00847     INT32 (floor ((row->space_size + row->kern_size) / 2));
00848   /* Sanity check */
00849   if ((row->kern_size >= row->space_threshold) ||
00850     (row->space_threshold >= row->space_size) ||
00851   (row->space_threshold <= 0)) {
00852     if (tosp_debug_level > 0)
00853       tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
00854         block_idx, row_idx,
00855         row->kern_size, row->space_threshold, row->space_size);
00856     row->kern_size = 0.0f;
00857     row->space_threshold = 0;
00858     row->space_size = 0.0f;
00859     return FALSE;
00860   }
00861 
00862   if (tosp_debug_level > 5)
00863     tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n",
00864       block_idx, row_idx,
00865       row->kern_size, row->space_threshold, row->space_size);
00866   return TRUE;
00867 }

BOOL8 make_a_word_break ( TO_ROW *row,
BOX  blob_box,
INT16  prev_gap,
BOX  prev_blob_box,
INT16  real_current_gap,
INT16  within_xht_current_gap,
BOX  next_blob_box,
INT16  next_gap,
UINT8 &blanks,
BOOL8 &fuzzy_sp,
BOOL8 &fuzzy_non 
)

Decide on word break.

Parameters:
row Row being made
blob_box For next_blob
prev_gap How many blanks?
prev_blob_box 
real_current_gap 
within_xht_current_gap 
next_blob_box 
next_gap 
blanks 
fuzzy_sp 
fuzzy_non 
Returns:
TRUE or FALSE
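Inhibit using the reduced gap if the row's kern size exceeds tosp_large_kerning * xheight, or if tosp_dont_fool_with_small_kerns >= 0 and the real gap is smaller than tosp_dont_fool_with_small_kerns * kern size; in either case the within-xheight gap is replaced by the real gap.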

Definition at line 1319 of file tospace.cpp.

References FALSE, TO_ROW::kern_size, mark_gap(), MAX, MAX_INT16, TO_ROW::max_nonspace, TO_ROW::min_space, narrow_blob(), BOX::null_box(), TO_ROW::space_size, TO_ROW::space_threshold, suspected_punct_blob(), TRUE, wide_blob(), BOX::width(), and TO_ROW::xheight.

Referenced by make_prop_words().

// Body listing of make_a_word_break().
// Decides whether the gap in front of blob_box is a word break and returns
// that decision; blanks, fuzzy_sp and fuzzy_non are assigned along the way.
//   * current_gap is within_xht_current_gap only when both tosp_use_xht_gaps
//     and tosp_only_use_xht_gaps are set; otherwise it is real_current_gap.
//   * tosp_old_to_method: a plain comparison against row->max_nonspace, with
//     gaps below row->min_space marked fuzzy either way.
//   * otherwise: start from current_gap > row->space_threshold, then apply the
//     xheight-flip rules and the neighbouring-blob heuristics below.
// The static prev_gap_was_a_space persists between calls; it is set to TRUE
// when prev_blob_box is a null box and updated just before the new-method
// return.
// mark_gap() calls are compiled only when GRAPHICS_DISABLED is not defined;
// their second argument identifies the rule that fired.
01330                                           {
01331   static BOOL8 prev_gap_was_a_space;
01332   BOOL8 space;
01333   INT16 current_gap;
01334   float fuzzy_sp_to_kn_limit;
01335 
01336   /*
01337    Inhibit using the reduced gap
01338   */
01339   if ((row->kern_size > tosp_large_kerning * row->xheight) ||
01340     ((tosp_dont_fool_with_small_kerns >= 0) &&
01341     (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
01342                                  //Ignore the difference
01343     within_xht_current_gap = real_current_gap;
01344 
01345   if (tosp_use_xht_gaps && tosp_only_use_xht_gaps)
01346     current_gap = within_xht_current_gap;
01347   else
01348     current_gap = real_current_gap;
01349 
01350   if (tosp_old_to_method) {
01351                                  //Boring old method
01352     space = current_gap > row->max_nonspace;
01353     if (space && (current_gap < MAX_INT16)) {
01354       if (current_gap < row->min_space) {
01355         if (current_gap > row->space_threshold) {
01356           blanks = 1;
01357           fuzzy_sp = TRUE;
01358           fuzzy_non = FALSE;
01359         }
01360         else {
01361           blanks = 0;
01362           fuzzy_sp = FALSE;
01363           fuzzy_non = TRUE;
01364         }
01365       }
01366       else {
01367         blanks = (UINT8) (current_gap / row->space_size);
01368         if (blanks < 1)
01369           blanks = 1;
01370         fuzzy_sp = FALSE;
01371         fuzzy_non = FALSE;
01372       }
01373     }
01374     return space;
01375   }
01376   else {
01377   /* New exciting heuristic method */
01378     if (prev_blob_box.null_box ())
01379                                  //Beginning of row
01380         prev_gap_was_a_space = TRUE;
01381 
01382                                  //Default as old TO
01383     space = current_gap > row->space_threshold;
01384 
01385     /* Set defaults for the word break in case we find one.  Currently there are
01386     no fuzzy spaces. Depending on the reliability of the different heuristics
01387     we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
01388     be used if the function returns TRUE - ie the word is to be broken.
01389     */
01390     blanks = (UINT8) (current_gap / row->space_size);
01391     if (blanks < 1)
01392       blanks = 1;
01393     fuzzy_sp = FALSE;
01394     fuzzy_non = FALSE;
01395     /*
01396     If xht measure causes gap to flip one of the 3 thresholds act accordingly -
01397     despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to
01398     context.
01399     */
          // Rule 20: xht gap crosses max_nonspace.
01400     if (tosp_use_xht_gaps &&
01401       (real_current_gap <= row->max_nonspace) &&
01402     (within_xht_current_gap > row->max_nonspace)) {
01403       space = TRUE;
01404       fuzzy_non = TRUE;
01405 #ifndef GRAPHICS_DISABLED
01406       mark_gap (blob_box, 20,
01407         prev_gap, prev_blob_box.width (),
01408         current_gap, next_blob_box.width (), next_gap);
01409 #endif
01410     }
          // Rule 21: xht gap crosses space_threshold.
01411     else if (tosp_use_xht_gaps &&
01412       (real_current_gap <= row->space_threshold) &&
01413     (within_xht_current_gap > row->space_threshold)) {
01414       space = TRUE;
01415       if (tosp_flip_fuzz_kn_to_sp)
01416         fuzzy_sp = TRUE;
01417       else
01418         fuzzy_non = TRUE;
01419 #ifndef GRAPHICS_DISABLED
01420       mark_gap (blob_box, 21,
01421         prev_gap, prev_blob_box.width (),
01422         current_gap, next_blob_box.width (), next_gap);
01423 #endif
01424     }
          // Rule 22: xht gap crosses min_space.
01425     else if (tosp_use_xht_gaps &&
01426       (real_current_gap < row->min_space) &&
01427     (within_xht_current_gap >= row->min_space)) {
01428       space = TRUE;
01429 #ifndef GRAPHICS_DISABLED
01430       mark_gap (blob_box, 22,
01431         prev_gap, prev_blob_box.width (),
01432         current_gap, next_blob_box.width (), next_gap);
01433 #endif
01434     }
01435     /* Now continue with normal heuristics */
01436     else if ((current_gap < row->min_space) &&
01437     (current_gap > row->space_threshold)) {
01438       /* Heuristics to turn dubious spaces to kerns */
01439       if (tosp_pass_wide_fuzz_sp_to_context > 0)
01440         fuzzy_sp_to_kn_limit = row->kern_size +
01441           tosp_pass_wide_fuzz_sp_to_context *
01442           (row->space_size - row->kern_size);
01443       else
01444         fuzzy_sp_to_kn_limit = 99999.0f;
01445 
01446       /* If current gap is significantly smaller than the previous space the other
01447       side of a narrow blob then this gap is a kern. */
01448       if ((prev_blob_box.width () > 0) &&
01449         narrow_blob (row, prev_blob_box) &&
01450         prev_gap_was_a_space &&
01451       (current_gap <= tosp_gap_factor * prev_gap)) {
01452         if ((tosp_all_flips_fuzzy) ||
01453         (current_gap > fuzzy_sp_to_kn_limit)) {
01454           if (tosp_flip_fuzz_sp_to_kn)
01455             fuzzy_non = TRUE;
01456           else
01457             fuzzy_sp = TRUE;
01458         }
01459         else
01460           space = FALSE;
01461 #ifndef GRAPHICS_DISABLED
01462         mark_gap (blob_box, 1,
01463           prev_gap, prev_blob_box.width (),
01464           current_gap, next_blob_box.width (), next_gap);
01465 #endif
01466       }
01467       /* If current gap not much bigger than the previous kern the other side of a
01468       narrow blob then this gap is a kern as well */
01469       else if ((prev_blob_box.width () > 0) &&
01470         narrow_blob (row, prev_blob_box) &&
01471         !prev_gap_was_a_space &&
01472       (current_gap * tosp_gap_factor <= prev_gap)) {
01473         if ((tosp_all_flips_fuzzy) ||
01474         (current_gap > fuzzy_sp_to_kn_limit)) {
01475           if (tosp_flip_fuzz_sp_to_kn)
01476             fuzzy_non = TRUE;
01477           else
01478             fuzzy_sp = TRUE;
01479         }
01480         else
01481           space = FALSE;
01482 #ifndef GRAPHICS_DISABLED
01483         mark_gap (blob_box, 2,
01484           prev_gap, prev_blob_box.width (),
01485           current_gap, next_blob_box.width (), next_gap);
01486 #endif
01487       }
            // Rules 3 and 4: the same two tests applied to the following
            // narrow blob and next_gap.
01488       else if ((next_blob_box.width () > 0) &&
01489         narrow_blob (row, next_blob_box) &&
01490         (next_gap > row->space_threshold) &&
01491       (current_gap <= tosp_gap_factor * next_gap)) {
01492         if ((tosp_all_flips_fuzzy) ||
01493         (current_gap > fuzzy_sp_to_kn_limit)) {
01494           if (tosp_flip_fuzz_sp_to_kn)
01495             fuzzy_non = TRUE;
01496           else
01497             fuzzy_sp = TRUE;
01498         }
01499         else
01500           space = FALSE;
01501 #ifndef GRAPHICS_DISABLED
01502         mark_gap (blob_box, 3,
01503           prev_gap, prev_blob_box.width (),
01504           current_gap, next_blob_box.width (), next_gap);
01505 #endif
01506       }
01507       else if ((next_blob_box.width () > 0) &&
01508         narrow_blob (row, next_blob_box) &&
01509         (next_gap <= row->space_threshold) &&
01510       (current_gap * tosp_gap_factor <= next_gap)) {
01511         if ((tosp_all_flips_fuzzy) ||
01512         (current_gap > fuzzy_sp_to_kn_limit)) {
01513           if (tosp_flip_fuzz_sp_to_kn)
01514             fuzzy_non = TRUE;
01515           else
01516             fuzzy_sp = TRUE;
01517         }
01518         else
01519           space = FALSE;
01520 #ifndef GRAPHICS_DISABLED
01521         mark_gap (blob_box, 4,
01522           prev_gap, prev_blob_box.width (),
01523           current_gap, next_blob_box.width (), next_gap);
01524 #endif
01525       }
            // Rule 6: a narrow neighbour on either side keeps the space but
            // marks it fuzzy.
01526       else if ((((next_blob_box.width () > 0) &&
01527         narrow_blob (row, next_blob_box)) ||
01528         ((prev_blob_box.width () > 0) &&
01529       narrow_blob (row, prev_blob_box)))) {
01530         fuzzy_sp = TRUE;
01531 #ifndef GRAPHICS_DISABLED
01532         mark_gap (blob_box, 6,
01533           prev_gap, prev_blob_box.width (),
01534           current_gap, next_blob_box.width (), next_gap);
01535 #endif
01536       }
01537     }
01538     else if ((current_gap > row->max_nonspace) &&
01539     (current_gap <= row->space_threshold)) {
01540 
01541       /* Heuristics to turn dubious kerns to spaces */
01542       /* TRIED THIS BUT IT MADE THINGS WORSE
01543           if ( prev_gap == MAX_INT16 )
01544             prev_gap = 0;                       //start of row
01545           if ( next_gap == MAX_INT16 )
01546             next_gap = 0;                       //end of row
01547       */
01548       if ((prev_blob_box.width () > 0) &&
01549         (next_blob_box.width () > 0) &&
01550         (current_gap >=
01551         tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) &&
01552         wide_blob (row, prev_blob_box) &&
01553       wide_blob (row, next_blob_box)) {
01554 
01555         space = TRUE;
01556         /*
01557         tosp_flip_caution is an attempt to stop the default changing in cases
01558         where there is a large difference between the kern and space estimates.
01559           See problem in 'chiefs' where "have" gets split in the quotation.
01560         */
01561         if ((tosp_flip_fuzz_kn_to_sp) &&
01562           ((tosp_flip_caution <= 0) ||
01563           (tosp_flip_caution * row->kern_size > row->space_size)))
01564           fuzzy_sp = TRUE;
01565         else
01566           fuzzy_non = TRUE;
01567 #ifndef GRAPHICS_DISABLED
01568         mark_gap (blob_box, 7,
01569           prev_gap, prev_blob_box.width (),
01570           current_gap, next_blob_box.width (), next_gap);
01571 #endif
01572       }
            // Rule 8: neither neighbour is narrow or suspected punctuation.
01573       else if ((prev_blob_box.width () > 0) &&
01574         (next_blob_box.width () > 0) &&
01575         (current_gap >=
01576         tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) &&
01577         !(narrow_blob (row, prev_blob_box) ||
01578         suspected_punct_blob (row, prev_blob_box)) &&
01579         !(narrow_blob (row, next_blob_box) ||
01580       suspected_punct_blob (row, next_blob_box))) {
01581         space = TRUE;
01582         fuzzy_non = TRUE;
01583 #ifndef GRAPHICS_DISABLED
01584         mark_gap (blob_box, 8,
01585           prev_gap, prev_blob_box.width (),
01586           current_gap, next_blob_box.width (), next_gap);
01587 #endif
01588       }
            // Rule 9: enabled only when tosp_kern_gap_factor3 > 0; the
            // punctuation test is optional (tosp_rule_9_test_punct).
01589       else if ((tosp_kern_gap_factor3 > 0) &&
01590         (prev_blob_box.width () > 0) &&
01591         (next_blob_box.width () > 0) &&
01592         (current_gap >=
01593         tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) &&
01594         (!tosp_rule_9_test_punct ||
01595         (!suspected_punct_blob (row, prev_blob_box) &&
01596       !suspected_punct_blob (row, next_blob_box)))) {
01597         space = TRUE;
01598         fuzzy_non = TRUE;
01599 #ifndef GRAPHICS_DISABLED
01600         mark_gap (blob_box, 9,
01601           prev_gap, prev_blob_box.width (),
01602           current_gap, next_blob_box.width (), next_gap);
01603 #endif
01604       }
01605     }
          // Remember the outcome for the next call: a break flagged fuzzy_non
          // does not count as a space.
01606     prev_gap_was_a_space = space && !(fuzzy_non);
01607     return space;
01608   }
01609 }

ROW* make_prop_words ( TO_ROW *row,
FCOORD  rotation 
)

Find lines.

Parameters:
row Row to make
rotation For drawing
Returns:
ROW
Convert a TO_BLOCK to a BLOCK.

prev_ values are for start of word being built. non prev_ values are for the gap between the word being built and the next one.

Definition at line 983 of file tospace.cpp.

References TO_ROW::blob_list(), BOX::bottom(), ROW::bounding_box(), WERD::bounding_box(), FALSE, find_mean_blob_spacing(), TO_ROW::kern_size, BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), make_a_word_break(), MAX_INT16, MAX_INT32, NULL, peek_at_next_gap(), ROW::recalc_bounding_box(), TO_ROW::rep_words, BOX::right(), WERD::set_blanks(), WERD::set_flag(), TO_ROW::space_size, BOX::top(), tprintf(), TRUE, W_BOL, W_EOL, W_FUZZY_NON, W_FUZZY_SP, word_count, and ROW::word_list().

Referenced by make_real_words().

00986                       {
        // Builds the output ROW for `row`.  Blobs from row->blob_list () are
        // collected into words; a word is closed whenever make_a_word_break ()
        // reports a break, whenever the next blob starts to the right of the
        // next repeated-character word (taken from row->rep_words), or when
        // the blob iterator wraps back to the first blob.  Returns NULL when
        // the row has no blobs.  `rotation` is not referenced in this body.
        // (Lines without a listing number are review annotations.)
00987   BOOL8 bol;                     // start of line
00993   BOOL8 prev_fuzzy_sp;           // probably space
00994   BOOL8 prev_fuzzy_non;          // probably not
00995   UINT8 prev_blanks;             // in front of word
00996   BOOL8 fuzzy_sp;                // probably space
00997   BOOL8 fuzzy_non;               // probably not
00998   UINT8 blanks;                  // in front of word
00999   ROW *real_row;                 // output row
01000   OUTLINE_IT out_it;             // outlines
01001   C_OUTLINE_IT cout_it;
01002   PBLOB_LIST blobs;              // blobs in word
01003   C_BLOB_LIST cblobs;
01004   PBLOB_IT blob_it = &blobs;     // iterator
01005   C_BLOB_IT cblob_it = &cblobs;
01006   WERD_LIST words;
01007   WERD_IT word_it;               // new words
01008   WERD *word;                    // new word
01009   WERD_IT rep_char_it;           // repeated char words
01010   INT32 next_rep_char_word_right = MAX_INT32;
01011   float repetition_spacing;      // gap between repetitions
        // xstarts and coeffs are written below but never read in this listing.
01012   INT32 xstarts[2];              // row ends
01013   double coeffs[3];              // quadratic
01014   INT32 prev_x;                  // end of prev blob
01015   BLOBNBOX *bblob;               // current blob
01016   BOX blob_box;                  // bounding box
01017   BLOBNBOX_IT box_it;            // iterator
01018   BOX prev_blob_box;
01019   BOX next_blob_box;
01020   INT16 prev_gap = MAX_INT16;
01021   INT16 current_gap = MAX_INT16;
01022   INT16 next_gap = MAX_INT16;
01023   INT16 prev_within_xht_gap = MAX_INT16;
01024   INT16 current_within_xht_gap = MAX_INT16;
01025   INT16 next_within_xht_gap = MAX_INT16;
01026   INT16 word_count = 0;
        // Counts calls across the whole run; only used in the debug print.
01027   static INT16 row_count = 0;
01028 
01029   row_count++;
01030   rep_char_it.set_to_list (&(row->rep_words));
01031   if (!rep_char_it.empty ()) {
01032     next_rep_char_word_right =
01033       rep_char_it.data ()->bounding_box ().right ();
01034   }
01035 
01036   prev_x = -MAX_INT16;
01037   blob_it.set_to_list (&blobs);
01038   cblob_it.set_to_list (&cblobs);
01039   box_it.set_to_list (row->blob_list ());
01040   word_it.set_to_list (&words);
01041   bol = TRUE;
01042   prev_blanks = 0;
01043   prev_fuzzy_sp = FALSE;
01044   prev_fuzzy_non = FALSE;
01045   if (!box_it.empty ()) {
01046     xstarts[0] = box_it.data ()->bounding_box ().left ();
01047     if (xstarts[0] > next_rep_char_word_right) {
01048       /* We need to insert a repeated char word at the start of the row */
01049       word = rep_char_it.extract ();
01050       word_it.add_after_then_move (word);
01051       /* Set spaces before repeated char word */
01052       word->set_flag (W_BOL, TRUE);
01053       bol = FALSE;
01054       word->set_blanks (0);
01055                                  //NO uncertainty
01056       word->set_flag (W_FUZZY_SP, FALSE);
01057       word->set_flag (W_FUZZY_NON, FALSE);
01058       xstarts[0] = word->bounding_box ().left ();
01059       /* Set spaces after repeated char word (and leave current word set) */
01060       repetition_spacing = find_mean_blob_spacing (word);
01061       current_gap = box_it.data ()->bounding_box ().left () -
01062         next_rep_char_word_right;
01063       current_within_xht_gap = current_gap;
            // A gap wider than tosp_rep_space * repetition_spacing counts as
            // at least one blank in front of the first real word.
01064       if (current_gap > tosp_rep_space * repetition_spacing) {
01065         prev_blanks = (UINT8) floor (current_gap / row->space_size);
01066         if (prev_blanks < 1)
01067           prev_blanks = 1;
01068       }
01069       else
01070         prev_blanks = 0;
01071       if (tosp_debug_level > 5)
01072         tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f;  Rgap:%d  ",
01073           box_it.data ()->bounding_box ().left (),
01074           box_it.data ()->bounding_box ().bottom (),
01075           repetition_spacing, current_gap);
01076       prev_fuzzy_sp = FALSE;
01077       prev_fuzzy_non = FALSE;
01078       if (rep_char_it.empty ()) {
01079         next_rep_char_word_right = MAX_INT32;
01080       }
01081       else {
01082         rep_char_it.forward ();
01083         next_rep_char_word_right =
01084           rep_char_it.data ()->bounding_box ().right ();
01085       }
01086     }
01087 
01088     peek_at_next_gap(row,
01089                      box_it,
01090                      next_blob_box,
01091                      next_gap,
01092                      next_within_xht_gap);
          // Main pass: one iteration per BLOBNBOX, until the iterator is back
          // at the first element of the list.
01093     do {
01094       bblob = box_it.data ();
01095       blob_box = bblob->bounding_box ();
01096       if (bblob->joined_to_prev ()) {
              // Merge this blob's outlines into the last blob collected for
              // the current word, then delete the now-empty blob.
01097         if (bblob->blob () != NULL) {
01098           out_it.set_to_list (blob_it.data ()->out_list ());
01099           out_it.move_to_last ();
01100           out_it.add_list_after (bblob->blob ()->out_list ());
01101           delete bblob->blob ();
01102         }
01103         else if (bblob->cblob () != NULL) {
01104           cout_it.set_to_list (cblob_it.data ()->out_list ());
01105           cout_it.move_to_last ();
01106           cout_it.add_list_after (bblob->cblob ()->out_list ());
01107           delete bblob->cblob ();
01108         }
01109       }
01110       else {
01111         if (bblob->blob () != NULL)
01112           blob_it.add_after_then_move (bblob->blob ());
01113         else if (bblob->cblob () != NULL)
01114           cblob_it.add_after_then_move (bblob->cblob ());
01115         prev_x = blob_box.right ();
01116       }
01117       box_it.forward ();         //next one
01118       bblob = box_it.data ();
01119       blob_box = bblob->bounding_box ();
01120 
01121       if (!bblob->joined_to_prev () &&
01122       (bblob->blob () != NULL || bblob->cblob () != NULL)) {
01123         /* Real Blob - not multiple outlines or pre-chopped */
              // Slide the prev/current/next gap window along by one blob.
01124         prev_gap = current_gap;
01125         prev_within_xht_gap = current_within_xht_gap;
01126         prev_blob_box = next_blob_box;
01127         current_gap = next_gap;
01128         current_within_xht_gap = next_within_xht_gap;
01129         peek_at_next_gap(row,
01130                          box_it,
01131                          next_blob_box,
01132                          next_gap,
01133                          next_within_xht_gap);
01134 
              // Only one of the two make_a_word_break () calls can run, chosen
              // by tosp_only_use_xht_gaps; neither runs when the first test
              // (a repeated-char word is due) already succeeded.
01135         if ((blob_box.left () > next_rep_char_word_right) ||
01136           (!tosp_only_use_xht_gaps &&
01137           make_a_word_break (row, blob_box, prev_gap, prev_blob_box,
01138           current_gap, current_within_xht_gap,
01139           next_blob_box, next_gap,
01140           blanks, fuzzy_sp, fuzzy_non)) ||
01141           (tosp_only_use_xht_gaps &&
01142           make_a_word_break (row, blob_box, prev_within_xht_gap,
01143           prev_blob_box,
01144           current_gap, current_within_xht_gap,
01145           next_blob_box, next_within_xht_gap,
01146           blanks, fuzzy_sp, fuzzy_non)) ||
01147         box_it.at_first ()) {
01148           /* Form a new word out of the blobs collected */
01149           if (!blob_it.empty ()) {
01150             word = new WERD (&blobs, prev_blanks, NULL);
01151             //make real word
01152             word_count++;
01153           }
01154           else {
01155             word = new WERD (&cblobs, prev_blanks, NULL);
01156             word_count++;
01157           }
01158           word_it.add_after_then_move (word);
01159           if (bol) {
01160             word->set_flag (W_BOL, TRUE);
01161             bol = FALSE;
01162           }
01163           if (prev_fuzzy_sp)
01164                                  //probably space
01165             word->set_flag (W_FUZZY_SP, TRUE);
01166           else if (prev_fuzzy_non)
01167             word->set_flag (W_FUZZY_NON, TRUE);
01168           //probably not
01169 
01170           if (blob_box.left () > next_rep_char_word_right) {
01171             /* We need to insert a repeated char word */
01172             word = rep_char_it.extract ();
01173             word_it.add_after_then_move (word);
01174 
01175             /* Set spaces before repeated char word */
01176             repetition_spacing = find_mean_blob_spacing (word);
01177             current_gap = word->bounding_box ().left () - prev_x;
01178             current_within_xht_gap = current_gap;
01179             if (current_gap > tosp_rep_space * repetition_spacing) {
01180               blanks =
01181                 (UINT8) floor (current_gap / row->space_size);
01182               if (blanks < 1)
01183                 blanks = 1;
01184             }
01185             else
01186               blanks = 0;
01187             if (tosp_debug_level > 5)
01188               tprintf
01189                 ("Repch wd (%d,%d) rep gap %5.2f;  Lgap:%d (%d blanks);",
01190                 word->bounding_box ().left (),
01191                 word->bounding_box ().bottom (),
01192                 repetition_spacing, current_gap, blanks);
01193             word->set_blanks (blanks);
01194                                  //NO uncertainty
01195             word->set_flag (W_FUZZY_SP, FALSE);
01196             word->set_flag (W_FUZZY_NON, FALSE);
01197 
01198             /* Set spaces after repeated char word (and leave current word set) */
01199             current_gap =
01200               blob_box.left () - next_rep_char_word_right;
01201             if (current_gap > tosp_rep_space * repetition_spacing) {
01202               blanks = (UINT8) (current_gap / row->space_size);
01203               if (blanks < 1)
01204                 blanks = 1;
01205             }
01206             else
01207               blanks = 0;
01208             if (tosp_debug_level > 5)
01209               tprintf (" Rgap:%d (%d blanks)\n",
01210                 current_gap, blanks);
01211             fuzzy_sp = FALSE;
01212             fuzzy_non = FALSE;
01213 
01214             if (rep_char_it.empty ()) {
01215               next_rep_char_word_right = MAX_INT32;
01216             }
01217             else {
01218               rep_char_it.forward ();
01219               next_rep_char_word_right =
01220                 rep_char_it.data ()->bounding_box ().right ();
01221             }
01222           }
01223 
01224           if (box_it.at_first () && rep_char_it.empty ()) {
01225                                  //at end of line
01226             word->set_flag (W_EOL, TRUE);
01227             xstarts[1] = prev_x;
01228           }
01229           else {
                  // Carry the gap decision over: it describes the space in
                  // front of the NEXT word to be built.
01230             prev_blanks = blanks;
01231             prev_fuzzy_sp = fuzzy_sp;
01232             prev_fuzzy_non = fuzzy_non;
01233           }
01234         }
01235       }
01236     }
01237     while (!box_it.at_first ()); //until back at start
01238 
01239     /* Insert any further repeated char words */
01240     while (!rep_char_it.empty ()) {
01241       word = rep_char_it.extract ();
01242       word_it.add_after_then_move (word);
01243 
01244       /* Set spaces before repeated char word */
01245       repetition_spacing = find_mean_blob_spacing (word);
01246       current_gap = word->bounding_box ().left () - prev_x;
01247       if (current_gap > tosp_rep_space * repetition_spacing) {
01248         blanks = (UINT8) floor (current_gap / row->space_size);
01249         if (blanks < 1)
01250           blanks = 1;
01251       }
01252       else
01253         blanks = 0;
            // repetition_spacing is a float, so it is printed with %5.2f like
            // the two other "Repch wd" messages; %d here would mismatch the
            // promoted double argument and corrupt the values that follow.
01254       if (tosp_debug_level > 5)
01255         tprintf
01256           ("Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
01257           word->bounding_box ().left (), word->bounding_box ().bottom (),
01258           repetition_spacing, current_gap, blanks);
01259       word->set_blanks (blanks);
01260                                  //NO uncertainty
01261       word->set_flag (W_FUZZY_SP, FALSE);
01262       word->set_flag (W_FUZZY_NON, FALSE);
01263       prev_x = word->bounding_box ().right ();
01264       if (rep_char_it.empty ()) {
01265                                  //at end of line
01266         word->set_flag (W_EOL, TRUE);
01267         xstarts[1] = prev_x;
01268       }
01269       else {
01270         rep_char_it.forward ();
01271       }
01272     }
01273     coeffs[0] = 0;
01274     coeffs[1] = row->line_m ();
01275     coeffs[2] = row->line_c ();
01276     real_row = new ROW (row,
01277       (INT16) row->kern_size, (INT16) row->space_size);
01278     word_it.set_to_list (real_row->word_list ());
01279                                  //put words in row
01280     word_it.add_list_after (&words);
01281     real_row->recalc_bounding_box ();
01282     if (tosp_debug_level > 9) {
01283       tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n",
01284         row_count,
01285         word_count,
01286         real_row->bounding_box ().left (),
01287         real_row->bounding_box ().bottom (),
01288         real_row->bounding_box ().right (),
01289         real_row->bounding_box ().top ());
01290     }
01291     return real_row;
01292   }
        // Empty blob list: nothing to build.
01293   return NULL;
01294 }

void mark_gap ( BOX  blob,
INT16  rule,
INT16  prev_gap,
INT16  prev_blob_width,
INT16  current_gap,
INT16  next_blob_width,
INT16  next_gap 
)

Debug stuff using user interface.

Definition at line 1710 of file tospace.cpp.

References BLACK, BLUE, BOX::bottom(), CYAN, ellipse, f, FALSE, fill_color_index, GREEN, BOX::height(), INT_HOLLOW, INT_SOLID, interior_style, BOX::left(), MAGENTA, perimeter_color_index, RED, to_win, tprintf(), TRUE, WHITE, and YELLOW.

Referenced by make_a_word_break().

01717                               {
        // Debug aid for a gap that one of the numbered rules re-classified.
        // Picks a colour from `rule`, draws an ellipse covering the gap to the
        // left of `blob` when textord_show_initial_words is set, and prints the
        // rule number and gap/width values when tosp_debug_level > 0.
        // (Lines without a listing number are review annotations.)
01718   COLOUR col;                    //of ellipse marking flipped gap
01719 
01720   switch (rule) {
01721     case 1:
01722       col = RED;
01723       break;
01724     case 2:
01725       col = CYAN;
01726       break;
01727     case 3:
01728       col = GREEN;
01729       break;
01730     case 4:
01731       col = BLACK;
01732       break;
01733     case 5:
01734       col = MAGENTA;
01735       break;
01736     case 6:
01737       col = BLUE;
01738       break;
01739 
01740     case 7:
01741       col = WHITE;
01742       break;
01743     case 8:
01744       col = YELLOW;
01745       break;
01746     case 9:
01747       col = BLACK;
01748       break;
01749 
          // Rules 20 and up reuse colours from above; they are told apart by
          // being drawn hollow instead of solid (see below).
01750     case 20:
01751       col = CYAN;
01752       break;
01753     case 21:
01754       col = GREEN;
01755       break;
01756     case 22:
01757       col = MAGENTA;
01758       break;
01759     default:
01760       col = BLACK;
01761   }
01762   if (textord_show_initial_words) {
01763     fill_color_index(to_win, col);
01764     perimeter_color_index(to_win, col);
01765     if (rule < 20)
01766       interior_style(to_win, INT_SOLID, FALSE);
01767     else
01768       interior_style(to_win, INT_HOLLOW, TRUE);
          // Ellipse is centred half a gap to the left of the blob's left edge
          // and at the blob's vertical centre; it is as wide as the gap and as
          // tall as the blob.
01769                                  //x radius
01770     ellipse (to_win, current_gap / 2.0f,
01771       blob.height () / 2.0f,     //y radius
01772                                  //x centre
01773       blob.left () - current_gap / 2.0f,
01774                                  //y centre
01775       blob.bottom () + blob.height () / 2.0f,
01776       0.0f);
01777   }
        // Printed fields after the position: rule, prev_gap, prev_blob_width,
        // current_gap, next_blob_width, next_gap.
01778   if (tosp_debug_level > 0)
01779     tprintf ("  (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n",
01780       blob.left () - current_gap / 2, blob.bottom (), rule,
01781       prev_gap, prev_blob_width, current_gap,
01782       next_blob_width, next_gap);
01783 }

BOOL8 narrow_blob ( TO_ROW row,
BOX  blob_box 
)

Determine if blob is narrow.

Parameters:
row Row being made
blob_box For next_blob
Returns:
TRUE or FALSE

Definition at line 1619 of file tospace.cpp.

References BOX::height(), BOX::width(), and TO_ROW::xheight.

Referenced by block_spacing_stats(), isolated_row_stats(), make_a_word_break(), row_spacing_stats(), and wide_blob().

01619                                              { 
        // Returns TRUE when the box is narrow by either measure: its width is
        // at most tosp_narrow_fraction of the row's xheight, or its
        // width/height ratio is at most tosp_narrow_aspect_ratio.
        // NOTE(review): there is no guard for blob_box.height () == 0 in this
        // body - confirm that callers never pass an empty box.
01620   BOOL8 result;
01621 
01622   result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) ||
01623     (((float) blob_box.width () / blob_box.height ()) <=
01624     tosp_narrow_aspect_ratio));
01625   return result;
01626 }

void old_to_method ( TO_ROW row,
STATS all_gap_stats,
STATS space_gap_stats,
STATS small_gap_stats,
INT16  block_space_gap_width,
INT16  block_non_space_gap_width 
)

Estimate space size for block in row

Definition at line 680 of file tospace.cpp.

References STATS::get_total(), TO_ROW::kern_size, STATS::mean(), STATS::median(), TO_ROW::space_size, and TO_ROW::space_threshold.

Referenced by row_spacing_stats().

00687                     {
        // Sets row->space_size, row->kern_size and row->space_threshold from
        // the gap statistics gathered for the row, falling back to the block
        // values when the row has too few samples.  The three STATS arguments
        // are dereferenced with ->, so they are pointers in the real source.
        // (Lines without a listing number are review annotations.)
00688   /* Old to condition was > 2 */
00689   if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) {
00690   //Adequate samples
00691   /* Set space size to median of spaces BUT limits it if it seems wildly out */
00692     row->space_size = space_gap_stats->median ();
          // Upper clamp: 1.5 x the block estimate with tosp_old_to_bug_fix,
          // otherwise the block estimate itself.
00693     if (row->space_size > block_space_gap_width * 1.5) {
00694       if (tosp_old_to_bug_fix)
00695         row->space_size = block_space_gap_width * 1.5;
00696       else
00697                                  //BUG??? should be *1.5
00698         row->space_size = block_space_gap_width;
00699     }
          // Lower clamp: just over twice the block's non-space gap.
00700     if (row->space_size < (block_non_space_gap_width * 2) + 1)
00701       row->space_size = (block_non_space_gap_width * 2) + 1;
00702   }
00703                                  //Only 1 or 2 samples
00704   else if (space_gap_stats->get_total () >= 1) {
00705                                  //hence mean not median
00706     row->space_size = space_gap_stats->mean ();
00707     if (row->space_size > block_space_gap_width * 1.5) {
00708       if (tosp_old_to_bug_fix)
00709         row->space_size = block_space_gap_width * 1.5;
00710       else
00711                                  //BUG??? should be *1.5
00712         row->space_size = block_space_gap_width;
00713     }
          // With few samples the lower clamp is stricter: three times the
          // non-space gap, plus one.
00714     if (row->space_size < (block_non_space_gap_width * 3) + 1)
00715       row->space_size = (block_non_space_gap_width * 3) + 1;
00716   }
00717   else
00718                                  //Use block default
00719     row->space_size = block_space_gap_width;
00720 
        // Kern size: median of the small gaps when that option is on and there
        // are more than tosp_redo_kern_limit of them, else the median of all
        // gaps under the same count test, else the block's non-space gap.
00721   if ((tosp_only_small_gaps_for_kern) &&
00722     (small_gap_stats->get_total () > tosp_redo_kern_limit))
00723     row->kern_size = small_gap_stats->median ();
00724   else if (all_gap_stats->get_total () > tosp_redo_kern_limit)
00725     row->kern_size = all_gap_stats->median ();
00726   else
00727                                  //old TO -SAME FOR ALL ROWS
00728     row->kern_size = block_non_space_gap_width;
00729 
        // Threshold lies between kern_size and space_size: a weighted point
        // (weight tosp_threshold_bias2, rounded) when the bias is positive,
        // otherwise the floor of the midpoint.
00730   if (tosp_threshold_bias2 > 0)
00731     row->space_threshold =
00732       INT32 (floor (0.5 + row->kern_size +
00733       tosp_threshold_bias2 * (row->space_size -
00734       row->kern_size)));
00735   else
00736 /*
00737 NOTE old text ord uses (space_size + kern_size + 1)/2  as the threshold
00738 and holds this in a float. The use is with a >= test
00739 NEW textord uses an integer threshold and a > test
00740 
00741 It comes to the same thing.
00742 
00743 (Though there is a difference in that old textor has integer space_size
00744 and kern_size.)
00745 */
00746     row->space_threshold =
00747       INT32 (floor ((row->space_size + row->kern_size) / 2));
00748 }

void peek_at_next_gap ( TO_ROW row,
BLOBNBOX_IT  box_it,
BOX next_blob_box,
INT16 next_gap,
INT16 next_within_xht_gap 
)

Get a copy of next gap for peeking.

Definition at line 1680 of file tospace.cpp.

References box_next(), BOX::left(), MAX_INT16, reduced_box_next(), and BOX::right().

Referenced by make_prop_words().

01685                                                   {
        // Looks ahead from box_it without the caller having to move: reports
        // the merged box of the blob at box_it and the gap between it and the
        // blob that follows, measured both on full boxes (next_gap) and on
        // reduced boxes (next_within_xht_gap).  Both gaps are MAX_INT16 when
        // the blob at box_it is the last one in the list.
        // NOTE(review): next_blob_box, next_gap and next_within_xht_gap are
        // assigned as outputs, so they are presumably references in
        // tospace.cpp; the listing does not show the signature - confirm.
01686   BOX next_reduced_blob_box;
01687   BOX bit_beyond;
        // Second iterator so the full-box and reduced-box walks advance
        // independently from the same starting blob.
01688   BLOBNBOX_IT reduced_box_it = box_it;
01689 
01690   next_blob_box = box_next (&box_it);
01691   next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
        // box_next () has moved box_it on; being back at the first element
        // means there is no following blob to measure a gap to.
01692   if (box_it.at_first ()) {
01693     next_gap = MAX_INT16;
01694     next_within_xht_gap = MAX_INT16;
01695   }
01696   else {
01697     bit_beyond = box_it.data ()->bounding_box ();
01698     next_gap = bit_beyond.left () - next_blob_box.right ();
01699     bit_beyond = reduced_box_next (row, &reduced_box_it);
01700     next_within_xht_gap =
01701       bit_beyond.left () - next_reduced_blob_box.right ();
01702   }
01703 }

BOX reduced_box_for_blob ( BLOBNBOX blob,
TO_ROW row,
INT16 left_above_xht 
)

Find bounding box for blob.

Find box for blob which is the same height and y position as the whole blob, but whose left limit is the left most position of the blob ABOVE the baseline and whose right limit is the right most position of the blob BELOW the xheight.

WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W: Look at examples on "home". ???

Perhaps we need something which says that if the width ABOVE the xht alone includes the whole of the reduced width, then the full blob box is used - this might still fail on italic F.

Alternatively we could be a little less severe and only reduce the left and right edges by half the difference between the full box and the reduced box.

NOTE that we need to rotate all the coordinates as find_blob_limits finds the y min and max within a specified x band

Definition at line 1958 of file tospace.cpp.

References TO_ROW::baseline, baseline, BOX::bottom(), find_blob_limits(), find_cblob_hlimits(), BOX::left(), MAX_INT16, MAX_INT32, NULL, BOX::right(), BOX::top(), TO_ROW::xheight, and QSPLINE::y().

Referenced by reduced_box_next().

01961                           { 
        // Computes the "reduced" box of one blob in three passes, each using
        // find_blob_limits () for a polygonal blob or find_cblob_hlimits () for
        // a cblob:
        //   1. left-most x of the part above baseline + 1.1 * xheight, written
        //      to *left_above_xht (MAX_INT16 when nothing is found);
        //   2. left-most x of the part above the baseline -> box left edge;
        //   3. right-most x of the part below baseline + xheight -> box right
        //      edge.
        // Top and bottom are copied from the full bounding box.  An empty
        // BOX () is returned when pass 2 or pass 3 finds nothing.
        // (Lines without a listing number are review annotations.)
01962   float baseline;
01963   float blob_x_centre;
01964   float left_limit;
01965   float right_limit;
01966   float junk;
01967   BOX blob_box;
01968 
01969   /* Find baseline of centre of blob */
01970 
01971   blob_box = blob->bounding_box ();
01972   blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
01973   baseline = row->baseline.y (blob_x_centre);
01974 
01975   /*
01976   Find LH limit of blob ABOVE the xht. This is so that we can detect certain
01977   caps ht chars which should NOT have their box reduced: T, Y, V, W etc
01978   */
        // The min/max outputs start inverted (min huge, max tiny), so
        // "min > max" after the call means nothing fell inside the band.
01979   left_limit = (float) MAX_INT32;
01980   junk = (float) -MAX_INT32;
01981   if (blob->blob () != NULL)
01982                                  //blob to test
01983     find_blob_limits (blob->blob (),
01984       (float) -MAX_INT16,        //rotated lower limit
01985       -(baseline + 1.1 * row->xheight),
01986     //rotated upper limit
01987       FCOORD (0.0, 1.0),         //90deg anticlock rot
01988       left_limit, junk);         //min y max_y
01989   else
01990                                  //blob to test
01991     find_cblob_hlimits (blob->cblob (),
01992                                  //rotated lower limit
01993       (baseline + 1.1 * row->xheight), (float) MAX_INT16,
01994     //rotated upper limit
01995     // FCOORD( 0.0, 1.0 ),       //90deg anticlock rot
01996       left_limit, junk);         //min y max_y
01997   if (left_limit > junk)
01998     *left_above_xht = MAX_INT16; //No area above xht
01999   else
02000     *left_above_xht = (INT16) floor (left_limit);
02001   /*
02002   Find reduced LH limit of blob - the left extent of the region ABOVE the
02003   baseline.
02004   */
02005   left_limit = (float) MAX_INT32;
02006   junk = (float) -MAX_INT32;
02007   if (blob->blob () != NULL)
02008                                  //blob to test
02009     find_blob_limits (blob->blob (),
02010       (float) -MAX_INT16,        //rotated lower limit
02011       -baseline,                 //rotated upper limit
02012       FCOORD (0.0, 1.0),         //90deg anticlock rot
02013       left_limit, junk);         //min y max_y
02014   else
          // NOTE(review): the "upper/lower limit" labels on the
          // find_cblob_hlimits arguments are not consistent across the three
          // calls in this function - check them against that function's
          // parameter order before relying on the comments.
02015                                  //blob to test
02016     find_cblob_hlimits (blob->cblob (),
02017       baseline,                  //rotated upper limit
02018       (float) MAX_INT16,         //rotated lower limit
02019     // FCOORD( 0.0, 1.0 ),       //90deg anticlock rot
02020       left_limit, junk);         //min y max_y
02021 
02022   if (left_limit > junk)
02023     return BOX ();               //no area within xht so return empty box
02024   /*
02025   Find reduced RH limit of blob - the right extent of the region BELOW the xht.
02026   */
02027   junk = (float) MAX_INT32;
02028   right_limit = (float) -MAX_INT32;
02029   if (blob->blob () != NULL)
02030                                  //blob to test
02031     find_blob_limits (blob->blob (),
02032       -(baseline + row->xheight),
02033     //rotated lower limit
02034       (float) MAX_INT16,         //rotated upper limit
02035       FCOORD (0.0, 1.0),         //90deg anticlock rot
02036       junk, right_limit);        //min y max_y
02037   else
02038                                  //blob to test
02039     find_cblob_hlimits (blob->cblob (),
02040       (float) -MAX_INT16,        //rotated upper limit
02041       (baseline + row->xheight),
02042     //rotated lower limit
02043     //  FCOORD( 0.0, 1.0 ),      //90deg anticlock rot
02044       junk, right_limit);        //min y max_y
02045   if (junk > right_limit)
02046     return BOX ();               //no area within xht so return empty box
02047 
        // Round outwards (floor on the left, ceil on the right) so the integer
        // box still contains the measured limits.
02048   return BOX (ICOORD ((INT16) floor (left_limit), blob_box.bottom ()),
02049     ICOORD ((INT16) ceil (right_limit), blob_box.top ()));
02050 }

BOX reduced_box_next ( TO_ROW row,
BLOBNBOX_IT *  it 
)

Get bounding box.

Compute the bounding box of this blob with merging of x overlaps but no pre-chopping.

Then move the iterator on to the start of the next blob. DON'T reduce the box for small things, e.g. punctuation.

Definition at line 1878 of file tospace.cpp.

References BOX::height(), INT_HOLLOW, BOX::left(), MIN, NULL, BOX::plot(), reduced_box_for_blob(), to_win, TRUE, BOX::width(), TO_ROW::xheight, and YELLOW.

Referenced by block_spacing_stats(), isolated_row_stats(), peek_at_next_gap(), and row_spacing_stats().

01881                       {
        // Returns the reduced box for the blob at *it (merged with any
        // following joined_to_prev blobs) and leaves the iterator on the next
        // blob that has a blob () or cblob () and is not joined_to_prev.
        // The result is stored on the head blob with set_reduced_box (), and a
        // blob for which red_box_set () is true returns its stored box.
        // (Lines without a listing number are review annotations.)
01882   BLOBNBOX *blob;                //current blob
01883   BLOBNBOX *head_blob;           //place to store box
01884   BOX full_box;                  //full blob boundg box
01885   BOX reduced_box;               //box of significant part
01886   INT16 left_above_xht;          //ABOVE xht left limit
01887   INT16 new_left_above_xht;      //ABOVE xht left limit
01888 
01889   blob = it->data ();
01890   if (blob->red_box_set ()) {
          // Box already computed: only the iterator needs advancing.
01891     reduced_box = blob->reduced_box ();
01892     do {
01893       it->forward ();
01894       blob = it->data ();
01895     }
01896                                  //until next real blob
          // && binds tighter than ||: keep going while the element holds no
          // blob at all, or while it is joined to the previous one.
01897     while (blob->blob () == NULL
01898       && blob->cblob () == NULL || blob->joined_to_prev ());
01899     return reduced_box;
01900   }
01901   head_blob = blob;
01902   full_box = blob->bounding_box ();
01903   reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
01904   do {
01905     it->forward ();
01906     blob = it->data ();
01907     if (blob->blob () == NULL && blob->cblob () == NULL)
01908                                  //was pre-chopped
01909       full_box += blob->bounding_box ();
01910     else if (blob->joined_to_prev ()) {
01911       reduced_box +=
01912         reduced_box_for_blob(blob, row, &new_left_above_xht);
01913       left_above_xht = MIN (left_above_xht, new_left_above_xht);
01914     }
01915   }
01916                                  //until next real blob
01917   while (blob->blob () == NULL
01918     && blob->cblob () == NULL || blob->joined_to_prev ());
01919 
        // Keep the reduced box only if it is non-empty, the part above the
        // xheight starts more than tosp_near_lh_edge of the reduced width to
        // the right of the reduced left edge, and it is taller than 0.7 of the
        // xheight.  In every other case the full box is used instead.
01920   if ((reduced_box.width () > 0) &&
01921     ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ())
01922   < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) {
01923 #ifndef GRAPHICS_DISABLED
01924     if (textord_show_initial_words)
01925       reduced_box.plot (to_win, INT_HOLLOW, TRUE, YELLOW, YELLOW);
01926 #endif
01927   }
01928   else
01929     reduced_box = full_box;
01930   head_blob->set_reduced_box (reduced_box);
01931   return reduced_box;
01932 }//reduced_box_next

void row_spacing_stats ( TO_ROW *  row,
GAPMAP *  gapmap,
INT16  block_idx,
INT16  row_idx,
INT16  block_space_gap_width,
INT16  block_non_space_gap_width 
)

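Estimate spacing for a row from row stats (the block estimates are passed in as a fallback).

Returns:
Nothing (the function is void); results are written to the row. 0 values if failed.
Sets values for min_space and max_nonspace (together with kern_size, space_size and space_threshold) based on row stats only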

Definition at line 381 of file tospace.cpp.

References STATS::add(), TO_ROW::blob_list(), box_next(), box_next_pre_chopped(), STATS::get_total(), ignore_big_gap(), improve_row_threshold(), isolated_row_stats(), BOX::left(), MAX, max, MAXSPACING, STATS::median(), MIN, narrow_blob(), old_to_method(), STATS::pile_count(), reduced_box_next(), BOX::right(), tprintf(), wide_blob(), and TO_ROW::xheight.

Referenced by to_spacing().

00388                         {
00389                                  //iterator
00390   BLOBNBOX_IT blob_it = row->blob_list ();
00391   STATS all_gap_stats (0, MAXSPACING);  //every gap not rejected by ignore_big_gap
00392   STATS cert_space_gap_stats (0, MAXSPACING);  //gaps >= threshold that also pass the "certain space" tests
00393   STATS all_space_gap_stats (0, MAXSPACING);  //all gaps >= real_space_threshold
00394   STATS small_gap_stats (0, MAXSPACING);  //all gaps < real_space_threshold
00395   BOX blob_box;                  //box of current blob
00396   BOX prev_blob_box;             //box of previous blob
00397   INT16 gap_width;               //current left minus previous right
00398   INT16 real_space_threshold = 0;  //first pass kern/space split from block estimates
00399   INT16 max = 0;                 //largest pile count seen in kern scan
00400   INT16 index;
00401   INT16 large_gap_count = 0;     //gaps rejected by ignore_big_gap
00402   BOOL8 suspected_table;
00403   INT32 max_max_nonspace;        //upper bound
00404   BOOL8 good_block_space_estimate = block_space_gap_width > 0;
00405   INT32 end_of_row;              //right edge of blob at relative position -1
00406   INT32 row_length = 0;          //end_of_row minus left of first box
00407   float sane_space;
00408   INT32 sane_threshold;
00409 
00410   /* Collect first pass stats for row */
00411 
00412   if (!good_block_space_estimate)  //no block estimate: use half the x-height
00413     block_space_gap_width = INT16 (floor (row->xheight / 2));
00414   if (!row->blob_list ()->empty ()) {
00415     if (tosp_threshold_bias1 > 0)
00416       real_space_threshold =
00417         block_non_space_gap_width +
00418         INT16 (floor (0.5 +
00419         tosp_threshold_bias1 * (block_space_gap_width -
00420         block_non_space_gap_width)));
00421     else
00422       real_space_threshold =     //Old TO method
00423         (block_space_gap_width + block_non_space_gap_width) / 2;
00424     blob_it.set_to_list (row->blob_list ());
00425     blob_it.mark_cycle_pt ();
00426     end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
00427     if (tosp_use_pre_chopping)   //fetch first box; the same three-way choice is repeated in the loop
00428       blob_box = box_next_pre_chopped (&blob_it);
00429     else if (tosp_stats_use_xht_gaps)
00430       blob_box = reduced_box_next (row, &blob_it);
00431     else
00432       blob_box = box_next (&blob_it);
00433     row_length = end_of_row - blob_box.left ();
00434     prev_blob_box = blob_box;
00435     while (!blob_it.cycled_list ()) {
00436       if (tosp_use_pre_chopping)
00437         blob_box = box_next_pre_chopped (&blob_it);
00438       else if (tosp_stats_use_xht_gaps)
00439         blob_box = reduced_box_next (row, &blob_it);
00440       else
00441         blob_box = box_next (&blob_it);
00442       gap_width = blob_box.left () - prev_blob_box.right ();
00443       if (ignore_big_gap (row, row_length, gapmap,
00444         prev_blob_box.right (), blob_box.left ()))
00445         large_gap_count++;       //counted but kept out of all stats
00446       else {
00447         if (gap_width >= real_space_threshold) {
00448           if (!tosp_row_use_cert_spaces ||
00449             (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
00450             ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
00451             && (!tosp_narrow_blobs_not_cert
00452             || (!narrow_blob (row, prev_blob_box)
00453             && !narrow_blob (row, blob_box))))
00454             || (wide_blob (row, prev_blob_box)
00455             && wide_blob (row, blob_box)))
00456             cert_space_gap_stats.add (gap_width, 1);
00457           all_space_gap_stats.add (gap_width, 1);
00458         }
00459         else
00460           small_gap_stats.add (gap_width, 1);
00461         all_gap_stats.add (gap_width, 1);
00462       }
00463       prev_blob_box = blob_box;
00464     }
00465   }
00466   suspected_table = (large_gap_count > 1) ||  //several big gaps, or one big gap in a row with few samples
00467     ((large_gap_count > 0) &&
00468     (all_gap_stats.get_total () <= tosp_few_samples));
00469 
00470   /* Determine row kern size, space size and threshold */
00471 
00472   if ((cert_space_gap_stats.get_total () >=
00473     tosp_enough_space_samples_for_median) ||
00474     ((suspected_table ||
00475     all_gap_stats.get_total () <= tosp_short_row) &&
00476     cert_space_gap_stats.get_total () > 0))
00477     old_to_method(row,
00478                   &all_gap_stats,
00479                   &cert_space_gap_stats,
00480                   &small_gap_stats,
00481                   block_space_gap_width,
00482                   block_non_space_gap_width);
00483   else {
00484     if (!tosp_recovery_isolated_row_stats ||
00485       !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
00486     block_idx, row_idx)) {
00487       if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
00488         tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
00489           block_idx, row_idx);
00490       if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
00491                                  //Use block default
00492         row->space_size = block_space_gap_width;
00493         if (all_gap_stats.get_total () > tosp_redo_kern_limit)
00494           row->kern_size = all_gap_stats.median ();
00495         else
00496           row->kern_size = block_non_space_gap_width;
00497         row->space_threshold =   //midway between kern and space
00498           INT32 (floor ((row->space_size + row->kern_size) / 2));
00499       }
00500       else                       //same method as above but fed all gaps >= threshold
00501         old_to_method(row,
00502                       &all_gap_stats,
00503                       &all_space_gap_stats,
00504                       &small_gap_stats,
00505                       block_space_gap_width,
00506                       block_non_space_gap_width);
00507     }
00508   }
00509 
00510   if (tosp_improve_thresh && !suspected_table)
00511     improve_row_threshold(row, &all_gap_stats);
00512 
00513   /* Lets try to be careful not to do anything silly with tables when we
00514   are ignoring big gaps */
00515   if (tosp_sanity_method == 0) {
00516     if (suspected_table &&
00517     (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
00518       if (tosp_debug_level > 0)
00519         tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
00520           block_idx, row_idx,
00521           row->kern_size, row->space_threshold, row->space_size);
00522       row->space_threshold =
00523         (INT32) (tosp_table_kn_sp_ratio * row->kern_size);
00524       row->space_size = MAX (row->space_threshold + 1, row->xheight);
00525     }
00526   }
00527   else if (tosp_sanity_method == 1) {
00528     sane_space = row->space_size;
00529     /* NEVER let space size get too close to kern size */
00530     if ((row->space_size < tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5))
00531       || ((row->space_size - row->kern_size) <
00532     (tosp_silly_kn_sp_gap * row->xheight))) {
00533       if (good_block_space_estimate &&
00534         (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
00535         sane_space = block_space_gap_width;
00536       else
00537         sane_space =
00538           MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),
00539           row->xheight / 2);
00540       if (tosp_debug_level > 0)
00541         tprintf
00542           ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
00543           block_idx, row_idx, row->kern_size, row->space_threshold,
00544           row->space_size, sane_space);
00545       row->space_size = sane_space;
00546       row->space_threshold =
00547         INT32 (floor ((row->space_size + row->kern_size) / 2));
00548     }
00549     /* NEVER let threshold get VERY far away from kern */
00550     sane_threshold = INT32 (floor (tosp_max_sane_kn_thresh *
00551       MAX (row->kern_size, 2.5)));
00552     if (row->space_threshold > sane_threshold) {
00553       if (tosp_debug_level > 0)
00554         tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
00555           block_idx, row_idx,
00556           row->kern_size,
00557           row->space_threshold, row->space_size, sane_threshold);
00558       row->space_threshold = sane_threshold;
00559       if (row->space_size <= sane_threshold)  //keep space size above the threshold
00560         row->space_size = row->space_threshold + 1.0f;
00561     }
00562     /* Beware of tables - there may be NO spaces */
00563     if (suspected_table) {
00564       sane_space = MAX (tosp_table_kn_sp_ratio * row->kern_size,
00565         tosp_table_xht_sp_ratio * row->xheight);
00566       sane_threshold = INT32 (floor ((sane_space + row->kern_size) / 2));
00567 
00568       if ((row->space_size < sane_space) ||
00569       (row->space_threshold < sane_threshold)) {
00570         if (tosp_debug_level > 0)
00571           tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
00572             block_idx, row_idx,
00573             row->kern_size,
00574             row->space_threshold, row->space_size);
00575                                  //the minimum sane value
00576         row->space_threshold = (INT32) sane_space;
00577         row->space_size = MAX (row->space_threshold + 1, row->xheight);
00578       }
00579     }
00580   }
00581 
00582   /* Lets try to put some error limits on the threshold */
00583 
00584   if (tosp_old_to_method) {
00585     /* Old textord made a space if gap >= threshold */
00586                                  //NO FUZZY SPACES YET
00587     row->max_nonspace = row->space_threshold;
00588                                  //NO FUZZY SPACES YET
00589     row->min_space = row->space_threshold + 1;
00590   }
00591   else {
00592     /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it :-) */
00593     row->min_space =
00594       MIN (INT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
00595       INT32 (row->space_size));
00596     if (row->min_space <= row->space_threshold)
00597                                  //Do not be silly
00598       row->min_space = row->space_threshold + 1;
00599 /*
00600 Kern Gap
00601 
00602 Lets try to guess the max certain kern gap by looking at the cluster of
00603 kerns for the row. The row is proportional so the kerns should cluster
00604 tightly at the bottom of the distribution. We also expect most gaps to be
00605 kerns. Find the maximum of the kern piles between 0 and twice the kern
00606 estimate. Piles before the first one with less than 1/10 the maximum
00607 number of samples can be taken as certain kerns.
00608 
00609 Of course, there are some cases where the kern peak and space peaks merge,
00610 so we will put an UPPER limit on the max certain kern gap of some fraction
00611 below the threshold.
00612 */
00613 
00614     max_max_nonspace = INT32 ((row->space_threshold + row->kern_size) / 2);
00615 
00616                                  //default
00617     row->max_nonspace = max_max_nonspace;
00618     for (index = 0; index <= max_max_nonspace; index++) {
00619       if (all_gap_stats.pile_count (index) > max)
00620         max = all_gap_stats.pile_count (index);
00621       if ((index > row->kern_size) &&  //first pile above kern size holding < 10% of the peak so far
00622       (all_gap_stats.pile_count (index) < 0.1 * max)) {
00623         row->max_nonspace = index;
00624         break;
00625       }
00626     }
00627   }
00628 
00629   /* Yet another algorithm - simpler this time - just choose a fraction of the
00630   threshold to space range */
00631 
00632   if ((tosp_fuzzy_sp_fraction > 0) &&
00633     (row->space_size > row->space_threshold))
00634     row->min_space = MAX (row->min_space,
00635       (INT32) ceil (row->space_threshold +
00636       tosp_fuzzy_sp_fraction *
00637       (row->space_size -
00638       row->space_threshold)));
00639 
00640   /*
00641   Kern Table
00642 
00643   Ensure that ANY space less than some multiplier times the kern size is
00644   fuzzy.  In tables there is a risk of erroneously setting a small space size
00645   when there are no real spaces. Sometimes tables have text squashed into
00646   columns so that the kn->sp ratio is small anyway - this means that we cant
00647   use this to force a wider separation - hence we rely on context to join any
00648   dubious breaks.
00649   */
00650 
00651   if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
00652     (suspected_table || tosp_fuzzy_limit_all))
00653     row->min_space = MAX (row->min_space,
00654       (INT32) ceil (tosp_table_fuzzy_kn_sp_ratio *
00655       row->kern_size));
00656 
00657   if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold))
00658     row->max_nonspace = (INT32) floor (0.5 + row->kern_size +  //overrides the value chosen above
00659       tosp_fuzzy_kn_fraction *
00660       (row->space_threshold -
00661       row->kern_size));
00662 
00663   if (row->max_nonspace > row->space_threshold)
00664                                  //Do not be silly
00665     row->max_nonspace = row->space_threshold;
00666 
00667   if (tosp_debug_level > 5)
00668     tprintf
00669       ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
00670       block_idx, row_idx, row_length, block_non_space_gap_width,
00671       block_space_gap_width, real_space_threshold, row->kern_size,
00672       row->max_nonspace, row->space_threshold, row->min_space,
00673       row->space_size);
00674 }

INT16 stats_count_under ( STATS *  stats,
INT16  threshold 
)

Counts the number of samples in stats whose value is below threshold (sums the pile counts for indices 0 to threshold - 1).

Definition at line 873 of file tospace.cpp.

References STATS::pile_count().

Referenced by improve_row_threshold(), and isolated_row_stats().

00873                                                        { 
00874   INT16 index;                   //pile index
00875   INT16 total = 0;               //running sum of pile counts
00876 
00877   for (index = 0; index < threshold; index++)  //piles 0 .. threshold - 1, threshold itself excluded
00878     total += stats->pile_count (index);
00879   return total;                  //0 if threshold <= 0
00880 }

BOOL8 suspected_punct_blob ( TO_ROW *  row,
BOX  box 
)

Determine if blob might contain punctuation.

Parameters:
row Row being made
box Bounding box of the blob being tested
Returns:
TRUE or FALSE

Definition at line 1660 of file tospace.cpp.

References TO_ROW::baseline, baseline, BOX::bottom(), BOX::height(), BOX::left(), BOX::right(), BOX::top(), TO_ROW::xheight, and QSPLINE::y().

Referenced by make_a_word_break().

01660                                                  { 
01661   BOOL8 result;
01662   float baseline;                //baseline y at blob centre
01663   float blob_x_centre;           //x midpoint of box
01664 
01665   /* Find baseline of centre of blob */
01666 
01667   blob_x_centre = (box.right () + box.left ()) / 2.0;
01668   baseline = row->baseline.y (blob_x_centre);
01669                                  //short box, or top below / bottom above the line at baseline + xheight / 2
01670   result = (box.height () <= 0.66 * row->xheight) ||
01671     (box.top () < baseline + row->xheight / 2.0) ||
01672     (box.bottom () > baseline + row->xheight / 2.0);
01673   return result;
01674 }

void to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Set spacing by computing fuzzy word spacing thresholds for each row.

Parameters:
page_tr Topright of page
blocks Blocks on page
Returns:
none
Sets: max_nonspace, space_threshold, min_space, kern_size, & space_size for each row.

Note:
ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE

Definition at line 138 of file tospace.cpp.

References block_spacing_stats(), TO_ROW::fixed_pitch, NULL, PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_PROP, plot_word_decisions(), row_spacing_stats(), to_win, and tprintf().

Referenced by make_words().

00141                  {
00142   TO_BLOCK_IT block_it;          //iterator
00143   TO_BLOCK *block;               //current block;
00144   TO_ROW_IT row_it;              //row iterator
00145   TO_ROW *row;                   //current row
00146   int block_index;               //block number
00147   int row_index;                 //row number
00148   INT16 block_space_gap_width;   //Est width of real spaces for whole block
00149                                  //Est width of non space gaps for whole block
00150   INT16 block_non_space_gap_width;
00151                                  //Old fixed/prop result
00152   BOOL8 old_text_ord_proportional;
00153   GAPMAP *gapmap = NULL;         //map of big vert gaps in blk
00154 
00155   block_it.set_to_list (blocks);
00156   block_index = 1;               //blocks are numbered from 1
00157   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00158   block_it.forward ()) {
00159     block = block_it.data ();
00160     gapmap = new GAPMAP (block);   //deleted at the end of this iteration
00161     block_spacing_stats(block,     //NOTE(review): the last three args are uninitialised here and read below,
00162                         gapmap,    //so they are presumably outputs taken by reference -- confirm against the prototype
00163                         old_text_ord_proportional,
00164                         block_space_gap_width,
00165                         block_non_space_gap_width);
00166     row_it.set_to_list (block->get_rows ());
00167     row_index = 1;               //rows are numbered from 1 within each block
00168     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00169       row = row_it.data ();
00170       if ((row->pitch_decision == PITCH_DEF_PROP) ||  //only proportional rows get row spacing stats
00171       (row->pitch_decision == PITCH_CORR_PROP)) {
00172         if ((tosp_debug_level > 0) && !old_text_ord_proportional)
00173           tprintf ("Block %d Row %d: Now Proportional\n",
00174             block_index, row_index);
00175         row_spacing_stats(row,
00176                           gapmap,
00177                           block_index,
00178                           row_index,
00179                           block_space_gap_width,
00180                           block_non_space_gap_width);
00181       }
00182       else {                     //any other pitch decision: only report, no spacing is set here
00183         if ((tosp_debug_level > 0) && old_text_ord_proportional)
00184           tprintf
00185             ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
00186             block_index, row_index, row->pitch_decision,
00187             row->fixed_pitch);
00188       }
00189 #ifndef GRAPHICS_DISABLED
00190       if (textord_show_initial_words)
00191         plot_word_decisions (to_win, (INT16) row->fixed_pitch, row);
00192 #endif
00193       row_index++;
00194     }
00195     delete gapmap;
00196     block_index++;
00197   }
00198 }

BOOL8 wide_blob ( TO_ROW *  row,
BOX  blob_box 
)

Determine if blob is wide.

Parameters:
row Row being made
blob_box For next_blob
Returns:
TRUE or FALSE

Definition at line 1636 of file tospace.cpp.

References BOX::height(), narrow_blob(), BOX::width(), and TO_ROW::xheight.

Referenced by block_spacing_stats(), isolated_row_stats(), make_a_word_break(), and row_spacing_stats().

01636                                            { 
01637   BOOL8 result;
01638 
01639   if (tosp_wide_fraction > 0) {  //explicit width test enabled
01640     if (tosp_wide_aspect_ratio > 0)  //width test plus width/height aspect test
01641       result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) &&
01642         (((float) blob_box.width () / blob_box.height ()) >
01643         tosp_wide_aspect_ratio));
01644     else                         //width test only
01645       result = (blob_box.width () >= tosp_wide_fraction * row->xheight);
01646   }
01647   else                           //no width test: wide means not narrow
01648     result = !narrow_blob (row, blob_box);
01649   return result;
01650 }


Generated on Wed Feb 28 19:49:26 2007 for Tesseract by  doxygen 1.5.1