#include "blobbox.h"
#include "gap_map.h"
#include "statistc.h"
#include "notdll.h"
Go to the source code of this file.
void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, BOOL8 &old_text_ord_proportional, INT16 &block_space_gap_width, INT16 &block_non_space_gap_width)
DEBUG USE ONLY.
Definition at line 206 of file tospace.cpp.
References STATS::add(), TO_ROW::blob_list(), box_next(), box_next_pre_chopped(), STATS::get_total(), ignore_big_gap(), STATS::ile(), BOX::left(), MAX, MAX_INT16, MAXSPACING, STATS::median(), narrow_blob(), PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_PROP, reduced_box_next(), BOX::right(), TRUE, wide_blob(), BOX::width(), and TO_ROW::xheight.
Referenced by to_spacing().
00212 { 00213 TO_ROW_IT row_it; //row iterator 00214 TO_ROW *row; //current row 00215 BLOBNBOX_IT blob_it; //iterator 00216 00217 STATS centre_to_centre_stats (0, MAXSPACING); 00218 //DEBUG USE ONLY 00219 STATS all_gap_stats (0, MAXSPACING); 00220 STATS space_gap_stats (0, MAXSPACING); 00221 INT16 minwidth = MAX_INT16; //narrowest blob 00222 BOX blob_box; 00223 BOX prev_blob_box; 00224 INT16 centre_to_centre; 00225 INT16 gap_width; 00226 float real_space_threshold; 00227 float iqr_centre_to_centre; //DEBUG USE ONLY 00228 float iqr_all_gap_stats; //DEBUG USE ONLY 00229 INT32 end_of_row; 00230 INT32 row_length; 00231 00232 row_it.set_to_list (block->get_rows ()); 00233 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00234 row = row_it.data (); 00235 if (!row->blob_list ()->empty () && 00236 (!tosp_only_use_prop_rows || 00237 (row->pitch_decision == PITCH_DEF_PROP) || 00238 (row->pitch_decision == PITCH_CORR_PROP))) { 00239 blob_it.set_to_list (row->blob_list ()); 00240 blob_it.mark_cycle_pt (); 00241 end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); 00242 if (tosp_use_pre_chopping) 00243 blob_box = box_next_pre_chopped (&blob_it); 00244 else if (tosp_stats_use_xht_gaps) 00245 blob_box = reduced_box_next (row, &blob_it); 00246 else 00247 blob_box = box_next (&blob_it); 00248 row_length = end_of_row - blob_box.left (); 00249 if (blob_box.width () < minwidth) 00250 minwidth = blob_box.width (); 00251 prev_blob_box = blob_box; 00252 while (!blob_it.cycled_list ()) { 00253 if (tosp_use_pre_chopping) 00254 blob_box = box_next_pre_chopped (&blob_it); 00255 else if (tosp_stats_use_xht_gaps) 00256 blob_box = reduced_box_next (row, &blob_it); 00257 else 00258 blob_box = box_next (&blob_it); 00259 if (blob_box.width () < minwidth) 00260 minwidth = blob_box.width (); 00261 gap_width = blob_box.left () - prev_blob_box.right (); 00262 if (!ignore_big_gap (row, row_length, gapmap, 00263 prev_blob_box.right (), blob_box.left ())) { 00264 
all_gap_stats.add (gap_width, 1); 00265 00266 centre_to_centre = (blob_box.left () + blob_box.right () - 00267 (prev_blob_box.left () + 00268 prev_blob_box.right ())) / 2; 00269 //DEBUG 00270 centre_to_centre_stats.add (centre_to_centre, 1); 00271 // DEBUG 00272 } 00273 prev_blob_box = blob_box; 00274 } 00275 } 00276 } 00277 00278 //Inadequate samples 00279 if (all_gap_stats.get_total () <= 1) { 00280 block_non_space_gap_width = minwidth; 00281 block_space_gap_width = -1; //No est. space width 00282 //DEBUG 00283 old_text_ord_proportional = TRUE; 00284 } 00285 else { 00286 /* For debug only ..... */ 00287 iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) - 00288 centre_to_centre_stats.ile (0.25); 00289 iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25); 00290 old_text_ord_proportional = 00291 iqr_centre_to_centre * 2 > iqr_all_gap_stats; 00292 /* .......For debug only */ 00293 00294 /* 00295 The median of the gaps is used as an estimate of the NON-SPACE gap width. 00296 This RELIES on the assumption that there are more gaps WITHIN words than 00297 BETWEEN words in a block 00298 00299 Now try to estimate the width of a real space for all real spaces in the 00300 block. Do this by using a crude threshold to ignore "narrow" gaps, then 00301 find the median of the "wide" gaps and use this. 
00302 */ 00303 block_non_space_gap_width = (INT16) floor (all_gap_stats.median ()); 00304 // median gap 00305 00306 row_it.set_to_list (block->get_rows ()); 00307 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00308 row = row_it.data (); 00309 if (!row->blob_list ()->empty () && 00310 (!tosp_only_use_prop_rows || 00311 (row->pitch_decision == PITCH_DEF_PROP) || 00312 (row->pitch_decision == PITCH_CORR_PROP))) { 00313 real_space_threshold = 00314 MAX (tosp_init_guess_kn_mult * block_non_space_gap_width, 00315 tosp_init_guess_xht_mult * row->xheight); 00316 blob_it.set_to_list (row->blob_list ()); 00317 blob_it.mark_cycle_pt (); 00318 end_of_row = 00319 blob_it.data_relative (-1)->bounding_box ().right (); 00320 if (tosp_use_pre_chopping) 00321 blob_box = box_next_pre_chopped (&blob_it); 00322 else if (tosp_stats_use_xht_gaps) 00323 blob_box = reduced_box_next (row, &blob_it); 00324 else 00325 blob_box = box_next (&blob_it); 00326 row_length = blob_box.left () - end_of_row; 00327 prev_blob_box = blob_box; 00328 while (!blob_it.cycled_list ()) { 00329 if (tosp_use_pre_chopping) 00330 blob_box = box_next_pre_chopped (&blob_it); 00331 else if (tosp_stats_use_xht_gaps) 00332 blob_box = reduced_box_next (row, &blob_it); 00333 else 00334 blob_box = box_next (&blob_it); 00335 gap_width = blob_box.left () - prev_blob_box.right (); 00336 if ((gap_width > real_space_threshold) && 00337 !ignore_big_gap (row, row_length, gapmap, 00338 prev_blob_box.right (), 00339 blob_box.left ())) { 00340 /* 00341 If tosp_use_cert_spaces is enabled, the estimate of the space gap is 00342 restricted to obvious spaces - those wider than half the xht or those 00343 with wide blobs on both sides - i.e not things that are suspect 1's or 00344 punctiation that is sometimes widely spaced. 
00345 */ 00346 if (!tosp_block_use_cert_spaces || 00347 (gap_width > 00348 tosp_fuzzy_space_factor2 * row->xheight) 00349 || 00350 ((gap_width > 00351 tosp_fuzzy_space_factor1 * row->xheight) 00352 && (!tosp_narrow_blobs_not_cert 00353 || (!narrow_blob (row, prev_blob_box) 00354 && !narrow_blob (row, blob_box)))) 00355 || (wide_blob (row, prev_blob_box) 00356 && wide_blob (row, blob_box))) 00357 space_gap_stats.add (gap_width, 1); 00358 } 00359 prev_blob_box = blob_box; 00360 } 00361 } 00362 } 00363 //Inadequate samples 00364 if (space_gap_stats.get_total () <= 2) 00365 block_space_gap_width = -1;//No est. space width 00366 else 00367 block_space_gap_width = 00368 MAX ((INT16) floor (space_gap_stats.median ()), 00369 3 * block_non_space_gap_width); 00370 } 00371 }
float find_mean_blob_spacing(WERD *word)
Find mean of blob spacing.
Definition at line 1790 of file tospace.cpp.
References WERD::blob_list(), WERD::cblob_list(), WERD::flag(), BOX::left(), BOX::right(), and W_POLYGON.
Referenced by make_prop_words().
01790 { 01791 PBLOB_IT blob_it; 01792 C_BLOB_IT cblob_it; 01793 BOX blob_box; 01794 INT32 gap_sum = 0; 01795 INT16 gap_count = 0; 01796 INT16 prev_right; 01797 01798 if (word->flag (W_POLYGON)) { 01799 blob_it.set_to_list (word->blob_list ()); 01800 if (!blob_it.empty ()) { 01801 blob_it.mark_cycle_pt (); 01802 prev_right = blob_it.data ()->bounding_box ().right (); 01803 //first blob 01804 blob_it.forward (); 01805 for (; !blob_it.cycled_list (); blob_it.forward ()) { 01806 blob_box = blob_it.data ()->bounding_box (); 01807 gap_sum += blob_box.left () - prev_right; 01808 gap_count++; 01809 prev_right = blob_box.right (); 01810 } 01811 } 01812 } 01813 else { 01814 cblob_it.set_to_list (word->cblob_list ()); 01815 if (!cblob_it.empty ()) { 01816 cblob_it.mark_cycle_pt (); 01817 prev_right = cblob_it.data ()->bounding_box ().right (); 01818 //first blob 01819 cblob_it.forward (); 01820 for (; !cblob_it.cycled_list (); cblob_it.forward ()) { 01821 blob_box = cblob_it.data ()->bounding_box (); 01822 gap_sum += blob_box.left () - prev_right; 01823 gap_count++; 01824 prev_right = blob_box.right (); 01825 } 01826 } 01827 } 01828 if (gap_count > 0) 01829 return (gap_sum / (float) gap_count); 01830 else 01831 return 0.0f; 01832 }
Decide if big gaps should be ignored
Definition at line 1838 of file tospace.cpp.
References FALSE, GAPMAP::table_gap(), TRUE, and TO_ROW::xheight.
Referenced by block_spacing_stats(), isolated_row_stats(), and row_spacing_stats().
01842 { 01843 INT16 gap = right - left + 1; 01844 01845 if (tosp_ignore_big_gaps > 999) 01846 return FALSE; //Dont ignore 01847 if (tosp_ignore_big_gaps > 0) 01848 return (gap > tosp_ignore_big_gaps * row->xheight); 01849 if (gap > tosp_ignore_very_big_gaps * row->xheight) 01850 return TRUE; 01851 if (tosp_ignore_big_gaps == 0) { 01852 if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight)) 01853 return TRUE; 01854 if ((gap > 1.75 * row->xheight) && 01855 ((row_length > 35 * row->xheight) || 01856 gapmap->table_gap (left, right))) 01857 return TRUE; 01858 } 01859 else { 01860 /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */ 01861 if ((gap > gapmap_big_gaps * row->xheight) && 01862 gapmap->table_gap (left, right)) 01863 return TRUE; 01864 } 01865 return FALSE; 01866 }
Try to recognise a "normal line".
The following conditions are used: more than 25 gaps, AND space > 3 * kn, AND space > 10 (i.e. a reasonably large space and kn:sp ratio), AND more than 3/4 of the gaps are < kn + (sp - kn)/3 (i.e. most gaps are well away from the space estimate), AND a gap of max(3, (sp - kn)/3) empty histogram positions is found somewhere in the histogram between kn and sp. THEN set the threshold and fuzzy limits to this gap - i.e. NO fuzzies.
NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! try moving the default threshold to within this band but leave the fuzzy limit calculation as at present.
Definition at line 905 of file tospace.cpp.
References STATS::get_total(), TO_ROW::kern_size, TO_ROW::space_size, TO_ROW::space_threshold, stats_count_under(), and tprintf().
Referenced by row_spacing_stats().
00905 { 00906 float sp = row->space_size; 00907 float kn = row->kern_size; 00908 INT16 reqd_zero_width = 0; 00909 INT16 zero_width = 0; 00910 INT16 zero_start = 0; 00911 INT16 index = 0; 00912 00913 if (tosp_debug_level > 10) 00914 tprintf ("Improve row threshold 0"); 00915 if ((all_gap_stats->get_total () <= 25) || 00916 (sp <= 10) || 00917 (sp <= 3 * kn) || 00918 (stats_count_under (all_gap_stats, 00919 (INT16) ceil (kn + (sp - kn) / 3 + 0.5)) < 00920 (0.75 * all_gap_stats->get_total ()))) 00921 return; 00922 if (tosp_debug_level > 10) 00923 tprintf (" 1"); 00924 /* 00925 Look for the first region of all 0's in the histogram which is wider than 00926 max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current 00927 threshold is not within it, move the threshold so that is is just inside it. 00928 */ 00929 reqd_zero_width = (INT16) floor ((sp - kn) / 3 + 0.5); 00930 if (reqd_zero_width < 3) 00931 reqd_zero_width = 3; 00932 00933 for (index = INT16 (ceil (kn)); index < INT16 (floor (sp)); index++) { 00934 if (all_gap_stats->pile_count (index) == 0) { 00935 if (zero_width == 0) 00936 zero_start = index; 00937 zero_width++; 00938 } 00939 else { 00940 if (zero_width >= reqd_zero_width) 00941 break; 00942 else { 00943 zero_width = 0; 00944 } 00945 } 00946 } 00947 index--; 00948 if (tosp_debug_level > 10) 00949 tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n", 00950 reqd_zero_width, zero_width, zero_start, row->space_threshold); 00951 if ((zero_width < reqd_zero_width) || 00952 ((row->space_threshold >= zero_start) && 00953 (row->space_threshold <= index))) 00954 return; 00955 if (tosp_debug_level > 10) 00956 tprintf (" 2"); 00957 if (row->space_threshold < zero_start) { 00958 if (tosp_debug_level > 5) 00959 tprintf 00960 ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", 00961 kn, sp, zero_start, index, row->space_threshold, zero_start); 00962 row->space_threshold = zero_start; 00963 } 00964 if (row->space_threshold > 
index) { 00965 if (tosp_debug_level > 5) 00966 tprintf 00967 ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", 00968 kn, sp, zero_start, index, row->space_threshold, index); 00969 row->space_threshold = index; 00970 } 00971 }
BOOL8 isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, BOOL8 suspected_table, INT16 block_idx, INT16 row_idx)
Set values for min_space, max_non_space based on isolated row's stats only
Definition at line 754 of file tospace.cpp.
References STATS::add(), TO_ROW::blob_list(), box_next(), box_next_pre_chopped(), FALSE, STATS::get_total(), ignore_big_gap(), TO_ROW::kern_size, BOX::left(), MAX, MAXSPACING, STATS::mean(), STATS::median(), narrow_blob(), reduced_box_next(), BOX::right(), TO_ROW::space_size, TO_ROW::space_threshold, stats_count_under(), tprintf(), TRUE, wide_blob(), and TO_ROW::xheight.
Referenced by row_spacing_stats().
00759 { 00760 float kern_estimate; 00761 float crude_threshold_estimate; 00762 INT16 small_gaps_count; 00763 INT16 total; 00764 //iterator 00765 BLOBNBOX_IT blob_it = row->blob_list (); 00766 STATS cert_space_gap_stats (0, MAXSPACING); 00767 STATS all_space_gap_stats (0, MAXSPACING); 00768 STATS small_gap_stats (0, MAXSPACING); 00769 BOX blob_box; 00770 BOX prev_blob_box; 00771 INT16 gap_width; 00772 INT32 end_of_row; 00773 INT32 row_length; 00774 00775 kern_estimate = all_gap_stats->median (); 00776 crude_threshold_estimate = MAX (tosp_init_guess_kn_mult * kern_estimate, 00777 tosp_init_guess_xht_mult * row->xheight); 00778 small_gaps_count = stats_count_under (all_gap_stats, 00779 (INT16) 00780 ceil (crude_threshold_estimate)); 00781 total = all_gap_stats->get_total (); 00782 00783 if ((total <= tosp_redo_kern_limit) || 00784 ((small_gaps_count / (float) total) < tosp_enough_small_gaps) || 00785 (total - small_gaps_count < 1)) { 00786 if (tosp_debug_level > 5) 00787 tprintf ("B:%d R:%d -- Cant do isolated row stats.\n", 00788 block_idx, row_idx); 00789 return FALSE; 00790 } 00791 blob_it.set_to_list (row->blob_list ()); 00792 blob_it.mark_cycle_pt (); 00793 end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); 00794 if (tosp_use_pre_chopping) 00795 blob_box = box_next_pre_chopped (&blob_it); 00796 else if (tosp_stats_use_xht_gaps) 00797 blob_box = reduced_box_next (row, &blob_it); 00798 else 00799 blob_box = box_next (&blob_it); 00800 row_length = end_of_row - blob_box.left (); 00801 prev_blob_box = blob_box; 00802 while (!blob_it.cycled_list ()) { 00803 if (tosp_use_pre_chopping) 00804 blob_box = box_next_pre_chopped (&blob_it); 00805 else if (tosp_stats_use_xht_gaps) 00806 blob_box = reduced_box_next (row, &blob_it); 00807 else 00808 blob_box = box_next (&blob_it); 00809 gap_width = blob_box.left () - prev_blob_box.right (); 00810 if (!ignore_big_gap (row, row_length, gapmap, 00811 prev_blob_box.right (), blob_box.left ()) && 00812 (gap_width > 
crude_threshold_estimate)) { 00813 if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) || 00814 ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && 00815 (!tosp_narrow_blobs_not_cert || 00816 (!narrow_blob (row, prev_blob_box) && 00817 !narrow_blob (row, blob_box)))) || 00818 (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box))) 00819 cert_space_gap_stats.add (gap_width, 1); 00820 all_space_gap_stats.add (gap_width, 1); 00821 } 00822 if (gap_width < crude_threshold_estimate) 00823 small_gap_stats.add (gap_width, 1); 00824 00825 prev_blob_box = blob_box; 00826 } 00827 if (cert_space_gap_stats.get_total () >= 00828 tosp_enough_space_samples_for_median) 00829 //median 00830 row->space_size = cert_space_gap_stats.median (); 00831 else if (suspected_table && (cert_space_gap_stats.get_total () > 0)) 00832 //to avoid spaced 00833 row->space_size = cert_space_gap_stats.mean (); 00834 // 1's in tables 00835 else if (all_space_gap_stats.get_total () >= 00836 tosp_enough_space_samples_for_median) 00837 //median 00838 row->space_size = all_space_gap_stats.median (); 00839 else 00840 row->space_size = all_space_gap_stats.mean (); 00841 00842 if (tosp_only_small_gaps_for_kern) 00843 row->kern_size = small_gap_stats.median (); 00844 else 00845 row->kern_size = all_gap_stats->median (); 00846 row->space_threshold = 00847 INT32 (floor ((row->space_size + row->kern_size) / 2)); 00848 /* Sanity check */ 00849 if ((row->kern_size >= row->space_threshold) || 00850 (row->space_threshold >= row->space_size) || 00851 (row->space_threshold <= 0)) { 00852 if (tosp_debug_level > 0) 00853 tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", 00854 block_idx, row_idx, 00855 row->kern_size, row->space_threshold, row->space_size); 00856 row->kern_size = 0.0f; 00857 row->space_threshold = 0; 00858 row->space_size = 0.0f; 00859 return FALSE; 00860 } 00861 00862 if (tosp_debug_level > 5) 00863 tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n", 00864 
block_idx, row_idx, 00865 row->kern_size, row->space_threshold, row->space_size); 00866 return TRUE; 00867 }
BOOL8 make_a_word_break(TO_ROW *row, BOX blob_box, INT16 prev_gap, BOX prev_blob_box, INT16 real_current_gap, INT16 within_xht_current_gap, BOX next_blob_box, INT16 next_gap, UINT8 &blanks, BOOL8 &fuzzy_sp, BOOL8 &fuzzy_non)
Decide on word break.
row | Row being made | |
blob_box | For next_blob | |
prev_gap | How many blanks? | |
prev_blob_box | ||
real_current_gap | ||
within_xht_current_gap | ||
next_blob_box | ||
next_gap | ||
blanks | ||
fuzzy_sp | ||
fuzzy_non |
Definition at line 1319 of file tospace.cpp.
References FALSE, TO_ROW::kern_size, mark_gap(), MAX, MAX_INT16, TO_ROW::max_nonspace, TO_ROW::min_space, narrow_blob(), BOX::null_box(), TO_ROW::space_size, TO_ROW::space_threshold, suspected_punct_blob(), TRUE, wide_blob(), BOX::width(), and TO_ROW::xheight.
Referenced by make_prop_words().
01330 { 01331 static BOOL8 prev_gap_was_a_space; 01332 BOOL8 space; 01333 INT16 current_gap; 01334 float fuzzy_sp_to_kn_limit; 01335 01336 /* 01337 Inhibit using the reduced gap 01338 */ 01339 if ((row->kern_size > tosp_large_kerning * row->xheight) || 01340 ((tosp_dont_fool_with_small_kerns >= 0) && 01341 (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size))) 01342 //Ignore the difference 01343 within_xht_current_gap = real_current_gap; 01344 01345 if (tosp_use_xht_gaps && tosp_only_use_xht_gaps) 01346 current_gap = within_xht_current_gap; 01347 else 01348 current_gap = real_current_gap; 01349 01350 if (tosp_old_to_method) { 01351 //Boring old method 01352 space = current_gap > row->max_nonspace; 01353 if (space && (current_gap < MAX_INT16)) { 01354 if (current_gap < row->min_space) { 01355 if (current_gap > row->space_threshold) { 01356 blanks = 1; 01357 fuzzy_sp = TRUE; 01358 fuzzy_non = FALSE; 01359 } 01360 else { 01361 blanks = 0; 01362 fuzzy_sp = FALSE; 01363 fuzzy_non = TRUE; 01364 } 01365 } 01366 else { 01367 blanks = (UINT8) (current_gap / row->space_size); 01368 if (blanks < 1) 01369 blanks = 1; 01370 fuzzy_sp = FALSE; 01371 fuzzy_non = FALSE; 01372 } 01373 } 01374 return space; 01375 } 01376 else { 01377 /* New exciting heuristic method */ 01378 if (prev_blob_box.null_box ()) 01379 //Beginning of row 01380 prev_gap_was_a_space = TRUE; 01381 01382 //Default as old TO 01383 space = current_gap > row->space_threshold; 01384 01385 /* Set defaults for the word break incase we find one. Currently there are 01386 no fuzzy spaces. Depending on the reliability of the different heuristics 01387 we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY 01388 be used if the function returns TRUE - ie the word is to be broken. 
01389 */ 01390 blanks = (UINT8) (current_gap / row->space_size); 01391 if (blanks < 1) 01392 blanks = 1; 01393 fuzzy_sp = FALSE; 01394 fuzzy_non = FALSE; 01395 /* 01396 If xht measure causes gap to flip one of the 3 thresholds act accordingly - 01397 despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to 01398 context. 01399 */ 01400 if (tosp_use_xht_gaps && 01401 (real_current_gap <= row->max_nonspace) && 01402 (within_xht_current_gap > row->max_nonspace)) { 01403 space = TRUE; 01404 fuzzy_non = TRUE; 01405 #ifndef GRAPHICS_DISABLED 01406 mark_gap (blob_box, 20, 01407 prev_gap, prev_blob_box.width (), 01408 current_gap, next_blob_box.width (), next_gap); 01409 #endif 01410 } 01411 else if (tosp_use_xht_gaps && 01412 (real_current_gap <= row->space_threshold) && 01413 (within_xht_current_gap > row->space_threshold)) { 01414 space = TRUE; 01415 if (tosp_flip_fuzz_kn_to_sp) 01416 fuzzy_sp = TRUE; 01417 else 01418 fuzzy_non = TRUE; 01419 #ifndef GRAPHICS_DISABLED 01420 mark_gap (blob_box, 21, 01421 prev_gap, prev_blob_box.width (), 01422 current_gap, next_blob_box.width (), next_gap); 01423 #endif 01424 } 01425 else if (tosp_use_xht_gaps && 01426 (real_current_gap < row->min_space) && 01427 (within_xht_current_gap >= row->min_space)) { 01428 space = TRUE; 01429 #ifndef GRAPHICS_DISABLED 01430 mark_gap (blob_box, 22, 01431 prev_gap, prev_blob_box.width (), 01432 current_gap, next_blob_box.width (), next_gap); 01433 #endif 01434 } 01435 /* Now continue with normal heuristics */ 01436 else if ((current_gap < row->min_space) && 01437 (current_gap > row->space_threshold)) { 01438 /* Heuristics to turn dubious spaces to kerns */ 01439 if (tosp_pass_wide_fuzz_sp_to_context > 0) 01440 fuzzy_sp_to_kn_limit = row->kern_size + 01441 tosp_pass_wide_fuzz_sp_to_context * 01442 (row->space_size - row->kern_size); 01443 else 01444 fuzzy_sp_to_kn_limit = 99999.0f; 01445 01446 /* If current gap is significantly smaller than the previous space the other 01447 side 
of a narrow blob then this gap is a kern. */ 01448 if ((prev_blob_box.width () > 0) && 01449 narrow_blob (row, prev_blob_box) && 01450 prev_gap_was_a_space && 01451 (current_gap <= tosp_gap_factor * prev_gap)) { 01452 if ((tosp_all_flips_fuzzy) || 01453 (current_gap > fuzzy_sp_to_kn_limit)) { 01454 if (tosp_flip_fuzz_sp_to_kn) 01455 fuzzy_non = TRUE; 01456 else 01457 fuzzy_sp = TRUE; 01458 } 01459 else 01460 space = FALSE; 01461 #ifndef GRAPHICS_DISABLED 01462 mark_gap (blob_box, 1, 01463 prev_gap, prev_blob_box.width (), 01464 current_gap, next_blob_box.width (), next_gap); 01465 #endif 01466 } 01467 /* If current gap not much bigger than the previous kern the other side of a 01468 narrow blob then this gap is a kern as well */ 01469 else if ((prev_blob_box.width () > 0) && 01470 narrow_blob (row, prev_blob_box) && 01471 !prev_gap_was_a_space && 01472 (current_gap * tosp_gap_factor <= prev_gap)) { 01473 if ((tosp_all_flips_fuzzy) || 01474 (current_gap > fuzzy_sp_to_kn_limit)) { 01475 if (tosp_flip_fuzz_sp_to_kn) 01476 fuzzy_non = TRUE; 01477 else 01478 fuzzy_sp = TRUE; 01479 } 01480 else 01481 space = FALSE; 01482 #ifndef GRAPHICS_DISABLED 01483 mark_gap (blob_box, 2, 01484 prev_gap, prev_blob_box.width (), 01485 current_gap, next_blob_box.width (), next_gap); 01486 #endif 01487 } 01488 else if ((next_blob_box.width () > 0) && 01489 narrow_blob (row, next_blob_box) && 01490 (next_gap > row->space_threshold) && 01491 (current_gap <= tosp_gap_factor * next_gap)) { 01492 if ((tosp_all_flips_fuzzy) || 01493 (current_gap > fuzzy_sp_to_kn_limit)) { 01494 if (tosp_flip_fuzz_sp_to_kn) 01495 fuzzy_non = TRUE; 01496 else 01497 fuzzy_sp = TRUE; 01498 } 01499 else 01500 space = FALSE; 01501 #ifndef GRAPHICS_DISABLED 01502 mark_gap (blob_box, 3, 01503 prev_gap, prev_blob_box.width (), 01504 current_gap, next_blob_box.width (), next_gap); 01505 #endif 01506 } 01507 else if ((next_blob_box.width () > 0) && 01508 narrow_blob (row, next_blob_box) && 01509 (next_gap <= 
row->space_threshold) && 01510 (current_gap * tosp_gap_factor <= next_gap)) { 01511 if ((tosp_all_flips_fuzzy) || 01512 (current_gap > fuzzy_sp_to_kn_limit)) { 01513 if (tosp_flip_fuzz_sp_to_kn) 01514 fuzzy_non = TRUE; 01515 else 01516 fuzzy_sp = TRUE; 01517 } 01518 else 01519 space = FALSE; 01520 #ifndef GRAPHICS_DISABLED 01521 mark_gap (blob_box, 4, 01522 prev_gap, prev_blob_box.width (), 01523 current_gap, next_blob_box.width (), next_gap); 01524 #endif 01525 } 01526 else if ((((next_blob_box.width () > 0) && 01527 narrow_blob (row, next_blob_box)) || 01528 ((prev_blob_box.width () > 0) && 01529 narrow_blob (row, prev_blob_box)))) { 01530 fuzzy_sp = TRUE; 01531 #ifndef GRAPHICS_DISABLED 01532 mark_gap (blob_box, 6, 01533 prev_gap, prev_blob_box.width (), 01534 current_gap, next_blob_box.width (), next_gap); 01535 #endif 01536 } 01537 } 01538 else if ((current_gap > row->max_nonspace) && 01539 (current_gap <= row->space_threshold)) { 01540 01541 /* Heuristics to turn dubious kerns to spaces */ 01542 /* TRIED THIS BUT IT MADE THINGS WORSE 01543 if ( prev_gap == MAX_INT16 ) 01544 prev_gap = 0; //start of row 01545 if ( next_gap == MAX_INT16 ) 01546 next_gap = 0; //end of row 01547 */ 01548 if ((prev_blob_box.width () > 0) && 01549 (next_blob_box.width () > 0) && 01550 (current_gap >= 01551 tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) && 01552 wide_blob (row, prev_blob_box) && 01553 wide_blob (row, next_blob_box)) { 01554 01555 space = TRUE; 01556 /* 01557 tosp_flip_caution is an attempt to stop the default changing in cases 01558 where there is a large difference between the kern and space estimates. 01559 See problem in 'chiefs' where "have" gets split in the quotation. 
01560 */ 01561 if ((tosp_flip_fuzz_kn_to_sp) && 01562 ((tosp_flip_caution <= 0) || 01563 (tosp_flip_caution * row->kern_size > row->space_size))) 01564 fuzzy_sp = TRUE; 01565 else 01566 fuzzy_non = TRUE; 01567 #ifndef GRAPHICS_DISABLED 01568 mark_gap (blob_box, 7, 01569 prev_gap, prev_blob_box.width (), 01570 current_gap, next_blob_box.width (), next_gap); 01571 #endif 01572 } 01573 else if ((prev_blob_box.width () > 0) && 01574 (next_blob_box.width () > 0) && 01575 (current_gap >= 01576 tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) && 01577 !(narrow_blob (row, prev_blob_box) || 01578 suspected_punct_blob (row, prev_blob_box)) && 01579 !(narrow_blob (row, next_blob_box) || 01580 suspected_punct_blob (row, next_blob_box))) { 01581 space = TRUE; 01582 fuzzy_non = TRUE; 01583 #ifndef GRAPHICS_DISABLED 01584 mark_gap (blob_box, 8, 01585 prev_gap, prev_blob_box.width (), 01586 current_gap, next_blob_box.width (), next_gap); 01587 #endif 01588 } 01589 else if ((tosp_kern_gap_factor3 > 0) && 01590 (prev_blob_box.width () > 0) && 01591 (next_blob_box.width () > 0) && 01592 (current_gap >= 01593 tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) && 01594 (!tosp_rule_9_test_punct || 01595 (!suspected_punct_blob (row, prev_blob_box) && 01596 !suspected_punct_blob (row, next_blob_box)))) { 01597 space = TRUE; 01598 fuzzy_non = TRUE; 01599 #ifndef GRAPHICS_DISABLED 01600 mark_gap (blob_box, 9, 01601 prev_gap, prev_blob_box.width (), 01602 current_gap, next_blob_box.width (), next_gap); 01603 #endif 01604 } 01605 } 01606 prev_gap_was_a_space = space && !(fuzzy_non); 01607 return space; 01608 } 01609 }
Find lines.
row | Row to make | |
rotation | For drawing |
prev_ values are for start of word being built. non prev_ values are for the gap between the word being built and the next one.
Definition at line 983 of file tospace.cpp.
References TO_ROW::blob_list(), BOX::bottom(), ROW::bounding_box(), WERD::bounding_box(), FALSE, find_mean_blob_spacing(), TO_ROW::kern_size, BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), make_a_word_break(), MAX_INT16, MAX_INT32, NULL, peek_at_next_gap(), ROW::recalc_bounding_box(), TO_ROW::rep_words, BOX::right(), WERD::set_blanks(), WERD::set_flag(), TO_ROW::space_size, BOX::top(), tprintf(), TRUE, W_BOL, W_EOL, W_FUZZY_NON, W_FUZZY_SP, word_count, and ROW::word_list().
Referenced by make_real_words().
00986 { 00987 BOOL8 bol; // start of line 00993 BOOL8 prev_fuzzy_sp; // probably space 00994 BOOL8 prev_fuzzy_non; // probably not 00995 UINT8 prev_blanks; // in front of word 00996 BOOL8 fuzzy_sp; // probably space 00997 BOOL8 fuzzy_non; // probably not 00998 UINT8 blanks; // in front of word 00999 ROW *real_row; // output row 01000 OUTLINE_IT out_it; // outlines 01001 C_OUTLINE_IT cout_it; 01002 PBLOB_LIST blobs; // blobs in word 01003 C_BLOB_LIST cblobs; 01004 PBLOB_IT blob_it = &blobs; // iterator 01005 C_BLOB_IT cblob_it = &cblobs; 01006 WERD_LIST words; 01007 WERD_IT word_it; // new words 01008 WERD *word; // new word 01009 WERD_IT rep_char_it; // repeated char words 01010 INT32 next_rep_char_word_right = MAX_INT32; 01011 float repetition_spacing; // gap between repetitions 01012 INT32 xstarts[2]; // row ends 01013 double coeffs[3]; // quadratic 01014 INT32 prev_x; // end of prev blob 01015 BLOBNBOX *bblob; // current blob 01016 BOX blob_box; // bounding box 01017 BLOBNBOX_IT box_it; // iterator 01018 BOX prev_blob_box; 01019 BOX next_blob_box; 01020 INT16 prev_gap = MAX_INT16; 01021 INT16 current_gap = MAX_INT16; 01022 INT16 next_gap = MAX_INT16; 01023 INT16 prev_within_xht_gap = MAX_INT16; 01024 INT16 current_within_xht_gap = MAX_INT16; 01025 INT16 next_within_xht_gap = MAX_INT16; 01026 INT16 word_count = 0; 01027 static INT16 row_count = 0; 01028 01029 row_count++; 01030 rep_char_it.set_to_list (&(row->rep_words)); 01031 if (!rep_char_it.empty ()) { 01032 next_rep_char_word_right = 01033 rep_char_it.data ()->bounding_box ().right (); 01034 } 01035 01036 prev_x = -MAX_INT16; 01037 blob_it.set_to_list (&blobs); 01038 cblob_it.set_to_list (&cblobs); 01039 box_it.set_to_list (row->blob_list ()); 01040 word_it.set_to_list (&words); 01041 bol = TRUE; 01042 prev_blanks = 0; 01043 prev_fuzzy_sp = FALSE; 01044 prev_fuzzy_non = FALSE; 01045 if (!box_it.empty ()) { 01046 xstarts[0] = box_it.data ()->bounding_box ().left (); 01047 if (xstarts[0] > 
next_rep_char_word_right) { 01048 /* We need to insert a repeated char word at the start of the row */ 01049 word = rep_char_it.extract (); 01050 word_it.add_after_then_move (word); 01051 /* Set spaces before repeated char word */ 01052 word->set_flag (W_BOL, TRUE); 01053 bol = FALSE; 01054 word->set_blanks (0); 01055 //NO uncertainty 01056 word->set_flag (W_FUZZY_SP, FALSE); 01057 word->set_flag (W_FUZZY_NON, FALSE); 01058 xstarts[0] = word->bounding_box ().left (); 01059 /* Set spaces after repeated char word (and leave current word set) */ 01060 repetition_spacing = find_mean_blob_spacing (word); 01061 current_gap = box_it.data ()->bounding_box ().left () - 01062 next_rep_char_word_right; 01063 current_within_xht_gap = current_gap; 01064 if (current_gap > tosp_rep_space * repetition_spacing) { 01065 prev_blanks = (UINT8) floor (current_gap / row->space_size); 01066 if (prev_blanks < 1) 01067 prev_blanks = 1; 01068 } 01069 else 01070 prev_blanks = 0; 01071 if (tosp_debug_level > 5) 01072 tprintf ("Repch wd at BOL(%d, %d). 
rep spacing %5.2f; Rgap:%d ", 01073 box_it.data ()->bounding_box ().left (), 01074 box_it.data ()->bounding_box ().bottom (), 01075 repetition_spacing, current_gap); 01076 prev_fuzzy_sp = FALSE; 01077 prev_fuzzy_non = FALSE; 01078 if (rep_char_it.empty ()) { 01079 next_rep_char_word_right = MAX_INT32; 01080 } 01081 else { 01082 rep_char_it.forward (); 01083 next_rep_char_word_right = 01084 rep_char_it.data ()->bounding_box ().right (); 01085 } 01086 } 01087 01088 peek_at_next_gap(row, 01089 box_it, 01090 next_blob_box, 01091 next_gap, 01092 next_within_xht_gap); 01093 do { 01094 bblob = box_it.data (); 01095 blob_box = bblob->bounding_box (); 01096 if (bblob->joined_to_prev ()) { 01097 if (bblob->blob () != NULL) { 01098 out_it.set_to_list (blob_it.data ()->out_list ()); 01099 out_it.move_to_last (); 01100 out_it.add_list_after (bblob->blob ()->out_list ()); 01101 delete bblob->blob (); 01102 } 01103 else if (bblob->cblob () != NULL) { 01104 cout_it.set_to_list (cblob_it.data ()->out_list ()); 01105 cout_it.move_to_last (); 01106 cout_it.add_list_after (bblob->cblob ()->out_list ()); 01107 delete bblob->cblob (); 01108 } 01109 } 01110 else { 01111 if (bblob->blob () != NULL) 01112 blob_it.add_after_then_move (bblob->blob ()); 01113 else if (bblob->cblob () != NULL) 01114 cblob_it.add_after_then_move (bblob->cblob ()); 01115 prev_x = blob_box.right (); 01116 } 01117 box_it.forward (); //next one 01118 bblob = box_it.data (); 01119 blob_box = bblob->bounding_box (); 01120 01121 if (!bblob->joined_to_prev () && 01122 (bblob->blob () != NULL || bblob->cblob () != NULL)) { 01123 /* Real Blob - not multiple outlines or pre-chopped */ 01124 prev_gap = current_gap; 01125 prev_within_xht_gap = current_within_xht_gap; 01126 prev_blob_box = next_blob_box; 01127 current_gap = next_gap; 01128 current_within_xht_gap = next_within_xht_gap; 01129 peek_at_next_gap(row, 01130 box_it, 01131 next_blob_box, 01132 next_gap, 01133 next_within_xht_gap); 01134 01135 if ((blob_box.left () > 
next_rep_char_word_right) || 01136 (!tosp_only_use_xht_gaps && 01137 make_a_word_break (row, blob_box, prev_gap, prev_blob_box, 01138 current_gap, current_within_xht_gap, 01139 next_blob_box, next_gap, 01140 blanks, fuzzy_sp, fuzzy_non)) || 01141 (tosp_only_use_xht_gaps && 01142 make_a_word_break (row, blob_box, prev_within_xht_gap, 01143 prev_blob_box, 01144 current_gap, current_within_xht_gap, 01145 next_blob_box, next_within_xht_gap, 01146 blanks, fuzzy_sp, fuzzy_non)) || 01147 box_it.at_first ()) { 01148 /* Form a new word out of the blobs collected */ 01149 if (!blob_it.empty ()) { 01150 word = new WERD (&blobs, prev_blanks, NULL); 01151 //make real word 01152 word_count++; 01153 } 01154 else { 01155 word = new WERD (&cblobs, prev_blanks, NULL); 01156 word_count++; 01157 } 01158 word_it.add_after_then_move (word); 01159 if (bol) { 01160 word->set_flag (W_BOL, TRUE); 01161 bol = FALSE; 01162 } 01163 if (prev_fuzzy_sp) 01164 //probably space 01165 word->set_flag (W_FUZZY_SP, TRUE); 01166 else if (prev_fuzzy_non) 01167 word->set_flag (W_FUZZY_NON, TRUE); 01168 //probably not 01169 01170 if (blob_box.left () > next_rep_char_word_right) { 01171 /* We need to insert a repeated char word */ 01172 word = rep_char_it.extract (); 01173 word_it.add_after_then_move (word); 01174 01175 /* Set spaces before repeated char word */ 01176 repetition_spacing = find_mean_blob_spacing (word); 01177 current_gap = word->bounding_box ().left () - prev_x; 01178 current_within_xht_gap = current_gap; 01179 if (current_gap > tosp_rep_space * repetition_spacing) { 01180 blanks = 01181 (UINT8) floor (current_gap / row->space_size); 01182 if (blanks < 1) 01183 blanks = 1; 01184 } 01185 else 01186 blanks = 0; 01187 if (tosp_debug_level > 5) 01188 tprintf 01189 ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);", 01190 word->bounding_box ().left (), 01191 word->bounding_box ().bottom (), 01192 repetition_spacing, current_gap, blanks); 01193 word->set_blanks (blanks); 01194 //NO 
uncertainty 01195 word->set_flag (W_FUZZY_SP, FALSE); 01196 word->set_flag (W_FUZZY_NON, FALSE); 01197 01198 /* Set spaces after repeated char word (and leave current word set) */ 01199 current_gap = 01200 blob_box.left () - next_rep_char_word_right; 01201 if (current_gap > tosp_rep_space * repetition_spacing) { 01202 blanks = (UINT8) (current_gap / row->space_size); 01203 if (blanks < 1) 01204 blanks = 1; 01205 } 01206 else 01207 blanks = 0; 01208 if (tosp_debug_level > 5) 01209 tprintf (" Rgap:%d (%d blanks)\n", 01210 current_gap, blanks); 01211 fuzzy_sp = FALSE; 01212 fuzzy_non = FALSE; 01213 01214 if (rep_char_it.empty ()) { 01215 next_rep_char_word_right = MAX_INT32; 01216 } 01217 else { 01218 rep_char_it.forward (); 01219 next_rep_char_word_right = 01220 rep_char_it.data ()->bounding_box ().right (); 01221 } 01222 } 01223 01224 if (box_it.at_first () && rep_char_it.empty ()) { 01225 //at end of line 01226 word->set_flag (W_EOL, TRUE); 01227 xstarts[1] = prev_x; 01228 } 01229 else { 01230 prev_blanks = blanks; 01231 prev_fuzzy_sp = fuzzy_sp; 01232 prev_fuzzy_non = fuzzy_non; 01233 } 01234 } 01235 } 01236 } 01237 while (!box_it.at_first ()); //until back at start 01238 01239 /* Insert any further repeated char words */ 01240 while (!rep_char_it.empty ()) { 01241 word = rep_char_it.extract (); 01242 word_it.add_after_then_move (word); 01243 01244 /* Set spaces before repeated char word */ 01245 repetition_spacing = find_mean_blob_spacing (word); 01246 current_gap = word->bounding_box ().left () - prev_x; 01247 if (current_gap > tosp_rep_space * repetition_spacing) { 01248 blanks = (UINT8) floor (current_gap / row->space_size); 01249 if (blanks < 1) 01250 blanks = 1; 01251 } 01252 else 01253 blanks = 0; 01254 if (tosp_debug_level > 5) 01255 tprintf 01256 ("Repch wd at EOL (%d,%d). 
rep spacing %d; Lgap:%d (%d blanks)\n", 01257 word->bounding_box ().left (), word->bounding_box ().bottom (), 01258 repetition_spacing, current_gap, blanks); 01259 word->set_blanks (blanks); 01260 //NO uncertainty 01261 word->set_flag (W_FUZZY_SP, FALSE); 01262 word->set_flag (W_FUZZY_NON, FALSE); 01263 prev_x = word->bounding_box ().right (); 01264 if (rep_char_it.empty ()) { 01265 //at end of line 01266 word->set_flag (W_EOL, TRUE); 01267 xstarts[1] = prev_x; 01268 } 01269 else { 01270 rep_char_it.forward (); 01271 } 01272 } 01273 coeffs[0] = 0; 01274 coeffs[1] = row->line_m (); 01275 coeffs[2] = row->line_c (); 01276 real_row = new ROW (row, 01277 (INT16) row->kern_size, (INT16) row->space_size); 01278 word_it.set_to_list (real_row->word_list ()); 01279 //put words in row 01280 word_it.add_list_after (&words); 01281 real_row->recalc_bounding_box (); 01282 if (tosp_debug_level > 9) { 01283 tprintf ("Row %d Made %d words in row ((%d,%d)(%d,%d))\n", 01284 row_count, 01285 word_count, 01286 real_row->bounding_box ().left (), 01287 real_row->bounding_box ().bottom (), 01288 real_row->bounding_box ().right (), 01289 real_row->bounding_box ().top ()); 01290 } 01291 return real_row; 01292 } 01293 return NULL; 01294 }
void mark_gap | ( | BOX | blob, | |
INT16 | rule, | |||
INT16 | prev_gap, | |||
INT16 | prev_blob_width, | |||
INT16 | current_gap, | |||
INT16 | next_blob_width, | |||
INT16 | next_gap | |||
) |
Debug stuff using user interface.
Definition at line 1710 of file tospace.cpp.
References BLACK, BLUE, BOX::bottom(), CYAN, ellipse, f, FALSE, fill_color_index, GREEN, BOX::height(), INT_HOLLOW, INT_SOLID, interior_style, BOX::left(), MAGENTA, perimeter_color_index, RED, to_win, tprintf(), TRUE, WHITE, and YELLOW.
Referenced by make_a_word_break().
01717 { 01718 COLOUR col; //of ellipse marking flipped gap 01719 01720 switch (rule) { 01721 case 1: 01722 col = RED; 01723 break; 01724 case 2: 01725 col = CYAN; 01726 break; 01727 case 3: 01728 col = GREEN; 01729 break; 01730 case 4: 01731 col = BLACK; 01732 break; 01733 case 5: 01734 col = MAGENTA; 01735 break; 01736 case 6: 01737 col = BLUE; 01738 break; 01739 01740 case 7: 01741 col = WHITE; 01742 break; 01743 case 8: 01744 col = YELLOW; 01745 break; 01746 case 9: 01747 col = BLACK; 01748 break; 01749 01750 case 20: 01751 col = CYAN; 01752 break; 01753 case 21: 01754 col = GREEN; 01755 break; 01756 case 22: 01757 col = MAGENTA; 01758 break; 01759 default: 01760 col = BLACK; 01761 } 01762 if (textord_show_initial_words) { 01763 fill_color_index(to_win, col); 01764 perimeter_color_index(to_win, col); 01765 if (rule < 20) 01766 interior_style(to_win, INT_SOLID, FALSE); 01767 else 01768 interior_style(to_win, INT_HOLLOW, TRUE); 01769 //x radius 01770 ellipse (to_win, current_gap / 2.0f, 01771 blob.height () / 2.0f, //y radius 01772 //x centre 01773 blob.left () - current_gap / 2.0f, 01774 //y centre 01775 blob.bottom () + blob.height () / 2.0f, 01776 0.0f); 01777 } 01778 if (tosp_debug_level > 0) 01779 tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n", 01780 blob.left () - current_gap / 2, blob.bottom (), rule, 01781 prev_gap, prev_blob_width, current_gap, 01782 next_blob_width, next_gap); 01783 }
Determine if blob is narrow.
row | Row being made | |
blob_box | For next_blob |
Definition at line 1619 of file tospace.cpp.
References BOX::height(), BOX::width(), and TO_ROW::xheight.
Referenced by block_spacing_stats(), isolated_row_stats(), make_a_word_break(), row_spacing_stats(), and wide_blob().
01619 { 01620 BOOL8 result; 01621 01622 result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || 01623 (((float) blob_box.width () / blob_box.height ()) <= 01624 tosp_narrow_aspect_ratio)); 01625 return result; 01626 }
void old_to_method | ( | TO_ROW * | row, | |
STATS * | all_gap_stats, | |||
STATS * | space_gap_stats, | |||
STATS * | small_gap_stats, | |||
INT16 | block_space_gap_width, | |||
INT16 | block_non_space_gap_width | |||
) |
Estimate space size for block in row
Definition at line 680 of file tospace.cpp.
References STATS::get_total(), TO_ROW::kern_size, STATS::mean(), STATS::median(), TO_ROW::space_size, and TO_ROW::space_threshold.
Referenced by row_spacing_stats().
00687 { 00688 /* Old to condition was > 2 */ 00689 if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) { 00690 //Adequate samples 00691 /* Set space size to median of spaces BUT limits it if it seems wildly out */ 00692 row->space_size = space_gap_stats->median (); 00693 if (row->space_size > block_space_gap_width * 1.5) { 00694 if (tosp_old_to_bug_fix) 00695 row->space_size = block_space_gap_width * 1.5; 00696 else 00697 //BUG??? should be *1.5 00698 row->space_size = block_space_gap_width; 00699 } 00700 if (row->space_size < (block_non_space_gap_width * 2) + 1) 00701 row->space_size = (block_non_space_gap_width * 2) + 1; 00702 } 00703 //Only 1 or 2 samples 00704 else if (space_gap_stats->get_total () >= 1) { 00705 //hence mean not median 00706 row->space_size = space_gap_stats->mean (); 00707 if (row->space_size > block_space_gap_width * 1.5) { 00708 if (tosp_old_to_bug_fix) 00709 row->space_size = block_space_gap_width * 1.5; 00710 else 00711 //BUG??? should be *1.5 00712 row->space_size = block_space_gap_width; 00713 } 00714 if (row->space_size < (block_non_space_gap_width * 3) + 1) 00715 row->space_size = (block_non_space_gap_width * 3) + 1; 00716 } 00717 else 00718 //Use block default 00719 row->space_size = block_space_gap_width; 00720 00721 if ((tosp_only_small_gaps_for_kern) && 00722 (small_gap_stats->get_total () > tosp_redo_kern_limit)) 00723 row->kern_size = small_gap_stats->median (); 00724 else if (all_gap_stats->get_total () > tosp_redo_kern_limit) 00725 row->kern_size = all_gap_stats->median (); 00726 else 00727 //old TO -SAME FOR ALL ROWS 00728 row->kern_size = block_non_space_gap_width; 00729 00730 if (tosp_threshold_bias2 > 0) 00731 row->space_threshold = 00732 INT32 (floor (0.5 + row->kern_size + 00733 tosp_threshold_bias2 * (row->space_size - 00734 row->kern_size))); 00735 else 00736 /* 00737 NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold 00738 and holds this in a float. 
The use is with a >= test 00739 NEW textord uses an integer threshold and a > test 00740 00741 It comes to the same thing. 00742 00743 (Though there is a difference in that old textor has integer space_size 00744 and kern_size.) 00745 */ 00746 row->space_threshold = 00747 INT32 (floor ((row->space_size + row->kern_size) / 2)); 00748 }
void peek_at_next_gap | ( | TO_ROW * | row, | |
BLOBNBOX_IT | box_it, | |||
BOX & | next_blob_box, | |||
INT16 & | next_gap, | |||
INT16 & | next_within_xht_gap | |||
) |
Get a copy of next gap for peeking.
Definition at line 1680 of file tospace.cpp.
References box_next(), BOX::left(), MAX_INT16, reduced_box_next(), and BOX::right().
Referenced by make_prop_words().
01685 { 01686 BOX next_reduced_blob_box; 01687 BOX bit_beyond; 01688 BLOBNBOX_IT reduced_box_it = box_it; 01689 01690 next_blob_box = box_next (&box_it); 01691 next_reduced_blob_box = reduced_box_next (row, &reduced_box_it); 01692 if (box_it.at_first ()) { 01693 next_gap = MAX_INT16; 01694 next_within_xht_gap = MAX_INT16; 01695 } 01696 else { 01697 bit_beyond = box_it.data ()->bounding_box (); 01698 next_gap = bit_beyond.left () - next_blob_box.right (); 01699 bit_beyond = reduced_box_next (row, &reduced_box_it); 01700 next_within_xht_gap = 01701 bit_beyond.left () - next_reduced_blob_box.right (); 01702 } 01703 }
Find bounding box for blob.
Find box for blob which is the same height and y position as the whole blob, but whose left limit is the left most position of the blob ABOVE the baseline and whose right limit is the right most position of the blob BELOW the xheight.
WONT WORK WITH LARGE UPPER CASE CHARS - T F V W: Look at examples on "home". ???
Perhaps we need something which say if the width ABOVE the xht alone includes the whole of the reduced width, then use the full blob box - Might still fail on italic F
Alternatively we could be a little less severe and only reduce the left and right edges by half the difference between the full box and the reduced box.
NOTE that we need to rotate all the coordinates as find_blob_limits finds the y min and max within a specified x band
Definition at line 1958 of file tospace.cpp.
References TO_ROW::baseline, baseline, BOX::bottom(), find_blob_limits(), find_cblob_hlimits(), BOX::left(), MAX_INT16, MAX_INT32, NULL, BOX::right(), BOX::top(), TO_ROW::xheight, and QSPLINE::y().
Referenced by reduced_box_next().
{
  //NOTE(review): the function header is not part of this listing; the body
  //uses the parameters blob, row and left_above_xht (an output pointer).
  //
  //Three limit searches are made, each over a different vertical band of the
  //blob.  find_blob_limits() is given negated y values plus a 90 degree
  //rotation, while find_cblob_hlimits() is given the band directly.  In every
  //search the two limit variables start at opposite extremes, so
  //"min > max" afterwards is read as "nothing was found in the band".
  float baseline;
  float blob_x_centre;
  float left_limit;
  float right_limit;
  float junk;                    //receives the limit we do not care about
  BOX blob_box;

  /* Find baseline of centre of blob */

  blob_box = blob->bounding_box ();
  blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
  baseline = row->baseline.y (blob_x_centre);

  /*
    Find LH limit of blob ABOVE the xht. This is so that we can detect certain
    caps ht chars which should NOT have their box reduced: T, Y, V, W etc
    The band used starts at baseline + 1.1 * xheight.
  */
  left_limit = (float) MAX_INT32;
  junk = (float) -MAX_INT32;
  if (blob->blob () != NULL)
                                 //blob to test
    find_blob_limits (blob->blob (),
                      (float) -MAX_INT16,  //rotated lower limit
                      -(baseline + 1.1 * row->xheight),
                                 //rotated upper limit
                      FCOORD (0.0, 1.0),   //90deg anticlock rot
                      left_limit, junk);   //min y max_y
  else
                                 //blob to test
    find_cblob_hlimits (blob->cblob (),
                                 //lower limit of band
                        (baseline + 1.1 * row->xheight), (float) MAX_INT16,
                                 //upper limit of band
                        //      FCOORD( 0.0, 1.0 ),     //90deg anticlock rot
                        left_limit, junk); //min y max_y
  if (left_limit > junk)
    *left_above_xht = MAX_INT16; //No area above xht
  else
    *left_above_xht = (INT16) floor (left_limit);
  /*
    Find reduced LH limit of blob - the left extent of the region ABOVE the
    baseline.
  */
  left_limit = (float) MAX_INT32;
  junk = (float) -MAX_INT32;
  if (blob->blob () != NULL)
                                 //blob to test
    find_blob_limits (blob->blob (),
                      (float) -MAX_INT16,  //rotated lower limit
                      -baseline,           //rotated upper limit
                      FCOORD (0.0, 1.0),   //90deg anticlock rot
                      left_limit, junk);   //min y max_y
  else
                                 //blob to test
    //NOTE(review): the original labels on the next two arguments were
    //"upper" then "lower"; by analogy with the call above (band start first,
    //MAX_INT16 second) they look swapped - presumably baseline is the lower
    //limit here. Confirm against find_cblob_hlimits().
    find_cblob_hlimits (blob->cblob (),
                        baseline,          //lower limit of band (presumably)
                        (float) MAX_INT16, //upper limit of band (presumably)
                        //      FCOORD( 0.0, 1.0 ),     //90deg anticlock rot
                        left_limit, junk); //min y max_y

  if (left_limit > junk)
    return BOX ();               //nothing found above the baseline: empty box
  /*
    Find reduced RH limit of blob - the right extent of the region BELOW the xht.
  */
  junk = (float) MAX_INT32;
  right_limit = (float) -MAX_INT32;
  if (blob->blob () != NULL)
                                 //blob to test
    find_blob_limits (blob->blob (),
                      -(baseline + row->xheight),
                                 //rotated lower limit
                      (float) MAX_INT16,   //rotated upper limit
                      FCOORD (0.0, 1.0),   //90deg anticlock rot
                      junk, right_limit);  //min y max_y
  else
                                 //blob to test
    //NOTE(review): as above, the original "upper"/"lower" labels on these
    //two arguments look swapped; -MAX_INT16 is presumably the lower limit.
    find_cblob_hlimits (blob->cblob (),
                        (float) -MAX_INT16,  //lower limit of band (presumably)
                        (baseline + row->xheight),
                                 //upper limit of band (presumably)
                        //      FCOORD( 0.0, 1.0 ),     //90deg anticlock rot
                        junk, right_limit);  //min y max_y
  if (junk > right_limit)
    return BOX ();               //nothing found below the xht: empty box

  //Full height of the original box, but with the reduced left/right limits.
  return BOX (ICOORD ((INT16) floor (left_limit), blob_box.bottom ()),
              ICOORD ((INT16) ceil (right_limit), blob_box.top ()));
}
Get bounding box.
Compute the bounding box of this blob with merging of x overlaps but no pre-chopping.
Then move the iterator on to the start of the next blob. DONT reduce the box for small things - eg punctuation.
Definition at line 1878 of file tospace.cpp.
References BOX::height(), INT_HOLLOW, BOX::left(), MIN, NULL, BOX::plot(), reduced_box_for_blob(), to_win, TRUE, BOX::width(), TO_ROW::xheight, and YELLOW.
Referenced by block_spacing_stats(), isolated_row_stats(), peek_at_next_gap(), and row_spacing_stats().
01881 { 01882 BLOBNBOX *blob; //current blob 01883 BLOBNBOX *head_blob; //place to store box 01884 BOX full_box; //full blob boundg box 01885 BOX reduced_box; //box of significant part 01886 INT16 left_above_xht; //ABOVE xht left limit 01887 INT16 new_left_above_xht; //ABOVE xht left limit 01888 01889 blob = it->data (); 01890 if (blob->red_box_set ()) { 01891 reduced_box = blob->reduced_box (); 01892 do { 01893 it->forward (); 01894 blob = it->data (); 01895 } 01896 //until next real blob 01897 while (blob->blob () == NULL 01898 && blob->cblob () == NULL || blob->joined_to_prev ()); 01899 return reduced_box; 01900 } 01901 head_blob = blob; 01902 full_box = blob->bounding_box (); 01903 reduced_box = reduced_box_for_blob (blob, row, &left_above_xht); 01904 do { 01905 it->forward (); 01906 blob = it->data (); 01907 if (blob->blob () == NULL && blob->cblob () == NULL) 01908 //was pre-chopped 01909 full_box += blob->bounding_box (); 01910 else if (blob->joined_to_prev ()) { 01911 reduced_box += 01912 reduced_box_for_blob(blob, row, &new_left_above_xht); 01913 left_above_xht = MIN (left_above_xht, new_left_above_xht); 01914 } 01915 } 01916 //until next real blob 01917 while (blob->blob () == NULL 01918 && blob->cblob () == NULL || blob->joined_to_prev ()); 01919 01920 if ((reduced_box.width () > 0) && 01921 ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ()) 01922 < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) { 01923 #ifndef GRAPHICS_DISABLED 01924 if (textord_show_initial_words) 01925 reduced_box.plot (to_win, INT_HOLLOW, TRUE, YELLOW, YELLOW); 01926 #endif 01927 } 01928 else 01929 reduced_box = full_box; 01930 head_blob->set_reduced_box (reduced_box); 01931 return reduced_box; 01932 }//reduced_box_next
/**
 * row_spacing_stats
 *
 * Estimate spacing for block from row stats.
 *
 * Works on a single row and writes row->space_size, row->kern_size,
 * row->space_threshold, row->min_space and row->max_nonspace.  The stages are:
 *   1. walk the blobs of the row, histogramming the gaps between them and
 *      counting the gaps that ignore_big_gap() rejects;
 *   2. choose kern size, space size and threshold via old_to_method(),
 *      isolated_row_stats() or the block defaults;
 *   3. optionally refine the threshold (improve_row_threshold());
 *   4. apply sanity limits, with extra care for suspected tables;
 *   5. derive the fuzzy limits min_space / max_nonspace around the threshold.
 *
 * row                        row to measure; result fields are written to it
 * gapmap                     passed on to ignore_big_gap()/isolated_row_stats()
 * block_idx, row_idx         only used in debug output and passed on
 * block_space_gap_width      block estimate of space width; a value <= 0 is
 *                            treated as "no good estimate" and replaced locally
 * block_non_space_gap_width  block estimate of non-space (kern) width
 *
 * Called from to_spacing().
 */
void row_spacing_stats(TO_ROW *row,
                       GAPMAP *gapmap,
                       INT16 block_idx,
                       INT16 row_idx,
                       INT16 block_space_gap_width,
                       INT16 block_non_space_gap_width) {
  //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS all_gap_stats (0, MAXSPACING);         //every gap that was not ignored
  STATS cert_space_gap_stats (0, MAXSPACING);  //gaps accepted as certain spaces
  STATS all_space_gap_stats (0, MAXSPACING);   //gaps >= first-pass threshold
  STATS small_gap_stats (0, MAXSPACING);       //gaps < first-pass threshold
  BOX blob_box;
  BOX prev_blob_box;
  INT16 gap_width;
  INT16 real_space_threshold = 0;  //first-pass space/kern threshold
  INT16 max = 0;                   //largest pile seen so far in the kern scan
  INT16 index;
  INT16 large_gap_count = 0;       //gaps rejected by ignore_big_gap()
  BOOL8 suspected_table;
  INT32 max_max_nonspace;        //upper bound
  BOOL8 good_block_space_estimate = block_space_gap_width > 0;
  INT32 end_of_row;
  INT32 row_length = 0;
  float sane_space;
  INT32 sane_threshold;

  /* Collect first pass stats for row */

  if (!good_block_space_estimate)
    //No usable block estimate: fall back on half the x-height
    block_space_gap_width = INT16 (floor (row->xheight / 2));
  if (!row->blob_list ()->empty ()) {
    if (tosp_threshold_bias1 > 0)
      real_space_threshold =
        block_non_space_gap_width +
        INT16 (floor (0.5 +
                      tosp_threshold_bias1 * (block_space_gap_width -
                                              block_non_space_gap_width)));
    else
      real_space_threshold =     //Old TO method
        (block_space_gap_width + block_non_space_gap_width) / 2;
    blob_it.set_to_list (row->blob_list ());
    blob_it.mark_cycle_pt ();
    //Right edge of the blob one step back from the current position
    end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
    //First box of the row; the box source depends on the two tosp_ flags
    if (tosp_use_pre_chopping)
      blob_box = box_next_pre_chopped (&blob_it);
    else if (tosp_stats_use_xht_gaps)
      blob_box = reduced_box_next (row, &blob_it);
    else
      blob_box = box_next (&blob_it);
    row_length = end_of_row - blob_box.left ();
    prev_blob_box = blob_box;
    while (!blob_it.cycled_list ()) {
      if (tosp_use_pre_chopping)
        blob_box = box_next_pre_chopped (&blob_it);
      else if (tosp_stats_use_xht_gaps)
        blob_box = reduced_box_next (row, &blob_it);
      else
        blob_box = box_next (&blob_it);
      gap_width = blob_box.left () - prev_blob_box.right ();
      if (ignore_big_gap (row, row_length, gapmap,
                          prev_blob_box.right (), blob_box.left ()))
        //Not histogrammed; only counted, as evidence of a table
        large_gap_count++;
      else {
        if (gap_width >= real_space_threshold) {
          //A gap counts as a CERTAIN space when certainty is not required,
          //or it is very wide, or it is fairly wide and (if required) not
          //next to a narrow blob, or both neighbours are wide blobs.
          if (!tosp_row_use_cert_spaces ||
              (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
              ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
               && (!tosp_narrow_blobs_not_cert
                   || (!narrow_blob (row, prev_blob_box)
                       && !narrow_blob (row, blob_box))))
              || (wide_blob (row, prev_blob_box)
                  && wide_blob (row, blob_box)))
            cert_space_gap_stats.add (gap_width, 1);
          all_space_gap_stats.add (gap_width, 1);
        }
        else
          small_gap_stats.add (gap_width, 1);
        all_gap_stats.add (gap_width, 1);
      }
      prev_blob_box = blob_box;
    }
  }
  //More than one big gap, or one big gap in a row with few samples
  suspected_table = (large_gap_count > 1) ||
    ((large_gap_count > 0) &&
     (all_gap_stats.get_total () <= tosp_few_samples));

  /* Determine row kern size, space size and threshold */

  if ((cert_space_gap_stats.get_total () >=
       tosp_enough_space_samples_for_median) ||
      ((suspected_table ||
        all_gap_stats.get_total () <= tosp_short_row) &&
       cert_space_gap_stats.get_total () > 0))
    //Enough certain spaces (or a table/short row with at least one)
    old_to_method(row,
                  &all_gap_stats,
                  &cert_space_gap_stats,
                  &small_gap_stats,
                  block_space_gap_width,
                  block_non_space_gap_width);
  else {
    //Try isolated_row_stats() if enabled; the inner block runs only when it
    //is disabled or returns FALSE.
    if (!tosp_recovery_isolated_row_stats ||
        !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
                             block_idx, row_idx)) {
      if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
        tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
                 block_idx, row_idx);
      if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
        //Use block default
        row->space_size = block_space_gap_width;
        if (all_gap_stats.get_total () > tosp_redo_kern_limit)
          row->kern_size = all_gap_stats.median ();
        else
          row->kern_size = block_non_space_gap_width;
        row->space_threshold =
          INT32 (floor ((row->space_size + row->kern_size) / 2));
      }
      else
        //Fall back on every gap that passed the first-pass threshold
        old_to_method(row,
                      &all_gap_stats,
                      &all_space_gap_stats,
                      &small_gap_stats,
                      block_space_gap_width,
                      block_non_space_gap_width);
    }
  }

  if (tosp_improve_thresh && !suspected_table)
    improve_row_threshold(row, &all_gap_stats);

  /* Lets try to be careful not to do anything silly with tables when we
     are ignoring big gaps */
  if (tosp_sanity_method == 0) {
    if (suspected_table &&
        (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
      if (tosp_debug_level > 0)
        tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
                 block_idx, row_idx,
                 row->kern_size, row->space_threshold, row->space_size);
      row->space_threshold =
        (INT32) (tosp_table_kn_sp_ratio * row->kern_size);
      row->space_size = MAX (row->space_threshold + 1, row->xheight);
    }
  }
  else if (tosp_sanity_method == 1) {
    sane_space = row->space_size;
    /* NEVER let space size get too close to kern size */
    if ((row->space_size < tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5))
        || ((row->space_size - row->kern_size) <
            (tosp_silly_kn_sp_gap * row->xheight))) {
      if (good_block_space_estimate &&
          (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
        sane_space = block_space_gap_width;
      else
        sane_space =
          MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),
               row->xheight / 2);
      if (tosp_debug_level > 0)
        tprintf
          ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
           block_idx, row_idx, row->kern_size, row->space_threshold,
           row->space_size, sane_space);
      row->space_size = sane_space;
      row->space_threshold =
        INT32 (floor ((row->space_size + row->kern_size) / 2));
    }
    /* NEVER let threshold get VERY far away from kern */
    sane_threshold = INT32 (floor (tosp_max_sane_kn_thresh *
                                   MAX (row->kern_size, 2.5)));
    if (row->space_threshold > sane_threshold) {
      if (tosp_debug_level > 0)
        tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
                 block_idx, row_idx,
                 row->kern_size,
                 row->space_threshold, row->space_size, sane_threshold);
      row->space_threshold = sane_threshold;
      //Keep the space size above the (lowered) threshold
      if (row->space_size <= sane_threshold)
        row->space_size = row->space_threshold + 1.0f;
    }
    /* Beware of tables - there may be NO spaces */
    if (suspected_table) {
      sane_space = MAX (tosp_table_kn_sp_ratio * row->kern_size,
                        tosp_table_xht_sp_ratio * row->xheight);
      sane_threshold = INT32 (floor ((sane_space + row->kern_size) / 2));

      if ((row->space_size < sane_space) ||
          (row->space_threshold < sane_threshold)) {
        if (tosp_debug_level > 0)
          tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
                   block_idx, row_idx,
                   row->kern_size,
                   row->space_threshold, row->space_size);
        //the minimum sane value
        //NOTE(review): the threshold is set from sane_space, not from the
        //sane_threshold computed just above - confirm this is intended.
        row->space_threshold = (INT32) sane_space;
        row->space_size = MAX (row->space_threshold + 1, row->xheight);
      }
    }
  }

  /* Lets try to put some error limits on the threshold */

  if (tosp_old_to_method) {
    /* Old textord made a space if gap >= threshold */
    //NO FUZZY SPACES YET
    row->max_nonspace = row->space_threshold;
    //NO FUZZY SPACES YET
    row->min_space = row->space_threshold + 1;
  }
  else {
    /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it :-) */
    row->min_space =
      MIN (INT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
           INT32 (row->space_size));
    if (row->min_space <= row->space_threshold)
      //Dont be silly
      row->min_space = row->space_threshold + 1;
    /*
       Kern Gap

       Lets try to guess the max certain kern gap by looking at the cluster of
       kerns for the row. The row is proportional so the kerns should cluster
       tightly at the bottom of the distribution. We also expect most gaps to be
       kerns. Find the maximum of the kern piles between 0 and twice the kern
       estimate. Piles before the first one with less than 1/10 the maximum
       number of samples can be taken as certain kerns.

       Of course, there are some cases where the kern peak and space peaks merge,
       so we will put an UPPER limit on the max certain kern gap of some fraction
       below the threshold.
     */

    max_max_nonspace = INT32 ((row->space_threshold + row->kern_size) / 2);

    //default
    row->max_nonspace = max_max_nonspace;
    //Scan upwards tracking the tallest pile; stop at the first value above
    //the kern size whose pile is under a tenth of that maximum.
    for (index = 0; index <= max_max_nonspace; index++) {
      if (all_gap_stats.pile_count (index) > max)
        max = all_gap_stats.pile_count (index);
      if ((index > row->kern_size) &&
          (all_gap_stats.pile_count (index) < 0.1 * max)) {
        row->max_nonspace = index;
        break;
      }
    }
  }

  /* Yet another algorithm - simpler this time - just choose a fraction of the
     threshold to space range */

  if ((tosp_fuzzy_sp_fraction > 0) &&
      (row->space_size > row->space_threshold))
    row->min_space = MAX (row->min_space,
                          (INT32) ceil (row->space_threshold +
                                        tosp_fuzzy_sp_fraction *
                                        (row->space_size -
                                         row->space_threshold)));

  /*
     Kern Table

     Ensure that ANY space less than some multiplier times the kern size is
     fuzzy. In tables there is a risk of erroneously setting a small space size
     when there are no real spaces. Sometimes tables have text squashed into
     columns so that the kn->sp ratio is small anyway - this means that we cant
     use this to force a wider separation - hence we rely on context to join any
     dubious breaks.
   */

  if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
      (suspected_table || tosp_fuzzy_limit_all))
    row->min_space = MAX (row->min_space,
                          (INT32) ceil (tosp_table_fuzzy_kn_sp_ratio *
                                        row->kern_size));

  //Same idea on the kern side: a fraction of the kern-to-threshold range
  if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold))
    row->max_nonspace = (INT32) floor (0.5 + row->kern_size +
                                       tosp_fuzzy_kn_fraction *
                                       (row->space_threshold -
                                        row->kern_size));

  if (row->max_nonspace > row->space_threshold)
    //Dont be silly
    row->max_nonspace = row->space_threshold;

  if (tosp_debug_level > 5)
    tprintf
      ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
       block_idx, row_idx, row_length, block_non_space_gap_width,
       block_space_gap_width, real_space_threshold, row->kern_size,
       row->max_nonspace, row->space_threshold, row->min_space,
       row->space_size);
}
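Counts the number of samples in the stats whose value is below the threshold, i.e. the sum of the pile counts for values 0 to threshold - 1.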
Definition at line 873 of file tospace.cpp.
References STATS::pile_count().
Referenced by improve_row_threshold(), and isolated_row_stats().
00873 { 00874 INT16 index; 00875 INT16 total = 0; 00876 00877 for (index = 0; index < threshold; index++) 00878 total += stats->pile_count (index); 00879 return total; 00880 }
Determine if blob might contain punctuation.
row | Row being made | |
box |
Definition at line 1660 of file tospace.cpp.
References TO_ROW::baseline, baseline, BOX::bottom(), BOX::height(), BOX::left(), BOX::right(), BOX::top(), TO_ROW::xheight, and QSPLINE::y().
Referenced by make_a_word_break().
01660 { 01661 BOOL8 result; 01662 float baseline; 01663 float blob_x_centre; 01664 01665 /* Find baseline of centre of blob */ 01666 01667 blob_x_centre = (box.right () + box.left ()) / 2.0; 01668 baseline = row->baseline.y (blob_x_centre); 01669 01670 result = (box.height () <= 0.66 * row->xheight) || 01671 (box.top () < baseline + row->xheight / 2.0) || 01672 (box.bottom () > baseline + row->xheight / 2.0); 01673 return result; 01674 }
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Set spacing by computing fuzzy word spacing thresholds for each row.
page_tr | Top-right corner of the page
blocks | Blocks on the page
Definition at line 138 of file tospace.cpp.
References block_spacing_stats(), TO_ROW::fixed_pitch, NULL, PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_PROP, plot_word_decisions(), row_spacing_stats(), to_win, and tprintf().
Referenced by make_words().
00141 { 00142 TO_BLOCK_IT block_it; //iterator 00143 TO_BLOCK *block; //current block; 00144 TO_ROW_IT row_it; //row iterator 00145 TO_ROW *row; //current row 00146 int block_index; //block number 00147 int row_index; //row number 00148 INT16 block_space_gap_width; //Est width of real spaces for whole block 00149 //Est width ofnon space gaps for whole block 00150 INT16 block_non_space_gap_width; 00151 //Old fixed/prop result 00152 BOOL8 old_text_ord_proportional; 00153 GAPMAP *gapmap = NULL; //map of big vert gaps in blk 00154 00155 block_it.set_to_list (blocks); 00156 block_index = 1; 00157 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00158 block_it.forward ()) { 00159 block = block_it.data (); 00160 gapmap = new GAPMAP (block); 00161 block_spacing_stats(block, 00162 gapmap, 00163 old_text_ord_proportional, 00164 block_space_gap_width, 00165 block_non_space_gap_width); 00166 row_it.set_to_list (block->get_rows ()); 00167 row_index = 1; 00168 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00169 row = row_it.data (); 00170 if ((row->pitch_decision == PITCH_DEF_PROP) || 00171 (row->pitch_decision == PITCH_CORR_PROP)) { 00172 if ((tosp_debug_level > 0) && !old_text_ord_proportional) 00173 tprintf ("Block %d Row %d: Now Proportional\n", 00174 block_index, row_index); 00175 row_spacing_stats(row, 00176 gapmap, 00177 block_index, 00178 row_index, 00179 block_space_gap_width, 00180 block_non_space_gap_width); 00181 } 00182 else { 00183 if ((tosp_debug_level > 0) && old_text_ord_proportional) 00184 tprintf 00185 ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n", 00186 block_index, row_index, row->pitch_decision, 00187 row->fixed_pitch); 00188 } 00189 #ifndef GRAPHICS_DISABLED 00190 if (textord_show_initial_words) 00191 plot_word_decisions (to_win, (INT16) row->fixed_pitch, row); 00192 #endif 00193 row_index++; 00194 } 00195 delete gapmap; 00196 block_index++; 00197 } 00198 }
Determine if blob is wide.
row | Row being made
blob_box | Bounding box of the blob being tested
Definition at line 1636 of file tospace.cpp.
References BOX::height(), narrow_blob(), BOX::width(), and TO_ROW::xheight.
Referenced by block_spacing_stats(), isolated_row_stats(), make_a_word_break(), and row_spacing_stats().
01636 { 01637 BOOL8 result; 01638 01639 if (tosp_wide_fraction > 0) { 01640 if (tosp_wide_aspect_ratio > 0) 01641 result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && 01642 (((float) blob_box.width () / blob_box.height ()) > 01643 tosp_wide_aspect_ratio)); 01644 else 01645 result = (blob_box.width () >= tosp_wide_fraction * row->xheight); 01646 } 01647 else 01648 result = !narrow_blob (row, blob_box); 01649 return result; 01650 }