#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "lmedsq.h"
#include "statistc.h"
#include "drawtord.h"
#include "makerow.h"
#include "pitsync1.h"
#include "pithsync.h"
#include "blobcmpl.h"
#include "tovars.h"
#include "wordseg.h"
#include "topitch.h"
#include "secname.h"
Go to the source code of this file.
// The trailing '|' table separator has been removed from both macros: it
// would otherwise be part of the replacement text and break every use.
// NOTE(review): BLOCK_STATS_CLUSTERS is not referenced in the visible part of
// this file; its name suggests a cluster count for block statistics - confirm.
#define BLOCK_STATS_CLUSTERS 10
// EXTERN expands to nothing; it prefixes the variable definitions below.
#define EXTERN
(C) Copyright 1993, Hewlett-Packard Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Definition at line 38 of file topitch.cpp.
// Trailing '|' table separator removed so the macro expands to a plain 5.
// NOTE(review): not referenced in the visible part of this file - confirm use.
#define FIXED_WIDTH_MULTIPLE 5
Definition at line 64 of file topitch.cpp.
// Trailing '|' table separator removed so the macro expands to a plain 100.
// NOTE(review): not referenced in the visible part of this file; presumably an
// upper bound on pitch values - confirm units against the users of the macro.
#define MAX_ALLOWED_PITCH 100
// Boolean flag textord_all_prop, default FALSE, declared through the
// project's BOOL_VAR macro. The help string is reproduced verbatim.
EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
Process each block.
Decide whether each block is fixed pitch individually.
Definition at line 337 of file topitch.cpp.
References ASSERT_HOST, BOX::bottom(), compute_rows_pitch(), find_repeated_chars(), BOX::left(), overlap_picture_ops, BOX::right(), BOX::top(), tprintf(), and TRUE.
Referenced by compute_fixed_pitch().
00342 { 00343 BOX block_box; //bounding box 00344 00345 block_box = block->block->bounding_box (); 00346 if (testing_on && textord_debug_pitch_test) { 00347 tprintf ("Block %d at (%d,%d)->(%d,%d)\n", 00348 block_index, 00349 block_box.left (), block_box.bottom (), 00350 block_box.right (), block_box.top ()); 00351 } 00352 block->min_space = (INT32) floor (block->xheight 00353 * textord_words_default_minspace); 00354 block->max_nonspace = (INT32) ceil (block->xheight 00355 * textord_words_default_nonspace); 00356 block->fixed_pitch = 0.0f; 00357 block->space_size = (float) block->min_space; 00358 block->kern_size = (float) block->max_nonspace; 00359 block->pr_nonsp = block->xheight * words_default_prop_nonspace; 00360 block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop; 00361 if (!block->get_rows ()->empty ()) { 00362 ASSERT_HOST (block->xheight > 0); 00363 if (textord_repeat_extraction) 00364 find_repeated_chars(block, textord_show_initial_words &&testing_on); 00365 #ifndef GRAPHICS_DISABLED 00366 if (textord_show_initial_words && testing_on) 00367 overlap_picture_ops(TRUE); 00368 #endif 00369 compute_rows_pitch(block, 00370 block_index, 00371 textord_debug_pitch_test &&testing_on); 00372 } 00373 }
void compute_fixed_pitch(ICOORD page_tr,
                         TO_BLOCK_LIST *port_blocks,
                         float gradient,
                         FCOORD rotation,
                         BOOL8 testing_on)
Determine pitch.
Decide whether each row is fixed pitch individually. Correlate definite and uncertain results to obtain an individual result for each row in the TO_ROW class.
Definition at line 75 of file topitch.cpp.
References compute_block_pitch(), create_to_win(), fix_row_pitch(), NO_WINDOW, NULL, overlap_picture_ops, print_block_counts(), rotation, to_win, tprintf(), TRUE, try_block_fixed(), try_doc_fixed(), and try_rows_fixed().
Referenced by make_words().
00081 { 00082 TO_BLOCK_IT block_it; //iterator 00083 TO_BLOCK *block; //current block; 00084 TO_ROW_IT row_it; //row iterator 00085 TO_ROW *row; //current row 00086 int block_index; //block number 00087 int row_index; //row number 00088 00089 #ifndef GRAPHICS_DISABLED 00090 if (textord_show_initial_words && testing_on) { 00091 if (to_win == NO_WINDOW) 00092 create_to_win(page_tr); 00093 } 00094 #endif 00095 00096 block_it.set_to_list (port_blocks); 00097 block_index = 1; 00098 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00099 block_it.forward ()) { 00100 block = block_it.data (); 00101 compute_block_pitch(block, rotation, block_index, testing_on); 00102 block_index++; 00103 } 00104 00105 if (!try_doc_fixed (page_tr, port_blocks, gradient)) { 00106 block_index = 1; 00107 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00108 block_it.forward ()) { 00109 block = block_it.data (); 00110 if (!try_block_fixed (block, block_index)) 00111 try_rows_fixed(block, block_index, testing_on); 00112 block_index++; 00113 } 00114 } 00115 00116 block_index = 1; 00117 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00118 block_it.forward ()) { 00119 block = block_it.data (); 00120 row_it.set_to_list (block->get_rows ()); 00121 row_index = 1; 00122 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00123 row = row_it.data (); 00124 fix_row_pitch(row, block, port_blocks, row_index, block_index); 00125 row_index++; 00126 } 00127 if (testing_on 00128 && (textord_debug_pitch_test && block->block->text_region () != NULL 00129 || textord_blocksall_fixed || textord_blocksall_prop)) { 00130 tprintf ("Corr:"); 00131 print_block_counts(block, block_index); 00132 } 00133 block_index++; 00134 } 00135 #ifndef GRAPHICS_DISABLED 00136 if (textord_show_initial_words && testing_on) { 00137 overlap_picture_ops(TRUE); 00138 } 00139 #endif 00140 }
float compute_pitch_sd(TO_ROW *row,
                       STATS *projection,
                       INT16 projection_left,
                       INT16 projection_right,
                       float space_size,
                       float initial_pitch,
                       float &sp_sd,
                       INT16 &mid_cuts,
                       ICOORDELT_LIST *row_cells,
                       BOOL8 testing_on,
                       INT16 start,
                       INT16 end)
Find fp cells.
Use a dp algorithm to fit the character cells and return the sd of the cell size over the row.
Definition at line 1412 of file topitch.cpp.
References blob_count, TO_ROW::blob_list(), box_next(), check_pitch_sync(), check_pitch_sync2(), compute_pitch_sd2(), CORAL, GOLDENROD, TO_ROW::intercept(), BOX::left(), STATS::plot(), plot_fp_cells2(), projection, BOX::right(), seg_list, ICOORD::set_x(), to_win, BOX::top(), tprintf(), and TO_ROW::xheight.
Referenced by tune_row_pitch(), and tune_row_pitch2().
01425 { 01426 INT16 occupation; //no of cells in word. 01427 //blobs 01428 BLOBNBOX_IT blob_it = row->blob_list (); 01429 BLOBNBOX_IT start_it; //start of word 01430 BLOBNBOX_IT plot_it; //for plotting 01431 INT16 blob_count; //no of blobs 01432 BOX blob_box; //bounding box 01433 BOX prev_box; //of super blob 01434 INT32 prev_right; //of word sync 01435 int scale_factor; //on scores for big words 01436 INT32 sp_count; //spaces 01437 FPSEGPT_LIST seg_list; //char cells 01438 FPSEGPT_IT seg_it; //iterator 01439 INT16 segpos; //position of segment 01440 INT16 cellpos; //previous cell boundary 01441 //iterator 01442 ICOORDELT_IT cell_it = row_cells; 01443 ICOORDELT *cell; //new cell 01444 double sqsum; //sum of squares 01445 double spsum; //of spaces 01446 double sp_var; //space error 01447 double word_sync; //result for word 01448 INT32 total_count; //total blobs 01449 01450 if ((pitsync_linear_version & 3) > 1) { 01451 word_sync = compute_pitch_sd2 (row, projection, projection_left, 01452 projection_right, initial_pitch, 01453 occupation, mid_cuts, row_cells, 01454 testing_on, start, end); 01455 sp_sd = occupation; 01456 return word_sync; 01457 } 01458 mid_cuts = 0; 01459 cellpos = 0; 01460 total_count = 0; 01461 sqsum = 0; 01462 sp_count = 0; 01463 spsum = 0; 01464 prev_right = -1; 01465 if (blob_it.empty ()) 01466 return space_size * 10; 01467 #ifndef GRAPHICS_DISABLED 01468 if (testing_on && to_win > 0) { 01469 blob_box = blob_it.data ()->bounding_box (); 01470 projection->plot (to_win, projection_left, 01471 row->intercept (), 1.0f, -1.0f, CORAL); 01472 } 01473 #endif 01474 start_it = blob_it; 01475 blob_count = 0; 01476 blob_box = box_next (&blob_it);//first blob 01477 blob_it.mark_cycle_pt (); 01478 do { 01479 for (; blob_count > 0; blob_count--) 01480 box_next(&start_it); 01481 do { 01482 prev_box = blob_box; 01483 blob_count++; 01484 blob_box = box_next (&blob_it); 01485 } 01486 while (!blob_it.cycled_list () 01487 && blob_box.left () - prev_box.right () < 
space_size); 01488 plot_it = start_it; 01489 if (pitsync_linear_version & 3) 01490 word_sync = 01491 check_pitch_sync2 (&start_it, blob_count, (INT16) initial_pitch, 2, 01492 projection, projection_left, projection_right, 01493 row->xheight * textord_projection_scale, 01494 occupation, &seg_list, start, end); 01495 else 01496 word_sync = 01497 check_pitch_sync (&start_it, blob_count, (INT16) initial_pitch, 2, 01498 projection, &seg_list); 01499 if (testing_on) { 01500 tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ", 01501 prev_box.right (), prev_box.top (), 01502 seg_list.length () - 1, word_sync); 01503 seg_it.set_to_list (&seg_list); 01504 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); 01505 seg_it.forward ()) { 01506 if (seg_it.data ()->faked) 01507 tprintf ("(F)"); 01508 tprintf ("%d, ", seg_it.data ()->position ()); 01509 // tprintf("C=%g, s=%g, sq=%g\n", 01510 // seg_it.data()->cost_function(), 01511 // seg_it.data()->sum(), 01512 // seg_it.data()->squares()); 01513 } 01514 tprintf ("\n"); 01515 } 01516 #ifndef GRAPHICS_DISABLED 01517 if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0) 01518 plot_fp_cells2(to_win, GOLDENROD, row, &seg_list); 01519 #endif 01520 seg_it.set_to_list (&seg_list); 01521 if (prev_right >= 0) { 01522 sp_var = seg_it.data ()->position () - prev_right; 01523 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; 01524 sp_var *= sp_var; 01525 spsum += sp_var; 01526 sp_count++; 01527 } 01528 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { 01529 segpos = seg_it.data ()->position (); 01530 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) { 01531 //big gap 01532 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) { 01533 cell = new ICOORDELT (cellpos + (INT16) initial_pitch, 0); 01534 cell_it.add_after_then_move (cell); 01535 cellpos += (INT16) initial_pitch; 01536 } 01537 //make new one 01538 cell = new ICOORDELT (segpos, 0); 01539 
cell_it.add_after_then_move (cell); 01540 cellpos = segpos; 01541 } 01542 else if (segpos > cellpos - initial_pitch / 2) { 01543 cell = cell_it.data (); 01544 //average positions 01545 cell->set_x ((cellpos + segpos) / 2); 01546 cellpos = cell->x (); 01547 } 01548 } 01549 seg_it.move_to_last (); 01550 prev_right = seg_it.data ()->position (); 01551 if (textord_pitch_scalebigwords) { 01552 scale_factor = (seg_list.length () - 2) / 2; 01553 if (scale_factor < 1) 01554 scale_factor = 1; 01555 } 01556 else 01557 scale_factor = 1; 01558 sqsum += word_sync * scale_factor; 01559 total_count += (seg_list.length () - 1) * scale_factor; 01560 seg_list.clear (); 01561 } 01562 while (!blob_it.cycled_list ()); 01563 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; 01564 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; 01565 }
float compute_pitch_sd2(TO_ROW *row,
                        STATS *projection,
                        INT16 projection_left,
                        INT16 projection_right,
                        float initial_pitch,
                        INT16 &occupation,
                        INT16 &mid_cuts,
                        ICOORDELT_LIST *row_cells,
                        BOOL8 testing_on,
                        INT16 start,
                        INT16 end)
Find fp cells.
Use a dp algorithm to fit the character cells and return the sd of the cell size over the row.
Definition at line 1574 of file topitch.cpp.
References blob_count, TO_ROW::blob_list(), box_next(), check_pitch_sync2(), CORAL, GOLDENROD, TO_ROW::intercept(), STATS::plot(), plot_fp_cells2(), projection, BOX::right(), seg_list, to_win, BOX::top(), tprintf(), and TO_ROW::xheight.
Referenced by compute_pitch_sd().
01586 { 01587 //blobs 01588 BLOBNBOX_IT blob_it = row->blob_list (); 01589 BLOBNBOX_IT plot_it; 01590 INT16 blob_count; //no of blobs 01591 BOX blob_box; //bounding box 01592 FPSEGPT_LIST seg_list; //char cells 01593 FPSEGPT_IT seg_it; //iterator 01594 INT16 segpos; //position of segment 01595 //iterator 01596 ICOORDELT_IT cell_it = row_cells; 01597 ICOORDELT *cell; //new cell 01598 double word_sync; //result for word 01599 01600 mid_cuts = 0; 01601 if (blob_it.empty ()) { 01602 occupation = 0; 01603 return initial_pitch * 10; 01604 } 01605 #ifndef GRAPHICS_DISABLED 01606 if (testing_on && to_win > 0) { 01607 projection->plot (to_win, projection_left, 01608 row->intercept (), 1.0f, -1.0f, CORAL); 01609 } 01610 #endif 01611 blob_count = 0; 01612 blob_it.mark_cycle_pt (); 01613 do { 01614 //first blob 01615 blob_box = box_next (&blob_it); 01616 blob_count++; 01617 } 01618 while (!blob_it.cycled_list ()); 01619 plot_it = blob_it; 01620 word_sync = check_pitch_sync2 (&blob_it, blob_count, (INT16) initial_pitch, 01621 2, projection, projection_left, 01622 projection_right, 01623 row->xheight * textord_projection_scale, 01624 occupation, &seg_list, start, end); 01625 if (testing_on) { 01626 tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ", 01627 blob_box.right (), blob_box.top (), 01628 seg_list.length () - 1, word_sync); 01629 seg_it.set_to_list (&seg_list); 01630 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { 01631 if (seg_it.data ()->faked) 01632 tprintf ("(F)"); 01633 tprintf ("%d, ", seg_it.data ()->position ()); 01634 // tprintf("C=%g, s=%g, sq=%g\n", 01635 // seg_it.data()->cost_function(), 01636 // seg_it.data()->sum(), 01637 // seg_it.data()->squares()); 01638 } 01639 tprintf ("\n"); 01640 } 01641 #ifndef GRAPHICS_DISABLED 01642 if (textord_show_fixed_cuts && blob_count > 0 && to_win > 0) 01643 plot_fp_cells2(to_win, GOLDENROD, row, &seg_list); 01644 #endif 01645 seg_it.set_to_list (&seg_list); 01646 for 
(seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { 01647 segpos = seg_it.data ()->position (); 01648 //make new one 01649 cell = new ICOORDELT (segpos, 0); 01650 cell_it.add_after_then_move (cell); 01651 if (seg_it.at_last ()) 01652 mid_cuts = seg_it.data ()->cheap_cuts (); 01653 } 01654 seg_list.clear (); 01655 return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10; 01656 }
Find line stats.
Decide whether each row is fixed pitch individually.
Definition at line 381 of file topitch.cpp.
References ASSERT_HOST, TO_ROW::compute_vertical_projection(), FALSE, find_row_pitch(), PITCH_DUNNO, row_pitch_stats(), and TO_ROW::xheight.
Referenced by compute_block_pitch().
00385 { 00386 INT32 maxwidth; //of spaces 00387 TO_ROW *row; //current row 00388 INT32 row_index; //row number. 00389 float lower, upper; //cluster thresholds 00390 TO_ROW_IT row_it = block->get_rows (); 00391 00392 row_index = 1; 00393 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00394 row = row_it.data (); 00395 ASSERT_HOST (row->xheight > 0); 00396 row->compute_vertical_projection (); 00397 maxwidth = (INT32) ceil (row->xheight * textord_words_maxspace); 00398 if (row_pitch_stats (row, maxwidth, testing_on) 00399 && find_row_pitch (row, maxwidth, 00400 textord_dotmatrix_gap + 1, block, block_index, 00401 row_index, testing_on)) { 00402 if (row->fixed_pitch == 0) { 00403 lower = row->pr_nonsp; 00404 upper = row->pr_space; 00405 row->space_size = upper; 00406 row->kern_size = lower; 00407 } 00408 } 00409 else { 00410 row->fixed_pitch = 0.0f; //insufficient data 00411 row->pitch_decision = PITCH_DUNNO; 00412 } 00413 row_index++; 00414 } 00415 return FALSE; 00416 }
void count_block_votes(TO_BLOCK *block,
                       INT32 &def_fixed,
                       INT32 &def_prop,
                       INT32 &maybe_fixed,
                       INT32 &maybe_prop,
                       INT32 &corr_fixed,
                       INT32 &corr_prop,
                       INT32 &dunno)
Find line stats.
Count the number of rows in the block with each kind of pitch_decision.
Definition at line 694 of file topitch.cpp.
References PITCH_CORR_FIXED, PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_FIXED, PITCH_DEF_PROP, PITCH_DUNNO, PITCH_MAYBE_FIXED, and PITCH_MAYBE_PROP.
Referenced by print_block_counts(), and try_rows_fixed().
00702 { 00703 TO_ROW *row; //current row 00704 TO_ROW_IT row_it = block->get_rows (); 00705 00706 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00707 row = row_it.data (); 00708 switch (row->pitch_decision) { 00709 case PITCH_DUNNO: 00710 dunno++; 00711 break; 00712 case PITCH_DEF_PROP: 00713 def_prop++; 00714 break; 00715 case PITCH_MAYBE_PROP: 00716 maybe_prop++; 00717 break; 00718 case PITCH_DEF_FIXED: 00719 def_fixed++; 00720 break; 00721 case PITCH_MAYBE_FIXED: 00722 maybe_fixed++; 00723 break; 00724 case PITCH_CORR_PROP: 00725 corr_prop++; 00726 break; 00727 case PITCH_CORR_FIXED: 00728 corr_fixed++; 00729 break; 00730 } 00731 } 00732 }
BOOL8 count_pitch_stats(TO_ROW *row,
                        STATS *gap_stats,
                        STATS *pitch_stats,
                        float initial_pitch,
                        float min_space,
                        BOOL8 ignore_outsize,
                        BOOL8 split_outsize,
                        INT32 dm_gap)
Find lines.
Definition at line 1098 of file topitch.cpp.
References STATS::add(), TO_ROW::blob_list(), STATS::clear(), FALSE, STATS::get_total(), BOX::left(), NULL, BOX::right(), and BOX::width().
Referenced by find_row_pitch().
01107 { 01108 BOOL8 prev_valid; //not word broken 01109 BLOBNBOX *blob; //current blob 01110 //blobs 01111 BLOBNBOX_IT blob_it = row->blob_list (); 01112 INT32 prev_right; //end of prev blob 01113 INT32 prev_centre; //centre of previous blob 01114 INT32 x_centre; //centre of this blob 01115 INT32 blob_width; //width of blob 01116 INT32 width_units; //no of widths in blob 01117 float width; //blob width 01118 BOX blob_box; //bounding box 01119 BOX joined_box; //of super blob 01120 01121 gap_stats->clear (); 01122 pitch_stats->clear (); 01123 if (blob_it.empty ()) 01124 return FALSE; 01125 prev_valid = FALSE; 01126 prev_centre = 0; 01127 prev_right = 0; //stop complier warning 01128 joined_box = blob_it.data ()->bounding_box (); 01129 do { 01130 blob_it.forward (); 01131 blob = blob_it.data (); 01132 if (!blob->joined_to_prev ()) { 01133 blob_box = blob->bounding_box (); 01134 if (blob_box.left () - joined_box.right () < dm_gap 01135 && !blob_it.at_first () 01136 || blob->cblob () == NULL && blob->blob () == NULL) 01137 joined_box += blob_box; //merge blobs 01138 else { 01139 blob_width = joined_box.width (); 01140 if (split_outsize) { 01141 width_units = 01142 (INT32) floor ((float) blob_width / initial_pitch + 0.5); 01143 if (width_units < 1) 01144 width_units = 1; 01145 width_units--; 01146 } 01147 else if (ignore_outsize) { 01148 width = (float) blob_width / initial_pitch; 01149 width_units = width < 1 + words_default_fixed_limit 01150 && width > 1 - words_default_fixed_limit ? 
0 : -1; 01151 } 01152 else 01153 width_units = 0; //everything in 01154 x_centre = (INT32) (joined_box.left () 01155 + (blob_width - 01156 width_units * initial_pitch) / 2); 01157 if (prev_valid && width_units >= 0) { 01158 // if (width_units>0) 01159 // { 01160 // tprintf("wu=%d, width=%d, xc=%d, adding %d\n", 01161 // width_units,blob_width,x_centre,x_centre-prev_centre); 01162 // } 01163 gap_stats->add (joined_box.left () - prev_right, 1); 01164 pitch_stats->add (x_centre - prev_centre, 1); 01165 } 01166 prev_centre = (INT32) (x_centre + width_units * initial_pitch); 01167 prev_right = joined_box.right (); 01168 prev_valid = blob_box.left () - joined_box.right () < min_space; 01169 prev_valid = prev_valid && width_units >= 0; 01170 joined_box = blob_box; 01171 } 01172 } 01173 } 01174 while (!blob_it.at_first ()); 01175 return gap_stats->get_total () >= 3; 01176 }
Search for equal chars.
Find 4 or more adjacent chars which are the same and put them into words in advance of fixed pitch checking and word generation.
Definition at line 1812 of file topitch.cpp.
References TO_ROW::blob_list(), BOX::bottom(), WERD::bounding_box(), compare_blobs(), FALSE, INT_HOLLOW, interior_style, BOX::left(), make_real_word(), MAX_INT16, NULL, perimeter_color_index, rectangle, RED, TO_ROW::rep_words, BOX::right(), WERD::set_flag(), to_win, BOX::top(), tprintf(), TRUE, W_DONT_CHOP, and W_REP_CHAR.
Referenced by compute_block_pitch().
01815 { 01816 BOOL8 bol; //start of line 01817 TO_ROW *row; //current row 01818 TO_ROW_IT row_it = block->get_rows (); 01819 ROW *real_row; //output row 01820 WERD_IT word_it; //new words 01821 WERD *word; //new word 01822 BLOBNBOX *bblob; //current blob 01823 BLOBNBOX *nextblob; //neighbour to compare 01824 BLOBNBOX_IT box_it; //iterator 01825 BLOBNBOX_IT search_it; //forward search 01826 INT32 blobcount; //no of neighbours 01827 INT32 matched_blobcount; //no of matches 01828 INT32 blobindex; //in row 01829 INT32 row_length; //blobs in row 01830 INT32 width_change; //max width change 01831 INT32 blob_width; //required blob width 01832 INT32 space_width; //required gap width 01833 INT32 prev_right; //right edge of last blob 01834 float rating; //match rating 01835 PBLOB *pblob1; //polygonal blob 01836 PBLOB *pblob2; //second blob 01837 BOX word_box; //for plotting 01838 01839 if (row_it.empty ()) 01840 return; //empty block 01841 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01842 row = row_it.data (); 01843 box_it.set_to_list (row->blob_list ()); 01844 row_length = row->blob_list ()->length (); 01845 blobindex = 0; 01846 word_it.set_to_list (&row->rep_words); 01847 bol = TRUE; 01848 if (!box_it.empty ()) { 01849 real_row = new ROW (row, 01850 (INT16) block->kern_size, 01851 (INT16) block->space_size); 01852 do { 01853 bblob = box_it.data (); 01854 blobcount = 1; 01855 search_it = box_it; 01856 search_it.forward (); 01857 matched_blobcount = 1; 01858 width_change = MAX_INT16; 01859 blob_width = 0; 01860 space_width = 0; 01861 prev_right = bblob->bounding_box ().right (); 01862 if (bblob->bounding_box ().height () * 2 < row->xheight 01863 && !bblob->joined_to_prev () 01864 && (bblob->blob () != NULL || bblob->cblob () != NULL)) { 01865 if (bblob->cblob () != NULL) 01866 pblob1 = new PBLOB (bblob->cblob (), row->xheight); 01867 else 01868 pblob1 = bblob->blob (); 01869 01870 rating = 0.0f; 01871 while (rating < textord_repeat_rating 01872 
&& blobindex + blobcount < row_length 01873 && ((nextblob = search_it.data ())->blob () != NULL 01874 || nextblob->cblob () != NULL) 01875 && nextblob->bounding_box ().height () * 2 < 01876 row->xheight) { 01877 if (blobcount == 1) { 01878 space_width = nextblob->bounding_box ().left () 01879 - bblob->bounding_box ().right (); 01880 blob_width = bblob->bounding_box ().width (); 01881 width_change = 01882 blob_width > 01883 space_width ? blob_width : space_width; 01884 width_change = 01885 (INT32) (width_change * 01886 textord_repch_width_variance); 01887 if (width_change < 3) 01888 width_change = 3; 01889 } 01890 if (nextblob->bounding_box ().width () > 01891 blob_width + width_change 01892 || nextblob->bounding_box ().width () < 01893 blob_width - width_change 01894 || nextblob->bounding_box ().left () - prev_right > 01895 space_width + width_change 01896 || nextblob->bounding_box ().left () - prev_right < 01897 space_width - width_change) { 01898 if (testing_on) 01899 tprintf 01900 ("Repch terminated:bw=%d, sw=%d, wc=%d, pr=%d, nb=(%d,%d)\n", 01901 blob_width, space_width, width_change, 01902 prev_right, nextblob->bounding_box ().left (), 01903 nextblob->bounding_box ().right ()); 01904 break; //not good enough 01905 } 01906 if (nextblob->blob () != NULL) 01907 rating = compare_blobs (pblob1, real_row, 01908 nextblob->blob (), real_row); 01909 else { 01910 pblob2 = 01911 new PBLOB (nextblob->cblob (), row->xheight); 01912 rating = 01913 compare_blobs(pblob1, real_row, pblob2, real_row); 01914 delete pblob2; 01915 } 01916 if (rating < textord_repeat_rating) { 01917 // if (testing_on) 01918 // tprintf("Blob at (%d,%d)->(%d,%d) had rating %g\n", 01919 // nextblob->bounding_box().left(), 01920 // nextblob->bounding_box().bottom(), 01921 // nextblob->bounding_box().right(), 01922 // nextblob->bounding_box().top(), 01923 // rating); 01924 blobcount++; 01925 search_it.forward (); 01926 matched_blobcount++; 01927 while (blobindex + blobcount < row_length 01928 && 
search_it.data ()->joined_to_prev ()) { 01929 search_it.forward (); 01930 blobcount++; //suck in joined bits 01931 } 01932 } 01933 prev_right = nextblob->bounding_box ().right (); 01934 } 01935 if (bblob->cblob () != NULL) 01936 delete pblob1; 01937 01938 if (matched_blobcount >= textord_repeat_threshold) { 01939 word = 01940 make_real_word (&box_it, blobcount, bol, FALSE, FALSE, 01941 1); 01942 #ifndef GRAPHICS_DISABLED 01943 if (testing_on) { 01944 word_box = word->bounding_box (); 01945 tprintf 01946 ("Found repeated word of %d blobs (%d matched) from (%d,%d)->(%d,%d)\n", 01947 blobcount, matched_blobcount, word_box.left (), 01948 word_box.bottom (), word_box.right (), 01949 word_box.top ()); 01950 perimeter_color_index(to_win, RED); 01951 interior_style(to_win, INT_HOLLOW, TRUE); 01952 rectangle (to_win, word_box.left (), 01953 word_box.bottom (), word_box.right (), 01954 word_box.top ()); 01955 } 01956 #endif 01957 word->set_flag (W_REP_CHAR, TRUE); 01958 word->set_flag (W_DONT_CHOP, TRUE); 01959 word_it.add_after_then_move (word); 01960 blobindex += blobcount; 01961 } 01962 } 01963 bol = FALSE; 01964 box_it.forward (); //next one 01965 blobindex++; 01966 } 01967 //until all done 01968 while (!box_it.at_first ()); 01969 delete real_row; 01970 } 01971 } 01972 }
BOOL8 find_row_pitch(TO_ROW *row,
                     INT32 maxwidth,
                     INT32 dm_gap,
                     TO_BLOCK *block,
                     INT32 block_index,
                     INT32 row_index,
                     BOOL8 testing_on)
Find lines.
Check to see if this row could be fixed pitch using the given spacings. Blobs with gaps smaller than the lower threshold are assumed to be one. The larger threshold is the word gap threshold.
Definition at line 872 of file topitch.cpp.
References STATS::clear(), count_pitch_stats(), FALSE, TO_ROW::fixed_pitch, TO_ROW::fp_nonsp, TO_ROW::fp_space, STATS::ile(), NULL, pitch, PITCH_DUNNO, PITCH_MAYBE_FIXED, PITCH_MAYBE_PROP, tprintf(), TRUE, and TO_ROW::xheight.
Referenced by compute_rows_pitch().
00880 { 00881 BOOL8 used_dm_model; //looks lik dot matrix 00882 float min_space; //estimate threshold 00883 float non_space; //gap size 00884 float gap_iqr; //interquartile range 00885 float pitch_iqr; 00886 float dm_gap_iqr; //interquartile range 00887 float dm_pitch_iqr; 00888 float dm_pitch; //pitch with dm on 00889 float pitch; //revised estimate 00890 float initial_pitch; //guess at pitch 00891 STATS gap_stats (0, maxwidth); 00892 //centre-centre 00893 STATS pitch_stats (0, maxwidth); 00894 00895 row->fixed_pitch = 0.0f; 00896 initial_pitch = row->fp_space; 00897 if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) 00898 initial_pitch = row->xheight;//keep pitch decent 00899 non_space = row->fp_nonsp; 00900 if (non_space > initial_pitch) 00901 non_space = initial_pitch; 00902 min_space = (initial_pitch + non_space) / 2; 00903 00904 if (!count_pitch_stats (row, &gap_stats, &pitch_stats, 00905 initial_pitch, min_space, TRUE, FALSE, dm_gap)) { 00906 dm_gap_iqr = 0.0001; 00907 dm_pitch_iqr = maxwidth * 2.0f; 00908 dm_pitch = initial_pitch; 00909 } 00910 else { 00911 dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); 00912 dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); 00913 dm_pitch = pitch_stats.ile (0.5); 00914 } 00915 gap_stats.clear (); 00916 pitch_stats.clear (); 00917 if (!count_pitch_stats (row, &gap_stats, &pitch_stats, 00918 initial_pitch, min_space, TRUE, FALSE, 0)) { 00919 gap_iqr = 0.0001; 00920 pitch_iqr = maxwidth * 3.0f; 00921 } 00922 else { 00923 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); 00924 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); 00925 if (testing_on) 00926 tprintf 00927 ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", 00928 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); 00929 initial_pitch = pitch_stats.ile (0.5); 00930 if (min_space > initial_pitch 00931 && count_pitch_stats (row, &gap_stats, &pitch_stats, 00932 initial_pitch, 
initial_pitch, TRUE, FALSE, 0)) { 00933 min_space = initial_pitch; 00934 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); 00935 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); 00936 if (testing_on) 00937 tprintf 00938 ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", 00939 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); 00940 initial_pitch = pitch_stats.ile (0.5); 00941 } 00942 } 00943 if (textord_debug_pitch_metric) 00944 tprintf ("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", 00945 block_index, row_index, 00946 block->block->text_region () != NULL ? 00947 (block->block->text_region ()->is_prop ()? 'P' : 'F') : 'X', 00948 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr, 00949 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' 00950 : (pitch_iqr * dm_gap_iqr <= 00951 dm_pitch_iqr * gap_iqr ? 'S' : 'M')); 00952 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) { 00953 row->pitch_decision = PITCH_DUNNO; 00954 if (textord_debug_pitch_metric) 00955 tprintf ("\n"); 00956 return FALSE; //insufficient data 00957 } 00958 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) { 00959 if (testing_on) 00960 tprintf 00961 ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", 00962 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); 00963 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); 00964 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); 00965 pitch = pitch_stats.ile (0.5); 00966 used_dm_model = FALSE; 00967 } 00968 else { 00969 if (testing_on) 00970 tprintf 00971 ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", 00972 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); 00973 gap_iqr = dm_gap_iqr; 00974 pitch_iqr = dm_pitch_iqr; 00975 pitch = dm_pitch; 00976 used_dm_model = TRUE; 00977 } 00978 if (textord_debug_pitch_metric) { 00979 tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", 00980 pitch_iqr, gap_iqr, pitch); 00981 
tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", 00982 pitch_iqr / gap_iqr, pitch_iqr / block->xheight, 00983 pitch_iqr < gap_iqr * textord_fpiqr_ratio 00984 && pitch_iqr < block->xheight * textord_max_pitch_iqr 00985 && pitch < block->xheight * textord_words_default_maxspace 00986 ? 'F' : 'P'); 00987 } 00988 if (pitch_iqr < gap_iqr * textord_fpiqr_ratio 00989 && pitch_iqr < block->xheight * textord_max_pitch_iqr 00990 && pitch < block->xheight * textord_words_default_maxspace) 00991 row->pitch_decision = PITCH_MAYBE_FIXED; 00992 else 00993 row->pitch_decision = PITCH_MAYBE_PROP; 00994 row->fixed_pitch = pitch; 00995 row->kern_size = gap_stats.ile (0.5); 00996 row->min_space = (INT32) (row->fixed_pitch + non_space) / 2; 00997 if (row->min_space > row->fixed_pitch) 00998 row->min_space = (INT32) row->fixed_pitch; 00999 row->max_nonspace = row->min_space; 01000 row->space_size = row->fixed_pitch; 01001 row->space_threshold = (row->max_nonspace + row->min_space) / 2; 01002 row->used_dm_model = used_dm_model; 01003 return TRUE; 01004 }
void fix_row_pitch | ( | TO_ROW * | bad_row, | |
TO_BLOCK * | bad_block, | |||
TO_BLOCK_LIST * | blocks, | |||
INT32 | row_target, | |||
INT32 | block_target | |||
) |
Get some value.
Get a pitch_decision for this row by voting among similar rows in the block, then similar rows over all the page, or any other rows at all.
Definition at line 149 of file topitch.cpp.
References STATS::add(), TO_ROW::all_caps, TO_ROW::ascrise, TO_ROW::char_cells, FALSE, TO_ROW::fixed_pitch, STATS::ile(), TO_ROW::kern_size, TO_ROW::max_nonspace, TO_ROW::min_space, NULL, PITCH_CORR_FIXED, PITCH_CORR_PROP, TO_ROW::pitch_decision, PITCH_DEF_FIXED, PITCH_DEF_PROP, PITCH_MAYBE_FIXED, PITCH_MAYBE_PROP, TO_ROW::projection, STATS::set_range(), TO_ROW::space_size, TO_ROW::space_threshold, tprintf(), tune_row_pitch(), and TO_ROW::xheight.
Referenced by compute_fixed_pitch().
00155 { 00156 const char *res_string; //decision on line 00157 INT16 mid_cuts; 00158 int block_votes; //votes in block 00159 int like_votes; //votes over page 00160 int other_votes; //votes of unlike blocks 00161 int block_index; //number of block 00162 int row_index; //number of row 00163 int maxwidth; //max pitch 00164 TO_BLOCK_IT block_it = blocks; //block iterator 00165 TO_ROW_IT row_it; 00166 TO_BLOCK *block; //current block 00167 TO_ROW *row; //current row 00168 float sp_sd; //space deviation 00169 STATS block_stats; //pitches in block 00170 STATS like_stats; //pitches in page 00171 00172 block_votes = like_votes = other_votes = 0; 00173 maxwidth = (INT32) ceil (bad_row->xheight * textord_words_maxspace); 00174 if (bad_row->pitch_decision != PITCH_DEF_FIXED 00175 && bad_row->pitch_decision != PITCH_DEF_PROP) { 00176 block_stats.set_range (0, maxwidth); 00177 like_stats.set_range (0, maxwidth); 00178 block_index = 1; 00179 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00180 block_it.forward ()) { 00181 block = block_it.data (); 00182 row_index = 1; 00183 row_it.set_to_list (block->get_rows ()); 00184 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 00185 row_it.forward ()) { 00186 row = row_it.data (); 00187 if (bad_row->all_caps 00188 && row->xheight + row->ascrise 00189 < 00190 (bad_row->xheight + bad_row->ascrise) * (1 + 00191 textord_pitch_rowsimilarity) 00192 && row->xheight + row->ascrise > 00193 (bad_row->xheight + bad_row->ascrise) * (1 - 00194 textord_pitch_rowsimilarity) 00195 || !bad_row->all_caps 00196 && row->xheight < 00197 bad_row->xheight * (1 + textord_pitch_rowsimilarity) 00198 && row->xheight > 00199 bad_row->xheight * (1 - textord_pitch_rowsimilarity)) { 00200 if (block_index == block_target) { 00201 if (row->pitch_decision == PITCH_DEF_FIXED) { 00202 block_votes += textord_words_veto_power; 00203 block_stats.add ((INT32) row->fixed_pitch, 00204 textord_words_veto_power); 00205 } 00206 else if (row->pitch_decision == 
PITCH_MAYBE_FIXED 00207 || row->pitch_decision == PITCH_CORR_FIXED) { 00208 block_votes++; 00209 block_stats.add ((INT32) row->fixed_pitch, 1); 00210 } 00211 else if (row->pitch_decision == PITCH_DEF_PROP) 00212 block_votes -= textord_words_veto_power; 00213 else if (row->pitch_decision == PITCH_MAYBE_PROP 00214 || row->pitch_decision == PITCH_CORR_PROP) 00215 block_votes--; 00216 } 00217 else { 00218 if (row->pitch_decision == PITCH_DEF_FIXED) { 00219 like_votes += textord_words_veto_power; 00220 like_stats.add ((INT32) row->fixed_pitch, 00221 textord_words_veto_power); 00222 } 00223 else if (row->pitch_decision == PITCH_MAYBE_FIXED 00224 || row->pitch_decision == PITCH_CORR_FIXED) { 00225 like_votes++; 00226 like_stats.add ((INT32) row->fixed_pitch, 1); 00227 } 00228 else if (row->pitch_decision == PITCH_DEF_PROP) 00229 like_votes -= textord_words_veto_power; 00230 else if (row->pitch_decision == PITCH_MAYBE_PROP 00231 || row->pitch_decision == PITCH_CORR_PROP) 00232 like_votes--; 00233 } 00234 } 00235 else { 00236 if (row->pitch_decision == PITCH_DEF_FIXED) 00237 other_votes += textord_words_veto_power; 00238 else if (row->pitch_decision == PITCH_MAYBE_FIXED 00239 || row->pitch_decision == PITCH_CORR_FIXED) 00240 other_votes++; 00241 else if (row->pitch_decision == PITCH_DEF_PROP) 00242 other_votes -= textord_words_veto_power; 00243 else if (row->pitch_decision == PITCH_MAYBE_PROP 00244 || row->pitch_decision == PITCH_CORR_PROP) 00245 other_votes--; 00246 } 00247 row_index++; 00248 } 00249 block_index++; 00250 } 00251 if (block_votes > textord_words_veto_power) { 00252 bad_row->fixed_pitch = block_stats.ile (0.5); 00253 bad_row->pitch_decision = PITCH_CORR_FIXED; 00254 } 00255 else if (block_votes <= textord_words_veto_power && like_votes > 0) { 00256 bad_row->fixed_pitch = like_stats.ile (0.5); 00257 bad_row->pitch_decision = PITCH_CORR_FIXED; 00258 } 00259 else { 00260 bad_row->pitch_decision = PITCH_CORR_PROP; 00261 #ifndef SECURE_NAMES 00262 if (block_votes 
== 0 && like_votes == 0 && other_votes > 0 00263 && (textord_debug_pitch_test || textord_debug_pitch_metric)) 00264 tprintf 00265 ("Warning:row %d of block %d set prop with no like rows against trend\n", 00266 row_target, block_target); 00267 #endif 00268 } 00269 } 00270 if (textord_debug_pitch_metric) { 00271 tprintf (":b_votes=%d:l_votes=%d:o_votes=%d", 00272 block_votes, like_votes, other_votes); 00273 if (bad_row->pitch_decision == PITCH_CORR_PROP 00274 || bad_row->pitch_decision == PITCH_DEF_PROP) { 00275 res_string = bad_block->block->text_region () != NULL ? 00276 (bad_block->block->text_region ()-> 00277 is_prop ()? "CP" : "WP") : "XP"; 00278 } 00279 else { 00280 res_string = bad_block->block->text_region () != NULL ? 00281 (bad_block->block->text_region ()-> 00282 is_prop ()? "WF" : "CF") : "XF"; 00283 } 00284 tprintf (":Blk=%d:Row=%d:%c:", 00285 block_target, row_target, 00286 bad_block->block->text_region () != NULL ? 00287 (bad_block->block->text_region ()-> 00288 is_prop ()? 'P' : 'F') : 'X'); 00289 tprintf ("x=%g:asc=%g:corr_res=%s\n", bad_row->xheight, 00290 bad_row->ascrise, res_string); 00291 } 00292 if (textord_pitch_cheat && bad_block->block->text_region () != NULL) 00293 bad_row->pitch_decision = 00294 bad_block->block->text_region ()-> 00295 is_prop ()? 
PITCH_CORR_PROP : PITCH_CORR_FIXED; 00296 if (bad_row->pitch_decision == PITCH_CORR_FIXED) { 00297 if (bad_row->fixed_pitch < textord_min_xheight) { 00298 if (block_votes > 0) 00299 bad_row->fixed_pitch = block_stats.ile (0.5); 00300 else if (block_votes == 0 && like_votes > 0) 00301 bad_row->fixed_pitch = like_stats.ile (0.5); 00302 else { 00303 tprintf 00304 ("Warning:guessing pitch as xheight on row %d, block %d\n", 00305 row_target, block_target); 00306 bad_row->fixed_pitch = bad_row->xheight; 00307 } 00308 } 00309 if (bad_row->fixed_pitch < textord_min_xheight) 00310 bad_row->fixed_pitch = (float) textord_min_xheight; 00311 bad_row->kern_size = bad_row->fixed_pitch / 4; 00312 bad_row->min_space = (INT32) (bad_row->fixed_pitch * 0.6); 00313 bad_row->max_nonspace = (INT32) (bad_row->fixed_pitch * 0.4); 00314 bad_row->space_threshold = 00315 (bad_row->min_space + bad_row->max_nonspace) / 2; 00316 bad_row->space_size = bad_row->fixed_pitch; 00317 if (bad_row->char_cells.empty ()) 00318 tune_row_pitch (bad_row, &bad_row->projection, 00319 bad_row->projection_left, bad_row->projection_right, 00320 (bad_row->fixed_pitch + 00321 bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch, 00322 sp_sd, mid_cuts, &bad_row->char_cells, FALSE); 00323 } 00324 else if (bad_row->pitch_decision == PITCH_CORR_PROP 00325 || bad_row->pitch_decision == PITCH_DEF_PROP) { 00326 bad_row->fixed_pitch = 0.0f; 00327 bad_row->char_cells.clear (); 00328 } 00329 }
Find lines.
Check to see if this row could be fixed pitch using the given spacings. Blobs with gaps smaller than the lower threshold are assumed to be one. The larger threshold is the word gap threshold.
Definition at line 1015 of file topitch.cpp.
References TO_ROW::all_caps, TO_ROW::char_cells, TO_ROW::fixed_pitch, TO_ROW::fp_nonsp, TO_ROW::pitch_decision, PITCH_DEF_FIXED, PITCH_DEF_PROP, PITCH_DUNNO, PITCH_MAYBE_FIXED, PITCH_MAYBE_PROP, TO_ROW::projection, TO_ROW::projection_left, TO_ROW::projection_right, tprintf(), TRUE, tune_row_pitch(), and TO_ROW::used_dm_model.
Referenced by try_rows_fixed().
01018 { 01019 const char *res_string; //pitch result 01020 INT16 mid_cuts; //no of cheap cuts 01021 float non_space; //gap size 01022 float pitch_sd; //error on pitch 01023 float sp_sd; //space sd 01024 01025 non_space = row->fp_nonsp; 01026 if (non_space > row->fixed_pitch) 01027 non_space = row->fixed_pitch; 01028 if (textord_all_prop) { 01029 // Set the decision to definitely proportional. 01030 pitch_sd = textord_words_def_prop * row->fixed_pitch; 01031 row->pitch_decision = PITCH_DEF_PROP; 01032 } else { 01033 pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left, 01034 row->projection_right, 01035 (row->fixed_pitch + non_space * 3) / 4, 01036 row->fixed_pitch, sp_sd, mid_cuts, 01037 &row->char_cells, 01038 block_index == textord_debug_block); 01039 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch 01040 && ((pitsync_linear_version & 3) < 3 01041 || (pitsync_linear_version & 3) >= 3 && (row->used_dm_model 01042 || sp_sd > 20 01043 || pitch_sd == 0 01044 && sp_sd > 10))) { 01045 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch 01046 && !row->all_caps 01047 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) 01048 row->pitch_decision = PITCH_DEF_FIXED; 01049 else 01050 row->pitch_decision = PITCH_MAYBE_FIXED; 01051 } 01052 else if ((pitsync_linear_version & 3) < 3 01053 || sp_sd > 20 01054 || mid_cuts > 0 01055 || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) { 01056 if (pitch_sd < textord_words_def_prop * row->fixed_pitch) 01057 row->pitch_decision = PITCH_MAYBE_PROP; 01058 else 01059 row->pitch_decision = PITCH_DEF_PROP; 01060 } 01061 else 01062 row->pitch_decision = PITCH_DUNNO; 01063 } 01064 01065 if (textord_debug_pitch_metric) { 01066 res_string = "??"; 01067 switch (row->pitch_decision) { 01068 case PITCH_DEF_PROP: 01069 res_string = "DP"; 01070 break; 01071 case PITCH_MAYBE_PROP: 01072 res_string = "MP"; 01073 break; 01074 case PITCH_DEF_FIXED: 01075 res_string = "DF"; 01076 break; 01077 case 
PITCH_MAYBE_FIXED: 01078 res_string = "MF"; 01079 default: 01080 res_string = "??"; 01081 } 01082 tprintf (":sd/p=%g:occ=%g:init_res=%s\n", 01083 pitch_sd / row->fixed_pitch, sp_sd, res_string); 01084 } 01085 return TRUE; 01086 }
void plot_fp_word | ( | TO_BLOCK * | block, | |
float | pitch, | |||
float | nonspace | |||
) |
Draw block of words.
Plot a block of words as if fixed pitch.
Definition at line 1981 of file topitch.cpp.
References TO_ROW::max_nonspace, TO_ROW::min_space, plot_word_decisions(), TO_ROW::space_threshold, and to_win.
01985 { 01986 TO_ROW *row; //current row 01987 TO_ROW_IT row_it = block->get_rows (); 01988 01989 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01990 row = row_it.data (); 01991 row->min_space = (INT32) ((pitch + nonspace) / 2); 01992 row->max_nonspace = row->min_space; 01993 row->space_threshold = row->min_space; 01994 plot_word_decisions (to_win, (INT16) pitch, row); 01995 } 01996 }
Print line stats.
Count up how many rows have what decision and print the results.
Definition at line 650 of file topitch.cpp.
References count_block_votes(), NULL, and tprintf().
Referenced by compute_fixed_pitch(), and try_rows_fixed().
00653 { 00654 INT32 def_fixed = 0; //counters 00655 INT32 def_prop = 0; 00656 INT32 maybe_fixed = 0; 00657 INT32 maybe_prop = 0; 00658 INT32 dunno = 0; 00659 INT32 corr_fixed = 0; 00660 INT32 corr_prop = 0; 00661 00662 count_block_votes(block, 00663 def_fixed, 00664 def_prop, 00665 maybe_fixed, 00666 maybe_prop, 00667 corr_fixed, 00668 corr_prop, 00669 dunno); 00670 tprintf ("Block %d has (%d,%d,%d)", 00671 block_index, def_fixed, maybe_fixed, corr_fixed); 00672 if ((textord_blocksall_prop 00673 || block->block->text_region () != NULL 00674 && block->block->text_region ()->is_prop ()) && (def_fixed 00675 || maybe_fixed 00676 || corr_fixed)) 00677 tprintf (" (Wrongly)"); 00678 tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop); 00679 if ((textord_blocksall_fixed 00680 || block->block->text_region () != NULL 00681 && !block->block->text_region ()->is_prop ()) && (def_prop 00682 || maybe_prop 00683 || corr_prop)) 00684 tprintf (" (Wrongly)"); 00685 tprintf (" prop, %d dunno\n", dunno); 00686 }
void print_pitch_sd | ( | TO_ROW * | row, | |
STATS * | projection, | |||
INT16 | projection_left, | |||
INT16 | projection_right, | |||
float | space_size, | |||
float | initial_pitch | |||
) |
Print fp cells.
Use a dp algorithm to fit the character cells, first word by word and then over the whole row, and print the sd of the cell size for each fit (debug output only; nothing is returned).
Definition at line 1665 of file topitch.cpp.
References TO_ROW::all_caps, blob_count, TO_ROW::blob_list(), box_next(), check_pitch_sync2(), CORAL, TO_ROW::fixed_pitch, BOX::left(), NO_WINDOW, plot_fp_cells2(), projection, BOX::right(), seg_list, to_win, tprintf(), and TO_ROW::xheight.
Referenced by tune_row_pitch(), and tune_row_pitch2().
01672 { 01673 const char *res2; //pitch result 01674 INT16 occupation; //used cells 01675 float sp_sd; //space sd 01676 //blobs 01677 BLOBNBOX_IT blob_it = row->blob_list (); 01678 BLOBNBOX_IT start_it; //start of word 01679 BLOBNBOX_IT row_start; //start of row 01680 INT16 blob_count; //no of blobs 01681 INT16 total_blob_count; //total blobs in line 01682 BOX blob_box; //bounding box 01683 BOX prev_box; //of super blob 01684 INT32 prev_right; //of word sync 01685 int scale_factor; //on scores for big words 01686 INT32 sp_count; //spaces 01687 FPSEGPT_LIST seg_list; //char cells 01688 FPSEGPT_IT seg_it; //iterator 01689 double sqsum; //sum of squares 01690 double spsum; //of spaces 01691 double sp_var; //space error 01692 double word_sync; //result for word 01693 double total_count; //total cuts 01694 01695 if (blob_it.empty ()) 01696 return; 01697 row_start = blob_it; 01698 total_blob_count = 0; 01699 01700 total_count = 0; 01701 sqsum = 0; 01702 sp_count = 0; 01703 spsum = 0; 01704 prev_right = -1; 01705 blob_it = row_start; 01706 start_it = blob_it; 01707 blob_count = 0; 01708 blob_box = box_next (&blob_it);//first blob 01709 blob_it.mark_cycle_pt (); 01710 do { 01711 for (; blob_count > 0; blob_count--) 01712 box_next(&start_it); 01713 do { 01714 prev_box = blob_box; 01715 blob_count++; 01716 blob_box = box_next (&blob_it); 01717 } 01718 while (!blob_it.cycled_list () 01719 && blob_box.left () - prev_box.right () < space_size); 01720 word_sync = 01721 check_pitch_sync2 (&start_it, blob_count, (INT16) initial_pitch, 2, 01722 projection, projection_left, projection_right, 01723 row->xheight * textord_projection_scale, 01724 occupation, &seg_list, 0, 0); 01725 total_blob_count += blob_count; 01726 seg_it.set_to_list (&seg_list); 01727 if (prev_right >= 0) { 01728 sp_var = seg_it.data ()->position () - prev_right; 01729 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; 01730 sp_var *= sp_var; 01731 spsum += sp_var; 01732 sp_count++; 01733 } 01734 
seg_it.move_to_last (); 01735 prev_right = seg_it.data ()->position (); 01736 if (textord_pitch_scalebigwords) { 01737 scale_factor = (seg_list.length () - 2) / 2; 01738 if (scale_factor < 1) 01739 scale_factor = 1; 01740 } 01741 else 01742 scale_factor = 1; 01743 sqsum += word_sync * scale_factor; 01744 total_count += (seg_list.length () - 1) * scale_factor; 01745 seg_list.clear (); 01746 } 01747 while (!blob_it.cycled_list ()); 01748 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; 01749 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; 01750 tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", 01751 word_sync, word_sync / initial_pitch, sp_sd, 01752 word_sync < textord_words_pitchsd_threshold * initial_pitch 01753 ? 'F' : 'P'); 01754 01755 start_it = row_start; 01756 blob_it = row_start; 01757 word_sync = 01758 check_pitch_sync2 (&blob_it, total_blob_count, (INT16) initial_pitch, 2, 01759 projection, projection_left, projection_right, 01760 row->xheight * textord_projection_scale, occupation, 01761 &seg_list, 0, 0); 01762 if (occupation > 1) 01763 word_sync /= occupation; 01764 word_sync = sqrt (word_sync); 01765 01766 #ifndef GRAPHICS_DISABLED 01767 if (textord_show_row_cuts && to_win != NO_WINDOW) 01768 plot_fp_cells2(to_win, CORAL, row, &seg_list); 01769 #endif 01770 seg_list.clear (); 01771 if (word_sync < textord_words_pitchsd_threshold * initial_pitch) { 01772 if (word_sync < textord_words_def_fixed * initial_pitch 01773 && !row->all_caps) 01774 res2 = "DF"; 01775 else 01776 res2 = "MF"; 01777 } 01778 else 01779 res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP"; 01780 tprintf 01781 ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n", 01782 word_sync, word_sync / initial_pitch, 01783 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', 01784 occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); 01785 }
Find line stats.
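Gather the gaps between the blobs of a row, cluster them, and use the cluster medians to make initial estimates of the row's proportional and fixed-pitch non-space and space sizes (pr_nonsp, pr_space, fp_nonsp, fp_space). Returns FALSE if there are no gaps or no clusters to work from.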
Definition at line 740 of file topitch.cpp.
References STATS::add(), TO_ROW::blob_list(), BLOCK_STATS_CLUSTERS, STATS::cluster(), FALSE, TO_ROW::fp_nonsp, TO_ROW::fp_space, STATS::get_total(), BOX::left(), TO_ROW::pr_nonsp, TO_ROW::pr_space, BOX::right(), STATS::smooth(), sort_floats2(), tprintf(), TRUE, and TO_ROW::xheight.
Referenced by compute_rows_pitch().
00744 { 00745 BLOBNBOX *blob; //current blob 00746 int gap_index; //current gap 00747 INT32 prev_x; //end of prev blob 00748 INT32 cluster_count; //no of clusters 00749 INT32 prev_count; //of clusters 00750 INT32 smooth_factor; //for smoothing stats 00751 BOX blob_box; //bounding box 00752 float lower, upper; //cluster thresholds 00753 //gap sizes 00754 float gaps[BLOCK_STATS_CLUSTERS]; 00755 //blobs 00756 BLOBNBOX_IT blob_it = row->blob_list (); 00757 STATS gap_stats (0, maxwidth); 00758 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; 00759 //clusters 00760 00761 smooth_factor = 00762 (INT32) (row->xheight * textord_wordstats_smooth_factor + 1.5); 00763 if (!blob_it.empty ()) { 00764 prev_x = blob_it.data ()->bounding_box ().right (); 00765 blob_it.forward (); 00766 while (!blob_it.at_first ()) { 00767 blob = blob_it.data (); 00768 if (!blob->joined_to_prev ()) { 00769 blob_box = blob->bounding_box (); 00770 if (blob_box.left () - prev_x < maxwidth) 00771 gap_stats.add (blob_box.left () - prev_x, 1); 00772 prev_x = blob_box.right (); 00773 } 00774 blob_it.forward (); 00775 } 00776 } 00777 if (gap_stats.get_total () == 0) { 00778 return FALSE; 00779 } 00780 cluster_count = 0; 00781 lower = row->xheight * words_initial_lower; 00782 upper = row->xheight * words_initial_upper; 00783 gap_stats.smooth (smooth_factor); 00784 do { 00785 prev_count = cluster_count; 00786 cluster_count = gap_stats.cluster (lower, upper, 00787 textord_spacesize_ratioprop, 00788 BLOCK_STATS_CLUSTERS, cluster_stats); 00789 } 00790 while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); 00791 if (cluster_count < 1) { 00792 return FALSE; 00793 } 00794 for (gap_index = 0; gap_index < cluster_count; gap_index++) 00795 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); 00796 //get medians 00797 if (testing_on) { 00798 tprintf ("cluster_count=%d:", cluster_count); 00799 for (gap_index = 0; gap_index < cluster_count; gap_index++) 00800 tprintf (" %g(%d)", gaps[gap_index], 
00801 cluster_stats[gap_index + 1].get_total ()); 00802 tprintf ("\n"); 00803 } 00804 qsort (gaps, cluster_count, sizeof (float), sort_floats2); 00805 00806 //Try to find proportional non-space and space for row. 00807 lower = row->xheight * words_default_prop_nonspace; 00808 upper = row->xheight * textord_words_min_minspace; 00809 for (gap_index = 0; gap_index < cluster_count 00810 && gaps[gap_index] < lower; gap_index++); 00811 if (gap_index == 0) { 00812 if (testing_on) 00813 tprintf ("No clusters below nonspace threshold!!\n"); 00814 if (cluster_count > 1) { 00815 row->pr_nonsp = gaps[0]; 00816 row->pr_space = gaps[1]; 00817 } 00818 else { 00819 row->pr_nonsp = lower; 00820 row->pr_space = gaps[0]; 00821 } 00822 } 00823 else { 00824 row->pr_nonsp = gaps[gap_index - 1]; 00825 while (gap_index < cluster_count && gaps[gap_index] < upper) 00826 gap_index++; 00827 if (gap_index == cluster_count) { 00828 if (testing_on) 00829 tprintf ("No clusters above nonspace threshold!!\n"); 00830 row->pr_space = lower * textord_spacesize_ratioprop; 00831 } 00832 else 00833 row->pr_space = gaps[gap_index]; 00834 } 00835 00836 //Now try to find the fixed pitch space and non-space. 
00837 upper = row->xheight * words_default_fixed_space; 00838 for (gap_index = 0; gap_index < cluster_count 00839 && gaps[gap_index] < upper; gap_index++); 00840 if (gap_index == 0) { 00841 if (testing_on) 00842 tprintf ("No clusters below space threshold!!\n"); 00843 row->fp_nonsp = upper; 00844 row->fp_space = gaps[0]; 00845 } 00846 else { 00847 row->fp_nonsp = gaps[gap_index - 1]; 00848 if (gap_index == cluster_count) { 00849 if (testing_on) 00850 tprintf ("No clusters above space threshold!!\n"); 00851 row->fp_space = row->xheight; 00852 } 00853 else 00854 row->fp_space = gaps[gap_index]; 00855 } 00856 if (testing_on) { 00857 tprintf 00858 ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n", 00859 row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space); 00860 } 00861 return TRUE; //computed some stats 00862 }
int sort_floats2 | ( | const void * | arg1, | |
const void * | arg2 | |||
) |
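qsort comparison function for floats: returns 1 if the first value is greater than the second, -1 if it is smaller and 0 otherwise, so that qsort orders the array ascending.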
Definition at line 1791 of file topitch.cpp.
Referenced by row_pitch_stats().
/**
 * sort_floats2
 *
 * qsort comparison function for an array of floats (ascending order).
 *
 * @param arg1 pointer to the first float
 * @param arg2 pointer to the second float
 * @return 1 if *arg1 > *arg2, -1 if *arg1 < *arg2, otherwise 0
 */
int sort_floats2(const void *arg1, const void *arg2) {
  //Keep the pointees const and compare the values directly instead of
  //testing the sign of their difference.
  const float first = *static_cast<const float *>(arg1);
  const float second = *static_cast<const float *>(arg2);

  if (first > second)
    return 1;
  if (first < second)
    return -1;
  return 0;
}
Find line stats.
Try to call the entire block fixed.
Definition at line 569 of file topitch.cpp.
References FALSE.
Referenced by compute_fixed_pitch().
// try_block_fixed -- function body. No block-level test is performed here:
// the body does no work and unconditionally returns FALSE.
00572 { 00573 return FALSE; 00574 }
Determine pitch.
Attempt to call the entire document fixed pitch.
Definition at line 424 of file topitch.cpp.
References STATS::add(), TO_ROW::baseline, TO_ROW::char_cells, CORAL, FALSE, TO_ROW::fixed_pitch, STATS::get_total(), GOLDENROD, STATS::ile(), TO_ROW::intercept(), MAX_ALLOWED_PITCH, MAX_INT16, NO_WINDOW, NULL, STATS::pile_count(), pitch, STATS::plot(), plot_row_cells(), projection, TO_ROW::projection, TO_ROW::projection_left, TO_ROW::projection_right, STATS::set_range(), to_win, tprintf(), tune_row_pitch(), and QSPLINE::y().
Referenced by compute_fixed_pitch().
00428 { 00429 INT16 master_x; //uniform shifts 00430 INT16 pitch; //median pitch. 00431 int x; //profile coord 00432 int prop_blocks; //correct counts 00433 int fixed_blocks; 00434 int total_row_count; //total in page 00435 //iterator 00436 TO_BLOCK_IT block_it = port_blocks; 00437 TO_BLOCK *block; //current block; 00438 TO_ROW_IT row_it; //row iterator 00439 TO_ROW *row; //current row 00440 INT16 projection_left; //edges 00441 INT16 projection_right; 00442 INT16 row_left; //edges of row 00443 INT16 row_right; 00444 ICOORDELT_LIST *master_cells; //cells for page 00445 float master_y; //uniform shifts 00446 float shift_factor; //page skew correction 00447 float row_shift; //shift for row 00448 float final_pitch; //output pitch 00449 float row_y; //baseline 00450 STATS projection; //entire page 00451 STATS pitches (0, MAX_ALLOWED_PITCH); 00452 //for median 00453 float sp_sd; //space sd 00454 INT16 mid_cuts; //no of cheap cuts 00455 float pitch_sd; //sync rating 00456 00457 if (block_it.empty () 00458 // || block_it.data()==block_it.data_relative(1) 00459 || !textord_blockndoc_fixed) 00460 return FALSE; 00461 shift_factor = gradient / (gradient * gradient + 1); 00462 row_it.set_to_list (block_it.data ()->get_rows ()); 00463 master_x = row_it.data ()->projection_left; 00464 master_y = row_it.data ()->baseline.y (master_x); 00465 projection_left = MAX_INT16; 00466 projection_right = -MAX_INT16; 00467 prop_blocks = 0; 00468 fixed_blocks = 0; 00469 total_row_count = 0; 00470 00471 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00472 block_it.forward ()) { 00473 block = block_it.data (); 00474 if (block->block->text_region () != NULL) { 00475 if (block->block->text_region ()->is_prop ()) 00476 prop_blocks++; 00477 else 00478 fixed_blocks++; 00479 } 00480 row_it.set_to_list (block->get_rows ()); 00481 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00482 row = row_it.data (); 00483 total_row_count++; 00484 if (row->fixed_pitch > 0) 
00485 pitches.add ((INT32) (row->fixed_pitch), 1); 00486 //find median 00487 row_y = row->baseline.y (master_x); 00488 row_left = 00489 (INT16) (row->projection_left - 00490 shift_factor * (master_y - row_y)); 00491 row_right = 00492 (INT16) (row->projection_right - 00493 shift_factor * (master_y - row_y)); 00494 if (row_left < projection_left) 00495 projection_left = row_left; 00496 if (row_right > projection_right) 00497 projection_right = row_right; 00498 } 00499 } 00500 if (pitches.get_total () == 0) 00501 return FALSE; 00502 projection.set_range (projection_left, projection_right); 00503 00504 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00505 block_it.forward ()) { 00506 block = block_it.data (); 00507 row_it.set_to_list (block->get_rows ()); 00508 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00509 row = row_it.data (); 00510 row_y = row->baseline.y (master_x); 00511 row_left = 00512 (INT16) (row->projection_left - 00513 shift_factor * (master_y - row_y)); 00514 for (x = row->projection_left; x < row->projection_right; 00515 x++, row_left++) { 00516 projection.add (row_left, row->projection.pile_count (x)); 00517 } 00518 } 00519 } 00520 00521 row_it.set_to_list (block_it.data ()->get_rows ()); 00522 row = row_it.data (); 00523 #ifndef GRAPHICS_DISABLED 00524 if (textord_show_page_cuts && to_win != NO_WINDOW) 00525 projection.plot (to_win, projection_left, 00526 row->intercept (), 1.0f, -1.0f, CORAL); 00527 #endif 00528 final_pitch = pitches.ile (0.5); 00529 pitch = (INT16) final_pitch; 00530 pitch_sd = 00531 tune_row_pitch (row, &projection, projection_left, projection_right, 00532 pitch * 0.75, final_pitch, sp_sd, mid_cuts, 00533 &row->char_cells, FALSE); 00534 00535 if (textord_debug_pitch_metric) 00536 tprintf 00537 ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n", 00538 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, 00539 pitch_sd / 
total_row_count, pitch_sd / pitch, 00540 pitch_sd / total_row_count / pitch); 00541 00542 #ifndef GRAPHICS_DISABLED 00543 if (textord_show_page_cuts && to_win != NO_WINDOW) { 00544 master_cells = &row->char_cells; 00545 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00546 block_it.forward ()) { 00547 block = block_it.data (); 00548 row_it.set_to_list (block->get_rows ()); 00549 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 00550 row_it.forward ()) { 00551 row = row_it.data (); 00552 row_y = row->baseline.y (master_x); 00553 row_shift = shift_factor * (master_y - row_y); 00554 plot_row_cells(to_win, GOLDENROD, row, row_shift, master_cells); 00555 } 00556 } 00557 } 00558 #endif 00559 row->char_cells.clear (); 00560 return FALSE; 00561 }
Find line stats.
Decide whether each row is fixed pitch individually.
Definition at line 582 of file topitch.cpp.
References ASSERT_HOST, count_block_votes(), FALSE, TO_ROW::fixed_pitch, fixed_pitch_row(), TO_ROW::kern_size, PITCH_DEF_FIXED, PITCH_DEF_PROP, PITCH_DUNNO, PITCH_MAYBE_FIXED, PITCH_MAYBE_PROP, TO_ROW::pr_nonsp, TO_ROW::pr_space, print_block_counts(), TO_ROW::space_size, tprintf(), and TO_ROW::xheight.
Referenced by compute_fixed_pitch().
00586 { 00587 INT32 maxwidth; //of spaces 00588 TO_ROW *row; //current row 00589 INT32 row_index; //row number. 00590 INT32 def_fixed = 0; //counters 00591 INT32 def_prop = 0; 00592 INT32 maybe_fixed = 0; 00593 INT32 maybe_prop = 0; 00594 INT32 dunno = 0; 00595 INT32 corr_fixed = 0; 00596 INT32 corr_prop = 0; 00597 float lower, upper; //cluster thresholds 00598 TO_ROW_IT row_it = block->get_rows (); 00599 00600 row_index = 1; 00601 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00602 row = row_it.data (); 00603 ASSERT_HOST (row->xheight > 0); 00604 maxwidth = (INT32) ceil (row->xheight * textord_words_maxspace); 00605 if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)) { 00606 if (row->fixed_pitch == 0) { 00607 lower = row->pr_nonsp; 00608 upper = row->pr_space; 00609 row->space_size = upper; 00610 row->kern_size = lower; 00611 } 00612 } 00613 row_index++; 00614 } 00615 count_block_votes(block, 00616 def_fixed, 00617 def_prop, 00618 maybe_fixed, 00619 maybe_prop, 00620 corr_fixed, 00621 corr_prop, 00622 dunno); 00623 if (testing_on 00624 && (textord_debug_pitch_test 00625 || textord_blocksall_prop || textord_blocksall_fixed)) { 00626 tprintf ("Initially:"); 00627 print_block_counts(block, block_index); 00628 } 00629 if (def_fixed > def_prop * textord_words_veto_power) 00630 block->pitch_decision = PITCH_DEF_FIXED; 00631 else if (def_prop > def_fixed * textord_words_veto_power) 00632 block->pitch_decision = PITCH_DEF_PROP; 00633 else if (def_fixed > 0 || def_prop > 0) 00634 block->pitch_decision = PITCH_DUNNO; 00635 else if (maybe_fixed > maybe_prop * textord_words_veto_power) 00636 block->pitch_decision = PITCH_MAYBE_FIXED; 00637 else if (maybe_prop > maybe_fixed * textord_words_veto_power) 00638 block->pitch_decision = PITCH_MAYBE_PROP; 00639 else 00640 block->pitch_decision = PITCH_DUNNO; 00641 return FALSE; 00642 }
float tune_row_pitch | ( | TO_ROW * | row, | |
STATS * | projection, | |||
INT16 | projection_left, | |||
INT16 | projection_right, | |||
float | space_size, | |||
float & | initial_pitch, | |||
float & | best_sp_sd, | |||
INT16 & | best_mid_cuts, | |||
ICOORDELT_LIST * | best_cells, | |||
BOOL8 | testing_on | |||
) |
Find fp cells.
Use a dp algorithm to fit the character cells and return the sd of the cell size over the row.
Definition at line 1185 of file topitch.cpp.
References compute_pitch_sd(), print_pitch_sd(), projection, tprintf(), and tune_row_pitch2().
Referenced by fix_row_pitch(), fixed_pitch_row(), and try_doc_fixed().
01196 { 01197 int pitch_delta; //offset pitch 01198 INT16 mid_cuts; //cheap cuts 01199 float pitch_sd; //current sd 01200 float best_sd; //best result 01201 float best_pitch; //pitch for best result 01202 float initial_sd; //starting error 01203 float sp_sd; //space sd 01204 ICOORDELT_LIST test_cells; //row cells 01205 ICOORDELT_IT best_it; //start of best list 01206 01207 if (textord_fast_pitch_test) 01208 return tune_row_pitch2 (row, projection, projection_left, 01209 projection_right, space_size, initial_pitch, 01210 best_sp_sd, 01211 //space sd 01212 best_mid_cuts, best_cells, testing_on); 01213 if (textord_disable_pitch_test) { 01214 best_sp_sd = initial_pitch; 01215 return initial_pitch; 01216 } 01217 initial_sd = 01218 compute_pitch_sd(row, 01219 projection, 01220 projection_left, 01221 projection_right, 01222 space_size, 01223 initial_pitch, 01224 best_sp_sd, 01225 best_mid_cuts, 01226 best_cells, 01227 testing_on); 01228 best_sd = initial_sd; 01229 best_pitch = initial_pitch; 01230 if (testing_on) 01231 tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd); 01232 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { 01233 pitch_sd = 01234 compute_pitch_sd (row, projection, projection_left, projection_right, 01235 space_size, initial_pitch + pitch_delta, sp_sd, 01236 mid_cuts, &test_cells, testing_on); 01237 if (testing_on) 01238 tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, 01239 pitch_sd); 01240 if (pitch_sd < best_sd) { 01241 best_sd = pitch_sd; 01242 best_mid_cuts = mid_cuts; 01243 best_sp_sd = sp_sd; 01244 best_pitch = initial_pitch + pitch_delta; 01245 best_cells->clear (); 01246 best_it.set_to_list (best_cells); 01247 best_it.add_list_after (&test_cells); 01248 } 01249 else 01250 test_cells.clear (); 01251 if (pitch_sd > initial_sd) 01252 break; //getting worse 01253 } 01254 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { 01255 pitch_sd = 01256 compute_pitch_sd 
(row, projection, projection_left, projection_right, 01257 space_size, initial_pitch - pitch_delta, sp_sd, 01258 mid_cuts, &test_cells, testing_on); 01259 if (testing_on) 01260 tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, 01261 pitch_sd); 01262 if (pitch_sd < best_sd) { 01263 best_sd = pitch_sd; 01264 best_mid_cuts = mid_cuts; 01265 best_sp_sd = sp_sd; 01266 best_pitch = initial_pitch - pitch_delta; 01267 best_cells->clear (); 01268 best_it.set_to_list (best_cells); 01269 best_it.add_list_after (&test_cells); 01270 } 01271 else 01272 test_cells.clear (); 01273 if (pitch_sd > initial_sd) 01274 break; 01275 } 01276 initial_pitch = best_pitch; 01277 01278 if (textord_debug_pitch_metric) 01279 print_pitch_sd(row, 01280 projection, 01281 projection_left, 01282 projection_right, 01283 space_size, 01284 best_pitch); 01285 01286 return best_sd; 01287 }
float tune_row_pitch2 | ( | TO_ROW * | row, | |
STATS * | projection, | |||
INT16 | projection_left, | |||
INT16 | projection_right, | |||
float | space_size, | |||
float & | initial_pitch, | |||
float & | best_sp_sd, | |||
INT16 & | best_mid_cuts, | |||
ICOORDELT_LIST * | best_cells, | |||
BOOL8 | testing_on | |||
) |
Find fp cells.
Use a dp algorithm to fit the character cells and return the sd of the cell size over the row.
Definition at line 1296 of file topitch.cpp.
References compute_pitch_sd(), NULL, STATS::pile_count(), print_pitch_sd(), projection, and tprintf().
Referenced by tune_row_pitch().
01307 { 01308 int pitch_delta; //offset pitch 01309 INT16 pixel; //pixel coord 01310 INT16 best_pixel; //pixel coord 01311 INT16 best_delta; //best pitch 01312 INT16 best_pitch; //best pitch 01313 INT16 start; //of good range 01314 INT16 end; //of good range 01315 INT32 best_count; //lowest sum 01316 float best_sd; //best result 01317 STATS *sum_proj; //summed projection 01318 01319 best_sp_sd = initial_pitch; 01320 01321 if (textord_disable_pitch_test) { 01322 return initial_pitch; 01323 } 01324 sum_proj = new STATS[textord_pitch_range * 2 + 1]; 01325 if (sum_proj == NULL) 01326 return initial_pitch; 01327 best_pitch = (INT32) initial_pitch; 01328 01329 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; 01330 pitch_delta++) 01331 sum_proj[textord_pitch_range + pitch_delta].set_range (0, 01332 best_pitch + 01333 pitch_delta + 1); 01334 for (pixel = projection_left; pixel <= projection_right; pixel++) { 01335 for (pitch_delta = -textord_pitch_range; 01336 pitch_delta <= textord_pitch_range; pitch_delta++) 01337 sum_proj[textord_pitch_range + 01338 pitch_delta].add ((pixel - projection_left) % (best_pitch + 01339 pitch_delta), 01340 projection->pile_count (pixel)); 01341 } 01342 best_count = sum_proj[textord_pitch_range].pile_count (0); 01343 best_delta = 0; 01344 best_pixel = 0; 01345 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; 01346 pitch_delta++) { 01347 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) { 01348 if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel) 01349 < best_count) { 01350 best_count = 01351 sum_proj[textord_pitch_range + 01352 pitch_delta].pile_count (pixel); 01353 best_delta = pitch_delta; 01354 best_pixel = pixel; 01355 } 01356 } 01357 } 01358 if (testing_on) 01359 tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", 01360 initial_pitch, best_delta, best_count); 01361 best_pitch += best_delta; 01362 initial_pitch = best_pitch; 01363 best_count++; 
01364 best_count += best_count; 01365 for (start = best_pixel - 2; start > best_pixel - best_pitch 01366 && sum_proj[textord_pitch_range + 01367 best_delta].pile_count (start % best_pitch) <= best_count; 01368 start--); 01369 for (end = best_pixel + 2; 01370 end < best_pixel + best_pitch 01371 && sum_proj[textord_pitch_range + 01372 best_delta].pile_count (end % best_pitch) <= best_count; 01373 end++); 01374 01375 best_sd = 01376 compute_pitch_sd(row, 01377 projection, 01378 projection_left, 01379 projection_right, 01380 space_size, 01381 initial_pitch, 01382 best_sp_sd, 01383 best_mid_cuts, 01384 best_cells, 01385 testing_on, 01386 start, 01387 end); 01388 if (testing_on) 01389 tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, 01390 best_sd); 01391 01392 if (textord_debug_pitch_metric) 01393 print_pitch_sd(row, 01394 projection, 01395 projection_left, 01396 projection_right, 01397 space_size, 01398 initial_pitch); 01399 01400 delete[]sum_proj; 01401 01402 return best_sd; 01403 }